Constructs a learner class object for fitting a superlearner.

learner_sl(
  learners,
  info = NULL,
  nfolds = 5L,
  meta.learner = metalearner_nnls,
  model.score = mse,
  learner.args = NULL,
  ...
)

Arguments

learners

(list) List of learner objects (e.g., learner_glm)

info

(character) Optional information to describe the instantiated learner object.

nfolds

(integer) Number of folds to use in cross-validation to estimate the ensemble weights.

meta.learner

(function) Algorithm to learn the ensemble weights (default: non-negative least squares). Must be a function of the response, y (an n×1 vector), and the predictions, pred (an n×p matrix, where p is the number of learners). Alternatively, this can be set to the character value "discrete", in which case the Discrete Super-Learner is applied: the model with the lowest risk (model score) is given weight 1 and all other learners are given weight 0.

model.score

(function) Model scoring method (see learner)

learner.args

(list) Additional arguments to learner$new().

...

Additional arguments passed to superlearner.

Value

learner object.

See also

Examples

# Simulate a toy regression data set: the response depends non-linearly on
# x1 (y = x1 + cos(x1) + Gaussian noise), while x2 is pure noise with no
# predictive value.
#
# n: number of observations (default 500).
# Returns a data.frame with columns y, x1, x2.
sim1 <- function(n = 5e2) {
  x1 <- rnorm(n, sd = 2)
  x2 <- rnorm(n)
  # Noise sd = sqrt(0.5); use `^` — the `**` spelling is deprecated in R.
  y <- x1 + cos(x1) + rnorm(n, sd = 0.5^0.5)
  data.frame(y, x1, x2)
}
# Draw a training data set (default n = 500 observations).
d <- sim1()

# Candidate library for the super-learner: an intercept-only model, a linear
# model in both covariates, and an isotonic regression on x1 alone.
m <- list(
  "mean" = learner_glm(y ~ 1),
  "glm" = learner_glm(y ~ x1 + x2),
  "iso" = learner_isoreg(y ~ x1)
)

# Construct the super-learner with 10-fold cross-validation for estimating
# the ensemble weights, fit it on the simulated data, and obtain in-sample
# predictions.
s <- learner_sl(m, nfolds = 10)
s$estimate(d)
pr <- s$predict(d)
if (interactive()) {
    plot(y ~ x1, data = d)
    # Super-learner predictions in red.
    points(d$x1, pr, col = 2, cex = 0.5)
    # Overlay the true regression function x1 + cos(x1) (rows sorted by x1
    # so the line is drawn left to right).
    lines(cos(x1) + x1 ~ x1, data = d[order(d$x1), ],
          lwd = 4, col = lava::Col("darkblue", 0.3))
}
# Printing the fitted object shows each learner's cross-validated score and
# its estimated ensemble weight (here isotonic regression dominates).
print(s)
#> ────────── learner object ──────────
#> superlearner
#> 	mean
#> 	glm
#> 	iso 
#> 
#> Estimate arguments: learners=<list>, nfolds=10, meta.learner=<function>, model.score=<function> 
#> Predict arguments:   
#> Formula: y ~ 1 <environment: 0x55ff0d1fe908> 
#> ─────────────────────────────────────
#>          score     weight
#> mean 5.2661490 0.05883677
#> glm  0.9606129 0.06683244
#> iso  0.5279369 0.87433079
# weights(s$fit)
# score(s$fit)

# Cross-validate the entire super-learner procedure itself (3 folds,
# repeated twice) to get an honest out-of-sample performance estimate for
# the ensemble ("sl") alongside each individual learner.
cvres <- cv(s, data = d, nfolds = 3, rep = 2)
cvres
#> 
#> 3-fold cross-validation with 2 repetitions
#> 
#> ── mse 
#>         mean      sd     min     max
#> sl   0.56376 0.04026 0.50827 0.60064
#> mean 5.31597 0.67276 4.62674 6.15548
#> glm  0.96730 0.07912 0.83995 1.06951
#> iso  0.55300 0.03934 0.50965 0.60017
#> 
#> ── mae 
#>         mean      sd     min     max
#> sl   0.58900 0.02678 0.54525 0.61950
#> mean 1.85244 0.09149 1.73685 1.96075
#> glm  0.80978 0.03366 0.75282 0.85151
#> iso  0.57893 0.02685 0.54367 0.61560
#> 
#> ── weight 
#>         mean      sd     min     max
#> sl         -       -       -       -
#> mean 0.03659 0.04399 0.00000 0.10359
#> glm  0.08412 0.02900 0.05093 0.11624
#> iso  0.87929 0.02431 0.83767 0.90239
# coef(cvres)
# score(cvres)