Constructs a learner class object for fitting a superlearner.
learner_sl(
learners,
info = NULL,
nfolds = 5L,
meta.learner = metalearner_nnls,
model.score = mse,
learner.args = NULL,
...
)
learners: (list) List of learner objects (e.g., created with learner_glm).
info: (character) Optional information to describe the instantiated learner object.
nfolds: (integer) Number of folds to use in cross-validation to estimate the ensemble weights.
meta.learner: (function) Algorithm to learn the ensemble weights
(default non-negative least squares). Must be a function of the response
(n x 1 vector), `y`, and the predictions (n x p matrix), `pred`, with p being
the number of learners. Alternatively, this can be set to the character
value "discrete", in which case the Discrete Super-Learner is applied, where
the model with the lowest risk (model-score) is given weight 1 and all
other learners are given weight 0.
model.score: (function) Model scoring method (see learner).
learner.args: (list) Additional arguments to learner$new().
...: Additional arguments passed on to superlearner.
Returns a learner object.
# Simulate a data set with `n` rows (default 500) holding a response `y`
# and two Gaussian covariates `x1` and `x2`. Only `x1` enters the response,
# through the non-linear mean x1 + cos(x1); `x2` is pure noise.
sim1 <- function(n = 5e2) {
  # Random draws happen in the order x1, x2, noise, so a fixed seed
  # reproduces the same data.
  x1 <- rnorm(n, sd = 2)
  x2 <- rnorm(n)
  # Residual standard deviation chosen so that the noise variance is 0.5.
  noise_sd <- 0.5^0.5
  signal <- x1 + cos(x1)
  y <- signal + rnorm(n, sd = noise_sd)
  data.frame(y = y, x1 = x1, x2 = x2)
}
# Draw one simulated sample using the default size of sim1().
d <- sim1()

# Candidate learners; the list names are the labels under which each
# learner is reported.
m <- list(
  mean = learner_glm(y ~ 1),
  glm = learner_glm(y ~ x1 + x2),
  iso = learner_isoreg(y ~ x1)
)

# Construct the superlearner with 10 cross-validation folds, fit it to the
# simulated data and predict on that same data.
s <- learner_sl(m, nfolds = 10)
s$estimate(d)
pr <- s$predict(d)

# Only draw the diagnostic plot when running interactively.
if (interactive()) {
  # Observed data.
  plot(y ~ x1, data = d)
  # Superlearner predictions.
  points(d$x1, pr, col = 2, cex = 0.5)
  # Noise-free mean x1 + cos(x1) used by sim1(); rows are sorted by x1 so
  # the curve is drawn left to right.
  lines(
    cos(x1) + x1 ~ x1,
    data = d[order(d$x1), ],
    lwd = 4,
    col = lava::Col("darkblue", 0.3)
  )
}

print(s)
#> ────────── learner object ──────────
#> superlearner
#> mean
#> glm
#> iso
#>
#> Estimate arguments: learners=<list>, nfolds=10, meta.learner=<function>, model.score=<function>
#> Predict arguments:
#> Formula: y ~ 1 <environment: 0x55ff0d1fe908>
#> ─────────────────────────────────────
#> score weight
#> mean 5.2661490 0.05883677
#> glm 0.9606129 0.06683244
#> iso 0.5279369 0.87433079
# Uncomment to query the fitted object stored in s$fit:
# weights(s$fit)
# score(s$fit)

# Cross-validate the superlearner on the simulated data using 3 folds and
# 2 repetitions, then print the result.
cvres <- cv(
  s,
  data = d,
  nfolds = 3,
  rep = 2
)
print(cvres)
#>
#> 3-fold cross-validation with 2 repetitions
#>
#> ── mse
#> mean sd min max
#> sl 0.56376 0.04026 0.50827 0.60064
#> mean 5.31597 0.67276 4.62674 6.15548
#> glm 0.96730 0.07912 0.83995 1.06951
#> iso 0.55300 0.03934 0.50965 0.60017
#>
#> ── mae
#> mean sd min max
#> sl 0.58900 0.02678 0.54525 0.61950
#> mean 1.85244 0.09149 1.73685 1.96075
#> glm 0.80978 0.03366 0.75282 0.85151
#> iso 0.57893 0.02685 0.54367 0.61560
#>
#> ── weight
#> mean sd min max
#> sl - - - -
#> mean 0.03659 0.04399 0.00000 0.10359
#> glm 0.08412 0.02900 0.05093 0.11624
#> iso 0.87929 0.02431 0.83767 0.90239
# coef(cvres)
# score(cvres)