This function creates a predictor object (class ml_model) from a list of existing ml_model objects. When the model is estimated, a stacked prediction is created by weighting together the predictions of each of the initial models. The weights are learned using cross-validation.
predictor_sl(
model.list,
info = NULL,
nfolds = 5L,
meta.learner = metalearner_nnls,
model.score = mse,
...
)
List of ml_model objects (e.g. predictor_glm)
Optional model description to store in model object
Number of folds to use in cross validation
meta.learner function (default non-negative least
squares). Must be a function of the response (nx1 vector), `y`,
and the predictions (nxp matrix), `pred`.
model scoring method (see ml_model)
Additional arguments to superlearner
Luedtke & van der Laan (2016) Super-Learning of an Optimal Dynamic Treatment Rule, The International Journal of Biostatistics.
ml_model predictor_glm predictor_xgboost
# Simulate a toy regression data set.
#
# Arguments:
#   n  Number of observations to draw (default 500).
#
# Returns a data.frame with `n` rows and columns:
#   y   response, y = x1 + cos(x1) + noise, noise ~ N(0, variance 0.5)
#   x1  covariate, N(0, sd = 2)
#   x2  covariate, N(0, 1); does not enter the formula for y
sim1 <- function(n = 5e2) {
  # `n` is deliberately not reassigned here, so the caller's sample size
  # is honoured.
  x1 <- rnorm(n, sd = 2)
  x2 <- rnorm(n)
  # sd = 0.5^0.5, i.e. noise variance of 0.5
  y <- x1 + cos(x1) + rnorm(n, sd = 0.5^0.5)
  data.frame(y, x1, x2)
}
# Simulate data and order the rows by x1 (the line drawn in the plot below
# relies on x1 being sorted).
d <- sim1() |> mets::dsort(~x1)
# Candidate learners to be stacked; the list names are the labels that
# appear in the printed summary.
m <- list(
"mean" = predictor_glm(y ~ 1),
"glm" = predictor_glm(y ~ x1 + x2),
"iso" = predictor_isoreg(y ~ x1)
)
# Build the superlearner from the candidates, using 10 cross-validation
# folds instead of the default 5.
s <- predictor_sl(m, nfolds=10)
# Fit on the simulated data, then predict on the same data.
s$estimate(d)
pr <- s$predict(d)
# Plot only in interactive sessions: observations, stacked predictions
# (col = 2), and the regression function cos(x1) + x1 used by sim1().
if (interactive()) {
plot(y ~ x1, data = d)
points(d$x1, pr, col = 2, cex = 0.5)
lines(cos(x1) + x1 ~ x1, data = d,
lwd = 4, col = lava::Col("darkblue", 0.3))
}
# Print the fitted model, including the score and weight of each learner.
print(s)
#> Prediction Model (class ml_model)
#> _________________________________
#>
#> superlearner
#> mean
#> glm
#> iso
#>
#> Arguments:
#> model.list = m
#> nfolds = 10
#> meta.learner = metalearner_nnls
#> model.score = mse
#> formula = list(mean = y ~ 1, glm = y ~ x1 + x2, iso = y ~ x1)
#>
#> _________________________________
#>
#> score weight
#> mean 4.8391070 0.00000000
#> glm 1.0082826 0.06544436
#> iso 0.5816424 0.93455564
# Accessors, left commented out in the example (presumably they return the
# weight and score columns printed above; confirm against the package docs):
## weights(s)
## score(s)
# Cross-validate the superlearner itself: 3 folds, repeated 2 times.
cvres <- summary(s, data=d, nfolds=3, rep=2)
# Auto-print the cross-validation result.
cvres
#> Call: cv(models = list(performance = object), data = data, nfolds = nfolds,
#> rep = rep, model.score = score_sl)
#>
#> 3-fold cross-validation with 2 repetitions
#>
#> score.sl score.mean score.glm score.iso weight.mean weight.glm
#> performance 0.577512 4.837403 1.005582 0.5793271 0 0.07855155
#> weight.iso
#> performance 0.9214484
## coef(cvres)
## score(cvres)