Estimation of functionals of model parameters. Wald tests, robust standard errors,
cluster-robust standard errors, and likelihood ratio tests (LRT, when 'f' is another model object rather than a function).
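In brief (a sketch of the main calling patterns, assuming the fitted models 'g' and 'g0' constructed in the examples below):
estimate(g)                        ## robust (sandwich) standard errors
estimate(g, cbind(0,1,-1))         ## Wald test of a linear contrast
estimate(g, function(p) exp(p[2])) ## delta method for a transformation
estimate(g, g0)                    ## LRT ('g0' nested in 'g')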
Usage
# Default S3 method
estimate(
x = NULL,
f = NULL,
...,
data,
id,
iddata,
stack = TRUE,
average = FALSE,
subset,
score.deriv,
level = 0.95,
IC = robust,
type = c("robust", "df", "mbn"),
keep,
use,
regex = FALSE,
ignore.case = FALSE,
contrast,
null,
vcov,
coef,
robust = TRUE,
df = NULL,
print = NULL,
labels,
label.width,
only.coef = FALSE,
back.transform = NULL,
folds = 0,
cluster,
R = 0,
null.sim
)
Arguments
- x
model object (glm, lvmfit, ...)
- f
transformation of model parameters and (optionally) data, or contrast matrix (or vector)
- ...
additional arguments to lower level functions
- data
data.frame
- id
(optional) id-variable corresponding to the i.i.d. decomposition of the model parameters.
- iddata
(optional) id-variable for 'data'
- stack
if TRUE (default) the i.i.d. decomposition is automatically stacked according to 'id'
- average
if TRUE averages are calculated
- subset
(optional) subset of data.frame on which to condition (logical expression or variable name)
- score.deriv
(optional) derivative of mean score function
- level
level of confidence limits
- IC
if TRUE (default) the influence function decompositions are also returned (extract with the IC method)
- type
type of small-sample correction
- keep
(optional) index of parameters to keep from final result
- use
(optional) index of parameters to use in calculations
- regex
if TRUE use regular expressions (Perl-compatible) for the 'keep' and 'use' arguments
- ignore.case
if TRUE ignore case in the regular expressions
- contrast
(optional) Contrast matrix for final Wald test
- null
(optional) null hypothesis to test
- vcov
(optional) covariance matrix of the parameter estimates (used, e.g., for the Wald test)
- coef
(optional) parameter coefficients
- robust
if TRUE robust standard errors are calculated. If FALSE p-values for linear models are calculated from t-distribution
- df
degrees of freedom (default obtained from 'df.residual')
- print
(optional) print function
- labels
(optional) names of coefficients
- label.width
(optional) max width of labels
- only.coef
if TRUE only the coefficient matrix is returned
- back.transform
(optional) transform of parameters and confidence intervals (see the sketch after this list)
- folds
(optional) aggregate influence functions (divide and conquer)
- cluster
(obsolete) alias for 'id'.
- R
Number of simulations (simulated p-values)
- null.sim
Mean under the null for simulations
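For example (a minimal sketch, assuming the fitted logistic regression 'g' constructed in the examples below), the 'keep', 'regex' and 'back.transform' arguments can be combined to report odds ratios:
## Hypothetical combination of 'keep', 'regex' and 'back.transform':
## select the non-intercept coefficients by regular expression and
## back-transform estimates and confidence limits with exp()
estimate(g, keep="^(z|x)$", regex=TRUE, back.transform=exp)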
Details
The influence function decomposition of the estimator \(\widehat{\theta}\) based
on data \(Z_1,\ldots,Z_n\): $$\sqrt{n}(\widehat{\theta}-\theta) =
\frac{1}{\sqrt{n}}\sum_{i=1}^n IC(Z_i; P) + o_p(1)$$ can be extracted with
the IC method.
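For instance (a minimal sketch, again assuming the fitted model 'g' from the examples below), the estimated influence functions can be inspected directly:
## IC() returns an n x p matrix whose rows are the estimated terms
## IC(Z_i; P); the robust variance is built from the empirical
## second moments of these rows
e <- estimate(g)   ## influence functions are stored when IC=TRUE (default)
colMeans(IC(e))    ## approximately zero (mean-zero decomposition)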
Examples
## Simulation from logistic regression model
m <- lvm(y~x+z);
distribution(m,y~x) <- binomial.lvm("logit")
d <- sim(m,1000)
g <- glm(y~z+x,data=d,family=binomial())
g0 <- glm(y~1,data=d,family=binomial())
## LRT
estimate(g,g0)
#>
#> - Likelihood ratio test -
#>
#> data:
#> chisq = 214.1, df = 2, p-value < 2.2e-16
#> sample estimates:
#> log likelihood (model 1) log likelihood (model 2)
#> -570.8831 -677.9319
#>
## Plain estimates (robust standard errors)
estimate(g)
#> Estimate Std.Err 2.5% 97.5% P-value
#> (Intercept) -0.07117 0.09911 -0.2654 0.1231 4.727e-01
#> z 0.95923 0.08326 0.7960 1.1224 1.037e-30
#> x 1.01347 0.14415 0.7309 1.2960 2.055e-12
## Testing contrasts
estimate(g,null=0)
#> Estimate Std.Err 2.5% 97.5% P-value
#> [(Intercept)] -0.07117 0.09911 -0.2654 0.1231 4.727e-01
#> [z] 0.95923 0.08326 0.7960 1.1224 1.037e-30
#> [x] 1.01347 0.14415 0.7309 1.2960 2.055e-12
#>
#> Null Hypothesis:
#> [(Intercept)] = 0
#> [z] = 0
#> [x] = 0
#>
#> chisq = 191.8908, df = 3, p-value < 2.2e-16
estimate(g,rbind(c(1,1,0),c(1,0,2)))
#> Estimate Std.Err 2.5% 97.5% P-value
#> [(Intercept)] + [z] 0.8881 0.1297 0.6339 1.142 7.471e-12
#> [(Intercept)] + 2[x] 1.9558 0.2318 1.5015 2.410 3.202e-17
#>
#> Null Hypothesis:
#> [(Intercept)] + [z] = 0
#> [(Intercept)] + 2[x] = 0
#>
#> chisq = 158.1963, df = 2, p-value < 2.2e-16
estimate(g,rbind(c(1,1,0),c(1,0,2)),null=c(1,2))
#> Estimate Std.Err 2.5% 97.5% P-value
#> [(Intercept)] + [z] 0.8881 0.1297 0.6339 1.142 0.3880
#> [(Intercept)] + 2[x] 1.9558 0.2318 1.5015 2.410 0.8486
#>
#> Null Hypothesis:
#> [(Intercept)] + [z] = 1
#> [(Intercept)] + 2[x] = 2
#>
#> chisq = 0.9273, df = 2, p-value = 0.629
estimate(g,2:3) ## same as cbind(0,1,-1)
#> Estimate Std.Err 2.5% 97.5% P-value
#> [z] - [x] -0.05424 0.1602 -0.3682 0.2597 0.7349
#>
#> Null Hypothesis:
#> [z] - [x] = 0
estimate(g,as.list(2:3)) ## same as rbind(c(0,1,0),c(0,0,1))
#> Estimate Std.Err 2.5% 97.5% P-value
#> [z] 0.9592 0.08326 0.7960 1.122 1.037e-30
#> [x] 1.0135 0.14415 0.7309 1.296 2.055e-12
#>
#> Null Hypothesis:
#> [z] = 0
#> [x] = 0
#>
#> chisq = 169.5678, df = 2, p-value < 2.2e-16
## Alternative syntax
estimate(g,"z","z"-"x",2*"z"-3*"x")
#> Estimate Std.Err 2.5% 97.5% P-value
#> [z] 0.95923 0.08326 0.7960 1.1224 1.037e-30
#> [z] - [x] -0.05424 0.16020 -0.3682 0.2597 7.349e-01
#> 2[z] - 3[x] -1.12195 0.44994 -2.0038 -0.2401 1.265e-02
#>
#> Null Hypothesis:
#> [z] = 0
#> [z] - [x] = 0
#> 2[z] - 3[x] = 0
#>
#> chisq = 169.5678, df = 2, p-value < 2.2e-16
estimate(g,"?") ## Wildcards
#> Estimate Std.Err 2.5% 97.5% P-value
#> [z] - [x] -0.05424 0.1602 -0.3682 0.2597 0.7349
#>
#> Null Hypothesis:
#> [z] - [x] = 0
estimate(g,"*Int*","z")
#> Estimate Std.Err 2.5% 97.5% P-value
#> [(Intercept)] -0.07117 0.09911 -0.2654 0.1231 4.727e-01
#> [z] 0.95923 0.08326 0.7960 1.1224 1.037e-30
#>
#> Null Hypothesis:
#> [(Intercept)] = 0
#> [z] = 0
#>
#> chisq = 133.3069, df = 2, p-value < 2.2e-16
estimate(g,"1","2"-"3",null=c(0,1))
#> Estimate Std.Err 2.5% 97.5% P-value
#> [(Intercept)] -0.07117 0.09911 -0.2654 0.1231 4.727e-01
#> [z] - [x] -0.05424 0.16020 -0.3682 0.2597 4.674e-11
#>
#> Null Hypothesis:
#> [(Intercept)] = 0
#> [z] - [x] = 1
#>
#> chisq = 61.6299, df = 2, p-value = 4.142e-14
estimate(g,2,3)
#> Estimate Std.Err 2.5% 97.5% P-value
#> [z] 0.9592 0.08326 0.7960 1.122 1.037e-30
#> [x] 1.0135 0.14415 0.7309 1.296 2.055e-12
#>
#> Null Hypothesis:
#> [z] = 0
#> [x] = 0
#>
#> chisq = 169.5678, df = 2, p-value < 2.2e-16
## Usual (non-robust) confidence intervals
estimate(g,robust=FALSE)
#> Estimate Std.Err 2.5% 97.5% P-value
#> (Intercept) -0.07117 0.09814 -0.2635 0.1212 4.683e-01
#> z 0.95923 0.08316 0.7962 1.1222 8.788e-31
#> x 1.01347 0.14564 0.7280 1.2989 3.432e-12
## Transformations
estimate(g,function(p) p[1]+p[2])
#> Estimate Std.Err 2.5% 97.5% P-value
#> (Intercept) 0.8881 0.1297 0.6339 1.142 7.471e-12
## Multiple parameters
e <- estimate(g,function(p) c(p[1]+p[2], p[1]*p[2]))
e
#> Estimate Std.Err 2.5% 97.5% P-value
#> (Intercept) 0.88806 0.12967 0.6339 1.1422 7.471e-12
#> (Intercept).1 -0.06827 0.09523 -0.2549 0.1184 4.734e-01
vcov(e)
#> (Intercept) (Intercept)
#> (Intercept) 0.016815549 0.008955385
#> (Intercept) 0.008955385 0.009068500
## Label new parameters
estimate(g,function(p) list("a1"=p[1]+p[2], "b1"=p[1]*p[2]))
#> Estimate Std.Err 2.5% 97.5% P-value
#> a1 0.88806 0.12967 0.6339 1.1422 7.471e-12
#> b1 -0.06827 0.09523 -0.2549 0.1184 4.734e-01
## Multiple group
m <- lvm(y~x)
m <- baptize(m)
d2 <- d1 <- sim(m,50,seed=1)
e <- estimate(list(m,m),list(d1,d2))
estimate(e) ## Wrong: duplicated observations treated as independent
#> Estimate Std.Err 2.5% 97.5% P-value
#> y@1 0.1044 0.08277 -0.05785 0.2666 2.073e-01
#> y~x@1 0.9665 0.08727 0.79541 1.1375 1.677e-28
#> y~~y@1 0.6764 0.10629 0.46803 0.8847 1.977e-10
ee <- estimate(e, id=rep(seq(nrow(d1)), 2)) ## Clustered
ee
#> Estimate Std.Err 2.5% 97.5% P-value
#> y@1 0.1044 0.1171 -0.1250 0.3338 3.725e-01
#> y~x@1 0.9665 0.1234 0.7246 1.2084 4.859e-15
#> y~~y@1 0.6764 0.1503 0.3817 0.9710 6.814e-06
estimate(lm(y~x,d1))
#> Estimate Std.Err 2.5% 97.5% P-value
#> (Intercept) 0.1044 0.1171 -0.1251 0.3338 3.726e-01
#> x 0.9665 0.1234 0.7246 1.2084 4.853e-15
## Marginalize
f <- function(p,data)
    list(p0=lava:::expit(p["(Intercept)"] + p["z"]*data[,"z"]),
         p1=lava:::expit(p["(Intercept)"] + p["x"] + p["z"]*data[,"z"]))
e <- estimate(g, f, average=TRUE)
e
#> Estimate Std.Err 2.5% 97.5% P-value
#> p0 0.4860 0.02150 0.4438 0.5281 4.175e-113
#> p1 0.6884 0.01992 0.6494 0.7275 1.089e-261
estimate(e,diff)
#> Estimate Std.Err 2.5% 97.5% P-value
#> p1 0.2024 0.02802 0.1475 0.2573 5.069e-13
estimate(e,cbind(1,1))
#> Estimate Std.Err 2.5% 97.5% P-value
#> [p0] + [p1] 1.174 0.03055 1.115 1.234 1.976e-323
#>
#> Null Hypothesis:
#> [p0] + [p1] = 0
## Clusters and subset (conditional marginal effects)
d$id <- rep(seq(nrow(d)/4),each=4)
estimate(g,function(p,data)
    list(p0=lava:::expit(p[1] + p["z"]*data[,"z"])),
    subset=d$z>0, id=d$id, average=TRUE)
#> Estimate Std.Err 2.5% 97.5% P-value
#> p0 0.6611 0.02218 0.6176 0.7046 3.296e-195
## More examples with clusters:
m <- lvm(c(y1,y2,y3)~u+x)
d <- sim(m,10)
l1 <- glm(y1~x,data=d)
l2 <- glm(y2~x,data=d)
l3 <- glm(y3~x,data=d)
## Some random id-numbers
id1 <- c(1,1,4,1,3,1,2,3,4,5)
id2 <- c(1,2,3,4,5,6,7,8,1,1)
id3 <- seq(10)
## Un-stacked and stacked i.i.d. decomposition
IC(estimate(l1,id=id1,stack=FALSE))
#> (Intercept) x
#> 1 0.6443282 -1.5615821
#> 1 0.8292245 -0.3681720
#> 4 1.5725072 -0.7053035
#> 1 -1.5588173 -1.2129716
#> 3 1.5615501 2.9936336
#> 1 -1.0524707 7.9231560
#> 2 0.7800032 -2.7353224
#> 3 -4.2476305 -4.4726609
#> 4 -0.2651809 -0.4020531
#> 5 1.7364862 0.5412760
#> attr(,"bread")
#> (Intercept) x
#> (Intercept) 4.175654 2.360059
#> x 2.360059 12.463714
IC(estimate(l1,id=id1))
#> (Intercept) x
#> 1 -0.5688677 2.3902152
#> 2 0.3900016 -1.3676612
#> 3 -1.3430402 -0.7395136
#> 4 0.6536632 -0.5536783
#> 5 0.8682431 0.2706380
#> attr(,"bread")
#> (Intercept) x
#> (Intercept) 4.175654 2.360059
#> x 2.360059 12.463714
#> attr(,"N")
#> [1] 10
## Combined i.i.d. decomposition
e1 <- estimate(l1,id=id1)
e2 <- estimate(l2,id=id2)
e3 <- estimate(l3,id=id3)
(a2 <- merge(e1,e2,e3))
#> Estimate Std.Err 2.5% 97.5% P-value
#> (Intercept) -0.1858 0.3721 -0.91501 0.5434 0.61751
#> x 0.2707 0.5834 -0.87285 1.4142 0.64268
#> ─────────────
#> (Intercept).1 0.6284 0.3617 -0.08055 1.3373 0.08234
#> x.1 0.4415 0.7723 -1.07229 1.9552 0.56759
#> ─────────────
#> (Intercept).2 0.5316 0.3364 -0.12786 1.1910 0.11412
#> x.2 1.0678 0.5171 0.05420 2.0813 0.03894
## If all models were estimated on the same data we could use the
## syntax:
## Reduce(merge,estimate(list(l1,l2,l3)))
## Same:
IC(a1 <- merge(l1,l2,l3,id=list(id1,id2,id3)))
#> (Intercept) x (Intercept).1 x.1 (Intercept).2 x.2
#> 1 -1.1377353 4.780430 -0.0683124 -2.9519582 -0.87137910 2.1118587
#> 2 0.7800032 -2.735322 1.7168997 0.5351707 -0.43814610 0.1945349
#> 3 -2.6860804 -1.479027 0.4120817 -0.1829625 1.36962838 -0.6143080
#> 4 1.3073264 -1.107357 1.9069664 -0.8553157 -0.28910871 -0.2249658
#> 5 1.7364862 0.541276 -1.6893508 -1.3145444 -0.20205165 -0.3873514
#> 6 0.0000000 0.000000 -1.2666254 -2.4282362 -0.06001604 0.4518097
#> 7 0.0000000 0.000000 -0.9076952 6.8332644 0.87312198 -3.0618723
#> 8 0.0000000 0.000000 -0.1039640 0.3645818 -1.21352121 -1.2778110
#> 9 0.0000000 0.000000 0.0000000 0.0000000 2.11627845 3.2085890
#> 10 0.0000000 0.000000 0.0000000 0.0000000 -1.28480599 -0.4004838
IC(merge(l1,l2,l3,id=TRUE)) # one-to-one (same clusters)
#> (Intercept) x (Intercept).1 x.1 (Intercept).2 x.2
#> 1 0.6443282 -1.5615821 0.7607628 -1.8437710 -0.87137910 2.1118587
#> 2 0.8292245 -0.3681720 1.7168997 0.5351707 -0.43814610 0.1945349
#> 3 1.5725072 -0.7053035 0.4120817 -0.1829625 1.36962838 -0.6143080
#> 4 -1.5588173 -1.2129716 1.9069664 -0.8553157 -0.28910871 -0.2249658
#> 5 1.5615501 2.9936336 -1.6893508 -1.3145444 -0.20205165 -0.3873514
#> 6 -1.0524707 7.9231560 -1.2666254 -2.4282362 -0.06001604 0.4518097
#> 7 0.7800032 -2.7353224 -0.9076952 6.8332644 0.87312198 -3.0618723
#> 8 -4.2476305 -4.4726609 -0.1039640 0.3645818 -1.21352121 -1.2778110
#> 9 -0.2651809 -0.4020531 -0.3212921 -0.3383135 2.11627845 3.2085890
#> 10 1.7364862 0.5412760 -0.5077830 -0.7698737 -1.28480599 -0.4004838
IC(merge(l1,l2,l3,id=FALSE)) # independence
#> (Intercept) x (Intercept).1 x.1 (Intercept).2 x.2
#> 1 1.9329845 -4.684746 0.0000000 0.0000000 0.0000000 0.0000000
#> 2 2.4876735 -1.104516 0.0000000 0.0000000 0.0000000 0.0000000
#> 3 4.7175217 -2.115911 0.0000000 0.0000000 0.0000000 0.0000000
#> 4 -4.6764519 -3.638915 0.0000000 0.0000000 0.0000000 0.0000000
#> 5 4.6846502 8.980901 0.0000000 0.0000000 0.0000000 0.0000000
#> 6 -3.1574121 23.769468 0.0000000 0.0000000 0.0000000 0.0000000
#> 7 2.3400095 -8.205967 0.0000000 0.0000000 0.0000000 0.0000000
#> 8 -12.7428915 -13.417983 0.0000000 0.0000000 0.0000000 0.0000000
#> 9 -0.7955426 -1.206159 0.0000000 0.0000000 0.0000000 0.0000000
#> 10 5.2094586 1.623828 0.0000000 0.0000000 0.0000000 0.0000000
#> 11 0.0000000 0.000000 2.2822883 -5.5313129 0.0000000 0.0000000
#> 12 0.0000000 0.000000 5.1506990 1.6055121 0.0000000 0.0000000
#> 13 0.0000000 0.000000 1.2362452 -0.5488874 0.0000000 0.0000000
#> 14 0.0000000 0.000000 5.7208992 -2.5659471 0.0000000 0.0000000
#> 15 0.0000000 0.000000 -5.0680524 -3.9436331 0.0000000 0.0000000
#> 16 0.0000000 0.000000 -3.7998762 -7.2847086 0.0000000 0.0000000
#> 17 0.0000000 0.000000 -2.7230856 20.4997932 0.0000000 0.0000000
#> 18 0.0000000 0.000000 -0.3118919 1.0937453 0.0000000 0.0000000
#> 19 0.0000000 0.000000 -0.9638764 -1.0149405 0.0000000 0.0000000
#> 20 0.0000000 0.000000 -1.5233491 -2.3096211 0.0000000 0.0000000
#> 21 0.0000000 0.000000 0.0000000 0.0000000 -2.6141373 6.3355761
#> 22 0.0000000 0.000000 0.0000000 0.0000000 -1.3144383 0.5836048
#> 23 0.0000000 0.000000 0.0000000 0.0000000 4.1088851 -1.8429239
#> 24 0.0000000 0.000000 0.0000000 0.0000000 -0.8673261 -0.6748975
#> 25 0.0000000 0.000000 0.0000000 0.0000000 -0.6061550 -1.1620542
#> 26 0.0000000 0.000000 0.0000000 0.0000000 -0.1800481 1.3554291
#> 27 0.0000000 0.000000 0.0000000 0.0000000 2.6193659 -9.1856169
#> 28 0.0000000 0.000000 0.0000000 0.0000000 -3.6405636 -3.8334329
#> 29 0.0000000 0.000000 0.0000000 0.0000000 6.3488353 9.6257670
#> 30 0.0000000 0.000000 0.0000000 0.0000000 -3.8544180 -1.2014514
## Monte Carlo approach, simple trend test example
m <- categorical(lvm(),~x,K=5)
regression(m,additive=TRUE) <- y~x
d <- simulate(m,100,seed=1,'y~x'=0.1)
l <- lm(y~-1+factor(x),data=d)
f <- function(x) coef(lm(x~seq_along(x)))[2]
null <- rep(mean(coef(l)),length(coef(l)))
## just need to make sure we simulate under H0: slope=0
estimate(l,f,R=1e2,null.sim=null)
#> 100 replications
#>
#> seq_along(x)
#> Mean 0.014709
#> SD 0.065440
#>
#> 2.5% -0.097458
#> 97.5% 0.146613
#>
#> Estimate 0.080949
#> P-value 0.180000
#>
estimate(l,f)
#> Estimate Std.Err 2.5% 97.5% P-value
#> seq_along(x) 0.08095 0.06135 -0.03929 0.2012 0.187
