Estimation of functional of parameters

Estimation of functional of parameters. Wald tests, robust standard errors, cluster robust standard errors, LRT (when f is not a function)...

Usage

# Default S3 method
estimate(
  x = NULL,
  f = NULL,
  ...,
  data,
  id,
  iddata,
  stack = TRUE,
  average = FALSE,
  subset,
  score.deriv,
  level = 0.95,
  IC = robust,
  type = c("robust", "df", "mbn"),
  keep,
  use,
  regex = FALSE,
  ignore.case = FALSE,
  contrast,
  null,
  vcov,
  coef,
  robust = TRUE,
  df = NULL,
  print = NULL,
  labels,
  label.width,
  only.coef = FALSE,
  back.transform = NULL,
  folds = 0,
  cluster,
  R = 0,
  null.sim
)

Arguments

x: model object (glm, lvmfit, ...)
f: transformation of model parameters and (optionally) data, or contrast matrix (or vector)
...: additional arguments to lower level functions
data: data.frame
id: (optional) id-variable corresponding to ic decomposition of model parameters.
iddata: (optional) id-variable for 'data'
stack: if TRUE (default) the i.i.d. decomposition is automatically stacked according to 'id'
average: if TRUE averages are calculated
subset: (optional) subset of data.frame on which to condition (logical expression or variable name)
score.deriv: (optional) derivative of mean score function
level: level of confidence limits
IC: if TRUE (default) the influence function decompositions are also returned (extract with IC method)
type: type of small-sample correction
keep: (optional) index of parameters to keep from final result
use: (optional) index of parameters to use in calculations
regex: If TRUE use regular expression (perl compatible) for keep, use arguments
ignore.case: Ignore case-sensitiveness in regular expression
contrast: (optional) Contrast matrix for final Wald test
null: (optional) null hypothesis to test
vcov: (optional) covariance matrix of parameter estimates (e.g. Wald-test)
coef: (optional) parameter coefficient
robust: if TRUE robust standard errors are calculated. If FALSE p-values for linear models are calculated from t-distribution
df: degrees of freedom (default obtained from 'df.residual')
print: (optional) print function
labels: (optional) names of coefficients
label.width: (optional) max width of labels
only.coef: if TRUE only the coefficient matrix is return
back.transform: (optional) transform of parameters and confidence intervals
folds: (optional) aggregate influence functions (divide and conquer)
cluster: (obsolete) alias for 'id'.
R: Number of simulations (simulated p-values)
null.sim: Mean under the null for simulations

Details

influence function decomposition of estimator $\widehat{\theta}$ based on data $Z_1,\ldots,Z_n$: $$\sqrt{n}(\widehat{\theta}-\theta) = \frac{1}{\sqrt{n}}\sum_{i=1}^n IC(Z_i; P) + o_p(1)$$ can be extracted with the IC method.

Examples


## Simulation from logistic regression model
m <- lvm(y~x+z);
distribution(m,y~x) <- binomial.lvm("logit")
d <- sim(m,1000)
g <- glm(y~z+x,data=d,family=binomial())
g0 <- glm(y~1,data=d,family=binomial())

## LRT
estimate(g,g0)
#> 
#> 	- Likelihood ratio test -
#> 
#> data:  
#> chisq = 214.1, df = 2, p-value < 2.2e-16
#> sample estimates:
#> log likelihood (model 1) log likelihood (model 2) 
#>                -570.8831                -677.9319 
#> 

## Plain estimates (robust standard errors)
estimate(g)
#>             Estimate Std.Err    2.5%  97.5%   P-value
#> (Intercept) -0.07117 0.09911 -0.2654 0.1231 4.727e-01
#> z            0.95923 0.08326  0.7960 1.1224 1.037e-30
#> x            1.01347 0.14415  0.7309 1.2960 2.055e-12

## Testing contrasts
estimate(g,null=0)
#>               Estimate Std.Err    2.5%  97.5%   P-value
#> [(Intercept)] -0.07117 0.09911 -0.2654 0.1231 4.727e-01
#> [z]            0.95923 0.08326  0.7960 1.1224 1.037e-30
#> [x]            1.01347 0.14415  0.7309 1.2960 2.055e-12
#> 
#>  Null Hypothesis: 
#>   [(Intercept)] = 0
#>   [z] = 0
#>   [x] = 0 
#>  
#> chisq = 191.8908, df = 3, p-value < 2.2e-16
estimate(g,rbind(c(1,1,0),c(1,0,2)))
#>                      Estimate Std.Err   2.5% 97.5%   P-value
#> [(Intercept)] + [z]    0.8881  0.1297 0.6339 1.142 7.471e-12
#> [(Intercept)] + 2[x]   1.9558  0.2318 1.5015 2.410 3.202e-17
#> 
#>  Null Hypothesis: 
#>   [(Intercept)] + [z] = 0
#>   [(Intercept)] + 2[x] = 0 
#>  
#> chisq = 158.1963, df = 2, p-value < 2.2e-16
estimate(g,rbind(c(1,1,0),c(1,0,2)),null=c(1,2))
#>                      Estimate Std.Err   2.5% 97.5% P-value
#> [(Intercept)] + [z]    0.8881  0.1297 0.6339 1.142  0.3880
#> [(Intercept)] + 2[x]   1.9558  0.2318 1.5015 2.410  0.8486
#> 
#>  Null Hypothesis: 
#>   [(Intercept)] + [z] = 1
#>   [(Intercept)] + 2[x] = 2 
#>  
#> chisq = 0.9273, df = 2, p-value = 0.629
estimate(g,2:3) ## same as cbind(0,1,-1)
#>           Estimate Std.Err    2.5%  97.5% P-value
#> [z] - [x] -0.05424  0.1602 -0.3682 0.2597  0.7349
#> 
#>  Null Hypothesis: 
#>   [z] - [x] = 0 
estimate(g,as.list(2:3)) ## same as rbind(c(0,1,0),c(0,0,1))
#>     Estimate Std.Err   2.5% 97.5%   P-value
#> [z]   0.9592 0.08326 0.7960 1.122 1.037e-30
#> [x]   1.0135 0.14415 0.7309 1.296 2.055e-12
#> 
#>  Null Hypothesis: 
#>   [z] = 0
#>   [x] = 0 
#>  
#> chisq = 169.5678, df = 2, p-value < 2.2e-16
## Alternative syntax
estimate(g,"z","z"-"x",2*"z"-3*"x")
#>             Estimate Std.Err    2.5%   97.5%   P-value
#> [z]          0.95923 0.08326  0.7960  1.1224 1.037e-30
#> [z] - [x]   -0.05424 0.16020 -0.3682  0.2597 7.349e-01
#> 2[z] - 3[x] -1.12195 0.44994 -2.0038 -0.2401 1.265e-02
#> 
#>  Null Hypothesis: 
#>   [z] = 0
#>   [z] - [x] = 0
#>   2[z] - 3[x] = 0 
#>  
#> chisq = 169.5678, df = 2, p-value < 2.2e-16
estimate(g,"?")  ## Wildcards
#>           Estimate Std.Err    2.5%  97.5% P-value
#> [z] - [x] -0.05424  0.1602 -0.3682 0.2597  0.7349
#> 
#>  Null Hypothesis: 
#>   [z] - [x] = 0 
estimate(g,"*Int*","z")
#>               Estimate Std.Err    2.5%  97.5%   P-value
#> [(Intercept)] -0.07117 0.09911 -0.2654 0.1231 4.727e-01
#> [z]            0.95923 0.08326  0.7960 1.1224 1.037e-30
#> 
#>  Null Hypothesis: 
#>   [(Intercept)] = 0
#>   [z] = 0 
#>  
#> chisq = 133.3069, df = 2, p-value < 2.2e-16
estimate(g,"1","2"-"3",null=c(0,1))
#>               Estimate Std.Err    2.5%  97.5%   P-value
#> [(Intercept)] -0.07117 0.09911 -0.2654 0.1231 4.727e-01
#> [z] - [x]     -0.05424 0.16020 -0.3682 0.2597 4.674e-11
#> 
#>  Null Hypothesis: 
#>   [(Intercept)] = 0
#>   [z] - [x] = 1 
#>  
#> chisq = 61.6299, df = 2, p-value = 4.142e-14
estimate(g,2,3)
#>     Estimate Std.Err   2.5% 97.5%   P-value
#> [z]   0.9592 0.08326 0.7960 1.122 1.037e-30
#> [x]   1.0135 0.14415 0.7309 1.296 2.055e-12
#> 
#>  Null Hypothesis: 
#>   [z] = 0
#>   [x] = 0 
#>  
#> chisq = 169.5678, df = 2, p-value < 2.2e-16

## Usual (non-robust) confidence intervals
estimate(g,robust=FALSE)
#>             Estimate Std.Err    2.5%  97.5%   P-value
#> (Intercept) -0.07117 0.09814 -0.2635 0.1212 4.683e-01
#> z            0.95923 0.08316  0.7962 1.1222 8.788e-31
#> x            1.01347 0.14564  0.7280 1.2989 3.432e-12

## Transformations
estimate(g,function(p) p[1]+p[2])
#>             Estimate Std.Err   2.5% 97.5%   P-value
#> (Intercept)   0.8881  0.1297 0.6339 1.142 7.471e-12

## Multiple parameters
e <- estimate(g,function(p) c(p[1]+p[2], p[1]*p[2]))
e
#>               Estimate Std.Err    2.5%  97.5%   P-value
#> (Intercept)    0.88806 0.12967  0.6339 1.1422 7.471e-12
#> (Intercept).1 -0.06827 0.09523 -0.2549 0.1184 4.734e-01
vcov(e)
#>             (Intercept) (Intercept)
#> (Intercept) 0.016815549 0.008955385
#> (Intercept) 0.008955385 0.009068500

## Label new parameters
estimate(g,function(p) list("a1"=p[1]+p[2], "b1"=p[1]*p[2]))
#>    Estimate Std.Err    2.5%  97.5%   P-value
#> a1  0.88806 0.12967  0.6339 1.1422 7.471e-12
#> b1 -0.06827 0.09523 -0.2549 0.1184 4.734e-01
##'
## Multiple group
m <- lvm(y~x)
m <- baptize(m)
d2 <- d1 <- sim(m,50,seed=1)
e <- estimate(list(m,m),list(d1,d2))
estimate(e) ## Wrong
#>        Estimate Std.Err     2.5%  97.5%   P-value
#> y@1      0.1044 0.08277 -0.05785 0.2666 2.073e-01
#> y~x@1    0.9665 0.08727  0.79541 1.1375 1.677e-28
#> y~~y@1   0.6764 0.10629  0.46803 0.8847 1.977e-10
ee <- estimate(e, id=rep(seq(nrow(d1)), 2)) ## Clustered
ee
#>        Estimate Std.Err    2.5%  97.5%   P-value
#> y@1      0.1044  0.1171 -0.1250 0.3338 3.725e-01
#> y~x@1    0.9665  0.1234  0.7246 1.2084 4.859e-15
#> y~~y@1   0.6764  0.1503  0.3817 0.9710 6.814e-06
estimate(lm(y~x,d1))
#>             Estimate Std.Err    2.5%  97.5%   P-value
#> (Intercept)   0.1044  0.1171 -0.1251 0.3338 3.726e-01
#> x             0.9665  0.1234  0.7246 1.2084 4.853e-15

## Marginalize
f <- function(p,data)
  list(p0=lava:::expit(p["(Intercept)"] + p["z"]*data[,"z"]),
       p1=lava:::expit(p["(Intercept)"] + p["x"] + p["z"]*data[,"z"]))
e <- estimate(g, f, average=TRUE)
e
#>    Estimate Std.Err   2.5%  97.5%    P-value
#> p0   0.4860 0.02150 0.4438 0.5281 4.175e-113
#> p1   0.6884 0.01992 0.6494 0.7275 1.089e-261
estimate(e,diff)
#>    Estimate Std.Err   2.5%  97.5%   P-value
#> p1   0.2024 0.02802 0.1475 0.2573 5.069e-13
estimate(e,cbind(1,1))
#>             Estimate Std.Err  2.5% 97.5%    P-value
#> [p0] + [p1]    1.174 0.03055 1.115 1.234 1.976e-323
#> 
#>  Null Hypothesis: 
#>   [p0] + [p1] = 0 

## Clusters and subset (conditional marginal effects)
d$id <- rep(seq(nrow(d)/4),each=4)
estimate(g,function(p,data)
         list(p0=lava:::expit(p[1] + p["z"]*data[,"z"])),
         subset=d$z>0, id=d$id, average=TRUE)
#>    Estimate Std.Err   2.5%  97.5%    P-value
#> p0   0.6611 0.02218 0.6176 0.7046 3.296e-195

## More examples with clusters:
m <- lvm(c(y1,y2,y3)~u+x)
d <- sim(m,10)
l1 <- glm(y1~x,data=d)
l2 <- glm(y2~x,data=d)
l3 <- glm(y3~x,data=d)

## Some random id-numbers
id1 <- c(1,1,4,1,3,1,2,3,4,5)
id2 <- c(1,2,3,4,5,6,7,8,1,1)
id3 <- seq(10)

## Un-stacked and stacked i.i.d. decomposition
IC(estimate(l1,id=id1,stack=FALSE))
#>   (Intercept)          x
#> 1   0.6443282 -1.5615821
#> 1   0.8292245 -0.3681720
#> 4   1.5725072 -0.7053035
#> 1  -1.5588173 -1.2129716
#> 3   1.5615501  2.9936336
#> 1  -1.0524707  7.9231560
#> 2   0.7800032 -2.7353224
#> 3  -4.2476305 -4.4726609
#> 4  -0.2651809 -0.4020531
#> 5   1.7364862  0.5412760
#> attr(,"bread")
#>             (Intercept)         x
#> (Intercept)    4.175654  2.360059
#> x              2.360059 12.463714
IC(estimate(l1,id=id1))
#>   (Intercept)          x
#> 1  -0.5688677  2.3902152
#> 2   0.3900016 -1.3676612
#> 3  -1.3430402 -0.7395136
#> 4   0.6536632 -0.5536783
#> 5   0.8682431  0.2706380
#> attr(,"bread")
#>             (Intercept)         x
#> (Intercept)    4.175654  2.360059
#> x              2.360059 12.463714
#> attr(,"N")
#> [1] 10

## Combined i.i.d. decomposition
e1 <- estimate(l1,id=id1)
e2 <- estimate(l2,id=id2)
e3 <- estimate(l3,id=id3)
(a2 <- merge(e1,e2,e3))
#>               Estimate Std.Err     2.5%  97.5% P-value
#> (Intercept)    -0.1858  0.3721 -0.91501 0.5434 0.61751
#> x               0.2707  0.5834 -0.87285 1.4142 0.64268
#> ─────────────                                         
#> (Intercept).1   0.6284  0.3617 -0.08055 1.3373 0.08234
#> x.1             0.4415  0.7723 -1.07229 1.9552 0.56759
#> ─────────────                                         
#> (Intercept).2   0.5316  0.3364 -0.12786 1.1910 0.11412
#> x.2             1.0678  0.5171  0.05420 2.0813 0.03894

## If all models were estimated on the same data we could use the
## syntax:
## Reduce(merge,estimate(list(l1,l2,l3)))

## Same:
IC(a1 <- merge(l1,l2,l3,id=list(id1,id2,id3)))
#>    (Intercept)         x (Intercept).1        x.1 (Intercept).2        x.2
#> 1   -1.1377353  4.780430    -0.0683124 -2.9519582   -0.87137910  2.1118587
#> 2    0.7800032 -2.735322     1.7168997  0.5351707   -0.43814610  0.1945349
#> 3   -2.6860804 -1.479027     0.4120817 -0.1829625    1.36962838 -0.6143080
#> 4    1.3073264 -1.107357     1.9069664 -0.8553157   -0.28910871 -0.2249658
#> 5    1.7364862  0.541276    -1.6893508 -1.3145444   -0.20205165 -0.3873514
#> 6    0.0000000  0.000000    -1.2666254 -2.4282362   -0.06001604  0.4518097
#> 7    0.0000000  0.000000    -0.9076952  6.8332644    0.87312198 -3.0618723
#> 8    0.0000000  0.000000    -0.1039640  0.3645818   -1.21352121 -1.2778110
#> 9    0.0000000  0.000000     0.0000000  0.0000000    2.11627845  3.2085890
#> 10   0.0000000  0.000000     0.0000000  0.0000000   -1.28480599 -0.4004838

IC(merge(l1,l2,l3,id=TRUE)) # one-to-one (same clusters)
#>    (Intercept)          x (Intercept).1        x.1 (Intercept).2        x.2
#> 1    0.6443282 -1.5615821     0.7607628 -1.8437710   -0.87137910  2.1118587
#> 2    0.8292245 -0.3681720     1.7168997  0.5351707   -0.43814610  0.1945349
#> 3    1.5725072 -0.7053035     0.4120817 -0.1829625    1.36962838 -0.6143080
#> 4   -1.5588173 -1.2129716     1.9069664 -0.8553157   -0.28910871 -0.2249658
#> 5    1.5615501  2.9936336    -1.6893508 -1.3145444   -0.20205165 -0.3873514
#> 6   -1.0524707  7.9231560    -1.2666254 -2.4282362   -0.06001604  0.4518097
#> 7    0.7800032 -2.7353224    -0.9076952  6.8332644    0.87312198 -3.0618723
#> 8   -4.2476305 -4.4726609    -0.1039640  0.3645818   -1.21352121 -1.2778110
#> 9   -0.2651809 -0.4020531    -0.3212921 -0.3383135    2.11627845  3.2085890
#> 10   1.7364862  0.5412760    -0.5077830 -0.7698737   -1.28480599 -0.4004838
IC(merge(l1,l2,l3,id=FALSE)) # independence
#>    (Intercept)          x (Intercept).1        x.1 (Intercept).2        x.2
#> 1    1.9329845  -4.684746     0.0000000  0.0000000     0.0000000  0.0000000
#> 2    2.4876735  -1.104516     0.0000000  0.0000000     0.0000000  0.0000000
#> 3    4.7175217  -2.115911     0.0000000  0.0000000     0.0000000  0.0000000
#> 4   -4.6764519  -3.638915     0.0000000  0.0000000     0.0000000  0.0000000
#> 5    4.6846502   8.980901     0.0000000  0.0000000     0.0000000  0.0000000
#> 6   -3.1574121  23.769468     0.0000000  0.0000000     0.0000000  0.0000000
#> 7    2.3400095  -8.205967     0.0000000  0.0000000     0.0000000  0.0000000
#> 8  -12.7428915 -13.417983     0.0000000  0.0000000     0.0000000  0.0000000
#> 9   -0.7955426  -1.206159     0.0000000  0.0000000     0.0000000  0.0000000
#> 10   5.2094586   1.623828     0.0000000  0.0000000     0.0000000  0.0000000
#> 11   0.0000000   0.000000     2.2822883 -5.5313129     0.0000000  0.0000000
#> 12   0.0000000   0.000000     5.1506990  1.6055121     0.0000000  0.0000000
#> 13   0.0000000   0.000000     1.2362452 -0.5488874     0.0000000  0.0000000
#> 14   0.0000000   0.000000     5.7208992 -2.5659471     0.0000000  0.0000000
#> 15   0.0000000   0.000000    -5.0680524 -3.9436331     0.0000000  0.0000000
#> 16   0.0000000   0.000000    -3.7998762 -7.2847086     0.0000000  0.0000000
#> 17   0.0000000   0.000000    -2.7230856 20.4997932     0.0000000  0.0000000
#> 18   0.0000000   0.000000    -0.3118919  1.0937453     0.0000000  0.0000000
#> 19   0.0000000   0.000000    -0.9638764 -1.0149405     0.0000000  0.0000000
#> 20   0.0000000   0.000000    -1.5233491 -2.3096211     0.0000000  0.0000000
#> 21   0.0000000   0.000000     0.0000000  0.0000000    -2.6141373  6.3355761
#> 22   0.0000000   0.000000     0.0000000  0.0000000    -1.3144383  0.5836048
#> 23   0.0000000   0.000000     0.0000000  0.0000000     4.1088851 -1.8429239
#> 24   0.0000000   0.000000     0.0000000  0.0000000    -0.8673261 -0.6748975
#> 25   0.0000000   0.000000     0.0000000  0.0000000    -0.6061550 -1.1620542
#> 26   0.0000000   0.000000     0.0000000  0.0000000    -0.1800481  1.3554291
#> 27   0.0000000   0.000000     0.0000000  0.0000000     2.6193659 -9.1856169
#> 28   0.0000000   0.000000     0.0000000  0.0000000    -3.6405636 -3.8334329
#> 29   0.0000000   0.000000     0.0000000  0.0000000     6.3488353  9.6257670
#> 30   0.0000000   0.000000     0.0000000  0.0000000    -3.8544180 -1.2014514


## Monte Carlo approach, simple trend test example

m <- categorical(lvm(),~x,K=5)
regression(m,additive=TRUE) <- y~x
d <- simulate(m,100,seed=1,'y~x'=0.1)
l <- lm(y~-1+factor(x),data=d)

f <- function(x) coef(lm(x~seq_along(x)))[2]
null <- rep(mean(coef(l)),length(coef(l)))
##  just need to make sure we simulate under H0: slope=0
estimate(l,f,R=1e2,null.sim=null)
#> 100 replications
#> 
#>          seq_along(x)
#> Mean         0.014709
#> SD           0.065440
#>                      
#> 2.5%        -0.097458
#> 97.5%        0.146613
#>                      
#> Estimate     0.080949
#> P-value      0.180000
#> 

estimate(l,f)
#>              Estimate Std.Err     2.5%  97.5% P-value
#> seq_along(x)  0.08095 0.06135 -0.03929 0.2012   0.187

Usage

Arguments

Details

See also

Examples