Estimation of functionals of model parameters. Wald tests, robust standard errors, cluster-robust standard errors, and likelihood ratio tests (LRT; performed when 'f' is another model object rather than a function).

# S3 method for default
estimate(
  x = NULL,
  f = NULL,
  ...,
  data,
  id,
  iddata,
  stack = TRUE,
  average = FALSE,
  subset,
  score.deriv,
  level = 0.95,
  iid = robust,
  type = c("robust", "df", "mbn"),
  keep,
  use,
  regex = FALSE,
  contrast,
  null,
  vcov,
  coef,
  robust = TRUE,
  df = NULL,
  print = NULL,
  labels,
  label.width,
  only.coef = FALSE,
  back.transform = NULL,
  folds = 0,
  cluster,
  R = 0,
  null.sim
)

Arguments

x

model object (glm, lvmfit, ...)

f

transformation of model parameters and (optionally) data, or contrast matrix (or vector)

...

additional arguments to lower level functions

data

data.frame

id

(optional) id-variable corresponding to iid decomposition of model parameters.

iddata

(optional) id-variable for 'data'

stack

if TRUE (default) the i.i.d. decomposition is automatically stacked according to 'id'

average

if TRUE averages are calculated

subset

(optional) subset of data.frame on which to condition (logical expression or variable name)

score.deriv

(optional) derivative of mean score function

level

level of confidence limits

iid

if TRUE (default) the i.i.d. decompositions are also returned (extract with the iid method)

type

type of small-sample correction

keep

(optional) index of parameters to keep from final result

use

(optional) index of parameters to use in calculations

regex

If TRUE, interpret the 'keep' and 'use' arguments as (Perl-compatible) regular expressions

contrast

(optional) Contrast matrix for final Wald test

null

(optional) null hypothesis to test

vcov

(optional) covariance matrix of the parameter estimates (used, e.g., for Wald tests)

coef

(optional) parameter coefficients (by default extracted from the model object)

robust

if TRUE, robust standard errors are calculated. If FALSE, p-values for linear models are calculated from the t-distribution

df

degrees of freedom (default obtained from 'df.residual')

print

(optional) print function

labels

(optional) names of coefficients

label.width

(optional) max width of labels

only.coef

if TRUE, only the coefficient matrix is returned

back.transform

(optional) transform of parameters and confidence intervals (e.g., exp; see the sketch following this argument list)

folds

(optional) aggregate influence functions (divide and conquer)

cluster

(obsolete) alias for 'id'.

R

Number of simulations (simulated p-values)

null.sim

Mean under the null for simulations
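
A few of these arguments are easiest to understand from a small sketch. The following is illustrative only, assuming the logistic regression fit 'g' defined in the Examples below; the choice of exp (yielding odds ratios) is an illustration, not a default of the function:

## Report exp-transformed estimates and confidence limits (odds ratios)
estimate(g, back.transform=exp)
## Keep only coefficients matching a Perl-compatible regular expression
estimate(g, keep="^z$", regex=TRUE)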

Details

The i.i.d. decomposition $$\sqrt{n}(\widehat{\theta}-\theta) = \sum_{i=1}^n\epsilon_i + o_p(1)$$, where $\epsilon_i$ is the i.i.d. term contributed by observation $i$, can be extracted with the iid method.
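
A minimal sketch of the extraction, assuming the logistic regression fit 'g' from the Examples and the scaling convention in which the cross-product of the returned decomposition approximates the robust variance:

## Extract the estimated i.i.d. decomposition (influence functions)
e  <- estimate(g)
IC <- iid(e)    # n x p matrix; one row per observation
crossprod(IC)   # approximately vcov(e), i.e. the robust variance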

Examples

## Simulation from logistic regression model
m <- lvm(y~x+z)
distribution(m,y~x) <- binomial.lvm("logit")
d <- sim(m,1000)
g <- glm(y~z+x,data=d,family=binomial())
g0 <- glm(y~1,data=d,family=binomial())

## LRT
estimate(g,g0)
#> 
#> 	- Likelihood ratio test -
#> 
#> data:  
#> chisq = 200.23, df = 2, p-value < 2.2e-16
#> sample estimates:
#> log likelihood (model 1) log likelihood (model 2) 
#>                -572.4913                -672.6041 
#> 

## Plain estimates (robust standard errors)
estimate(g)
#>             Estimate Std.Err    2.5%  97.5%   P-value
#> (Intercept)  0.05007 0.09463 -0.1354 0.2355 5.967e-01
#> z            0.99379 0.08444  0.8283 1.1593 5.666e-32
#> x            0.87090 0.14726  0.5823 1.1595 3.335e-09

## Testing contrasts
estimate(g,null=0)
#>               Estimate Std.Err    2.5%  97.5%   P-value
#> [(Intercept)]  0.05007 0.09463 -0.1354 0.2355 5.967e-01
#> [z]            0.99379 0.08444  0.8283 1.1593 5.666e-32
#> [x]            0.87090 0.14726  0.5823 1.1595 3.335e-09
#> 
#>  Null Hypothesis: 
#>   [(Intercept)] = 0
#>   [z] = 0
#>   [x] = 0
#> 
#> chisq = 166.6092, df = 3, p-value < 2.2e-16
estimate(g,rbind(c(1,1,0),c(1,0,2)))
#>                      Estimate Std.Err   2.5% 97.5%   P-value
#> [(Intercept)] + [z]     1.044  0.1251 0.7987 1.289 7.172e-17
#> [(Intercept)] + 2[x]    1.792  0.2441 1.3134 2.270 2.131e-13
#> 
#>  Null Hypothesis: 
#>   [(Intercept)] + [z] = 0
#>   [(Intercept)] + 2[x] = 0
#> 
#> chisq = 140.8145, df = 2, p-value < 2.2e-16
estimate(g,rbind(c(1,1,0),c(1,0,2)),null=c(1,2))
#>                      Estimate Std.Err   2.5% 97.5% P-value
#> [(Intercept)] + [z]     1.044  0.1251 0.7987 1.289  0.7259
#> [(Intercept)] + 2[x]    1.792  0.2441 1.3134 2.270  0.3939
#> 
#>  Null Hypothesis: 
#>   [(Intercept)] + [z] = 1
#>   [(Intercept)] + 2[x] = 2
#> 
#> chisq = 0.788, df = 2, p-value = 0.6744
estimate(g,2:3) ## same as cbind(0,1,-1)
#>           Estimate Std.Err    2.5%  97.5% P-value
#> [z] - [x]   0.1229  0.1526 -0.1762 0.4219  0.4206
#> 
#>  Null Hypothesis: 
#>   [z] - [x] = 0
estimate(g,as.list(2:3)) ## same as rbind(c(0,1,0),c(0,0,1))
#>     Estimate Std.Err   2.5% 97.5%   P-value
#> [z]   0.9938 0.08444 0.8283 1.159 5.666e-32
#> [x]   0.8709 0.14726 0.5823 1.160 3.335e-09
#> 
#>  Null Hypothesis: 
#>   [z] = 0
#>   [x] = 0
#> 
#> chisq = 149.9225, df = 2, p-value < 2.2e-16

## Alternative syntax
estimate(g,"z","z"-"x",2*"z"-3*"x")
#>             Estimate Std.Err    2.5%  97.5%   P-value
#> [z]           0.9938 0.08444  0.8283 1.1593 5.666e-32
#> [z] - [x]     0.1229 0.15258 -0.1762 0.4219 4.206e-01
#> 2[z] - 3[x]  -0.6251 0.43643 -1.4805 0.2303 1.520e-01
#> 
#>  Null Hypothesis: 
#>   [z] = 0
#>   [z] - [x] = 0
#>   2[z] - 3[x] = 0
#> 
#> chisq = 149.9225, df = 2, p-value < 2.2e-16
estimate(g,z,z-x,2*z-3*x)
#>             Estimate Std.Err    2.5%  97.5%   P-value
#> [z]           0.9938 0.08444  0.8283 1.1593 5.666e-32
#> [z] - [x]     0.1229 0.15258 -0.1762 0.4219 4.206e-01
#> 2[z] - 3[x]  -0.6251 0.43643 -1.4805 0.2303 1.520e-01
#> 
#>  Null Hypothesis: 
#>   [z] = 0
#>   [z] - [x] = 0
#>   2[z] - 3[x] = 0
#> 
#> chisq = 149.9225, df = 2, p-value < 2.2e-16

estimate(g,"?") ## Wildcards
#>           Estimate Std.Err    2.5%  97.5% P-value
#> [z] - [x]   0.1229  0.1526 -0.1762 0.4219  0.4206
#> 
#>  Null Hypothesis: 
#>   [z] - [x] = 0
estimate(g,"*Int*","z")
#>               Estimate Std.Err    2.5%  97.5%   P-value
#> [(Intercept)]  0.05007 0.09463 -0.1354 0.2355 5.967e-01
#> [z]            0.99379 0.08444  0.8283 1.1593 5.666e-32
#> 
#>  Null Hypothesis: 
#>   [(Intercept)] = 0
#>   [z] = 0
#> 
#> chisq = 139.2215, df = 2, p-value < 2.2e-16
estimate(g,"1","2"-"3",null=c(0,1))
#>               Estimate Std.Err    2.5%  97.5%   P-value
#> [(Intercept)]  0.05007 0.09463 -0.1354 0.2355 5.967e-01
#> [z] - [x]      0.12289 0.15258 -0.1762 0.4219 8.999e-09
#> 
#>  Null Hypothesis: 
#>   [(Intercept)] = 0
#>   [z] - [x] = 1
#> 
#> chisq = 58.99, df = 2, p-value = 1.551e-13
estimate(g,2,3)
#>     Estimate Std.Err   2.5% 97.5%   P-value
#> [z]   0.9938 0.08444 0.8283 1.159 5.666e-32
#> [x]   0.8709 0.14726 0.5823 1.160 3.335e-09
#> 
#>  Null Hypothesis: 
#>   [z] = 0
#>   [x] = 0
#> 
#> chisq = 149.9225, df = 2, p-value < 2.2e-16

## Usual (non-robust) confidence intervals
estimate(g,robust=FALSE)
#>             Estimate Std.Err    2.5% 97.5%   P-value
#> (Intercept)  0.05007 0.09892 -0.1438 0.244 6.127e-01
#> z            0.99379 0.08513  0.8269 1.161 1.730e-31
#> x            0.87090 0.14578  0.5852 1.157 2.315e-09

## Transformations
estimate(g,function(p) p[1]+p[2])
#>             Estimate Std.Err   2.5% 97.5%   P-value
#> (Intercept)    1.044  0.1251 0.7987 1.289 7.172e-17

## Multiple parameters
e <- estimate(g,function(p) c(p[1]+p[2],p[1]*p[2]))
e
#>               Estimate Std.Err    2.5% 97.5%   P-value
#> (Intercept)    1.04386 0.12510  0.7987 1.289 7.172e-17
#> (Intercept).1  0.04976 0.09402 -0.1345 0.234 5.966e-01
vcov(e)
#>             (Intercept) (Intercept)
#> (Intercept) 0.015650224 0.009028627
#> (Intercept) 0.009028627 0.008839434

## Label new parameters
estimate(g,function(p) list("a1"=p[1]+p[2],"b1"=p[1]*p[2]))
#>    Estimate Std.Err    2.5% 97.5%   P-value
#> a1  1.04386 0.12510  0.7987 1.289 7.172e-17
#> b1  0.04976 0.09402 -0.1345 0.234 5.966e-01

## Multiple group
m <- lvm(y~x)
m <- baptize(m)
d2 <- d1 <- sim(m,50)
e <- estimate(list(m,m),list(d1,d2))
estimate(e) ## Wrong
#>       Estimate Std.Err    2.5%  97.5%   P-value
#> y@1     -0.112  0.1157 -0.3388 0.1148 3.330e-01
#> y~x@1    1.021  0.1094  0.8069 1.2357 9.970e-21
#> y~~y@1   1.051  0.1296  0.7972 1.3053 4.989e-16
estimate(e,id=rep(seq(nrow(d1)),2))
#>       Estimate Std.Err    2.5%  97.5%   P-value
#> y@1     -0.112  0.1637 -0.4328 0.2087 4.936e-01
#> y~x@1    1.021  0.1547  0.7181 1.3245 4.062e-11
#> y~~y@1   1.051  0.1833  0.6920 1.4105 9.700e-09
estimate(lm(y~x,d1))
#>             Estimate Std.Err    2.5%  97.5%   P-value
#> (Intercept)   -0.112  0.1637 -0.4328 0.2087 4.936e-01
#> x              1.021  0.1547  0.7181 1.3245 4.060e-11

## Marginalize
f <- function(p,data)
    list(p0=lava:::expit(p["(Intercept)"] + p["z"]*data[,"z"]),
         p1=lava:::expit(p["(Intercept)"] + p["x"] + p["z"]*data[,"z"]))
e <- estimate(g, f, average=TRUE)
e
#>    Estimate Std.Err   2.5%  97.5%    P-value
#> p0   0.5135 0.02106 0.4722 0.5548 2.730e-131
#> p1   0.6857 0.02015 0.6462 0.7252 8.148e-254
estimate(e,diff)
#>    Estimate Std.Err   2.5%  97.5%   P-value
#> p1   0.1722  0.0279 0.1175 0.2269 6.753e-10
estimate(e,cbind(1,1))
#>             Estimate Std.Err 2.5% 97.5% P-value
#> [p0] + [p1]    1.199 0.03034 1.14 1.259       0
#> 
#>  Null Hypothesis: 
#>   [p0] + [p1] = 0

## Clusters and subset (conditional marginal effects)
d$id <- rep(seq(nrow(d)/4),each=4)
estimate(g,
         function(p,data) list(p0=lava:::expit(p[1] + p["z"]*data[,"z"])),
         subset=d$z>0, id=d$id, average=TRUE)
#>    Estimate Std.Err  2.5%  97.5%    P-value
#> p0   0.6853 0.02364 0.639 0.7316 8.719e-185

## More examples with clusters:
m <- lvm(c(y1,y2,y3)~u+x)
d <- sim(m,10)
l1 <- glm(y1~x,data=d)
l2 <- glm(y2~x,data=d)
l3 <- glm(y3~x,data=d)

## Some random id-numbers
id1 <- c(1,1,4,1,3,1,2,3,4,5)
id2 <- c(1,2,3,4,5,6,7,8,1,1)
id3 <- seq(10)

## Un-stacked and stacked i.i.d. decomposition
iid(estimate(l1,id=id1,stack=FALSE))
#>    (Intercept)            x
#> 1  0.172497867  0.008106054
#> 1 -0.022857634  0.005279655
#> 4  0.103550277 -0.073419312
#> 1  0.008766599  0.004027337
#> 3 -0.050447854  0.088497932
#> 1  0.074913808  0.044500060
#> 2 -0.047330035  0.047206095
#> 3 -0.072376131 -0.045563289
#> 4 -0.071627140 -0.161833849
#> 5 -0.095089758  0.083199317
#> attr(,"bread")
#>             (Intercept)           x
#> (Intercept) 0.089353820 0.005329519
#> x           0.005329519 0.108811009
iid(estimate(l1,id=id1))
#>   (Intercept)           x
#> 1  0.23332064  0.06191311
#> 2 -0.04733003  0.04720609
#> 3 -0.12282398  0.04293464
#> 4  0.03192314 -0.23525316
#> 5 -0.09508976  0.08319932
#> attr(,"bread")
#>             (Intercept)           x
#> (Intercept) 0.089353820 0.005329519
#> x           0.005329519 0.108811009
#> attr(,"N")
#> [1] 10

## Combined i.i.d. decomposition
e1 <- estimate(l1,id=id1)
e2 <- estimate(l2,id=id2)
e3 <- estimate(l3,id=id3)
(a2 <- merge(e1,e2,e3))
#>               Estimate Std.Err     2.5% 97.5%   P-value
#> (Intercept)     0.5842  0.2861  0.02351 1.145 0.0411359
#> x               0.4907  0.2649 -0.02848 1.010 0.0639630
#> _____________
#> (Intercept).1   0.1833  0.4222 -0.64418 1.011 0.6642148
#> x.1             1.4272  0.4048  0.63388 2.220 0.0004218
#> _____________
#> (Intercept).2   0.8690  0.3591  0.16529 1.573 0.0155069
#> x.2             0.6283  0.3387 -0.03559 1.292 0.0636095

## If all models were estimated on the same data we could use the
## syntax:
## Reduce(merge,estimate(list(l1,l2,l3)))

## Same:
iid(a1 <- merge(l1,l2,l3,id=list(id1,id2,id3)))
#>    (Intercept)           x (Intercept).1         x.1 (Intercept).2          x.2
#> 1   0.23332064  0.06191311    0.07130731  0.01191396   0.093734464  0.004404789
#> 2  -0.04733003  0.04720609   -0.14841748  0.03428146  -0.123829415  0.028602110
#> 3  -0.12282398  0.04293464   -0.05696788  0.04039142   0.082113479 -0.058220175
#> 4   0.03192314 -0.23525316   -0.20366429 -0.09356247  -0.051706742 -0.023753848
#> 5  -0.09508976  0.08319932    0.18966074 -0.33271154   0.101388334 -0.177860053
#> 6   0.00000000  0.00000000    0.13031215  0.07740761   0.086589778  0.051435783
#> 7   0.00000000  0.00000000   -0.15433683  0.15393268  -0.265172321  0.264477932
#> 8   0.00000000  0.00000000    0.17210628  0.10834689  -0.002522956 -0.001588288
#> 9   0.00000000  0.00000000    0.00000000  0.00000000  -0.005749861 -0.012991196
#> 10  0.00000000  0.00000000    0.00000000  0.00000000   0.085155239 -0.074507054
iid(merge(l1,l2,l3,id=TRUE)) # one-to-one (same clusters)
#>     (Intercept)            x (Intercept).1          x.1 (Intercept).2          x.2
#> 1   0.172497867  0.008106054    0.02954169  0.001388229   0.093734464  0.004404789
#> 2  -0.022857634  0.005279655   -0.14841748  0.034281459  -0.123829415  0.028602110
#> 3   0.103550277 -0.073419312   -0.05696788  0.040391419   0.082113479 -0.058220175
#> 4   0.008766599  0.004027337   -0.20366429 -0.093562470  -0.051706742 -0.023753848
#> 5  -0.050447854  0.088497932    0.18966074 -0.332711538   0.101388334 -0.177860053
#> 6   0.074913808  0.044500060    0.13031215  0.077407607   0.086589778  0.051435783
#> 7  -0.047330035  0.047206095   -0.15433683  0.153932679  -0.265172321  0.264477932
#> 8  -0.072376131 -0.045563289    0.17210628  0.108346885  -0.002522956 -0.001588288
#> 9  -0.071627140 -0.161833849    0.01501709  0.033929501  -0.005749861 -0.012991196
#> 10 -0.095089758  0.083199317    0.02674852 -0.023403772   0.085155239 -0.074507054
iid(merge(l1,l2,l3,id=FALSE)) # independence
#>     (Intercept)            x (Intercept).1          x.1 (Intercept).2          x.2
#> 1   0.172497867  0.008106054    0.00000000  0.000000000   0.000000000  0.000000000
#> 2  -0.022857634  0.005279655    0.00000000  0.000000000   0.000000000  0.000000000
#> 3   0.103550277 -0.073419312    0.00000000  0.000000000   0.000000000  0.000000000
#> 4   0.008766599  0.004027337    0.00000000  0.000000000   0.000000000  0.000000000
#> 5  -0.050447854  0.088497932    0.00000000  0.000000000   0.000000000  0.000000000
#> 6   0.074913808  0.044500060    0.00000000  0.000000000   0.000000000  0.000000000
#> 7  -0.047330035  0.047206095    0.00000000  0.000000000   0.000000000  0.000000000
#> 8  -0.072376131 -0.045563289    0.00000000  0.000000000   0.000000000  0.000000000
#> 9  -0.071627140 -0.161833849    0.00000000  0.000000000   0.000000000  0.000000000
#> 10 -0.095089758  0.083199317    0.00000000  0.000000000   0.000000000  0.000000000
#> 11  0.000000000  0.000000000    0.02954169  0.001388229   0.000000000  0.000000000
#> 12  0.000000000  0.000000000   -0.14841748  0.034281459   0.000000000  0.000000000
#> 13  0.000000000  0.000000000   -0.05696788  0.040391419   0.000000000  0.000000000
#> 14  0.000000000  0.000000000   -0.20366429 -0.093562470   0.000000000  0.000000000
#> 15  0.000000000  0.000000000    0.18966074 -0.332711538   0.000000000  0.000000000
#> 16  0.000000000  0.000000000    0.13031215  0.077407607   0.000000000  0.000000000
#> 17  0.000000000  0.000000000   -0.15433683  0.153932679   0.000000000  0.000000000
#> 18  0.000000000  0.000000000    0.17210628  0.108346885   0.000000000  0.000000000
#> 19  0.000000000  0.000000000    0.01501709  0.033929501   0.000000000  0.000000000
#> 20  0.000000000  0.000000000    0.02674852 -0.023403772   0.000000000  0.000000000
#> 21  0.000000000  0.000000000    0.00000000  0.000000000   0.093734464  0.004404789
#> 22  0.000000000  0.000000000    0.00000000  0.000000000  -0.123829415  0.028602110
#> 23  0.000000000  0.000000000    0.00000000  0.000000000   0.082113479 -0.058220175
#> 24  0.000000000  0.000000000    0.00000000  0.000000000  -0.051706742 -0.023753848
#> 25  0.000000000  0.000000000    0.00000000  0.000000000   0.101388334 -0.177860053
#> 26  0.000000000  0.000000000    0.00000000  0.000000000   0.086589778  0.051435783
#> 27  0.000000000  0.000000000    0.00000000  0.000000000  -0.265172321  0.264477932
#> 28  0.000000000  0.000000000    0.00000000  0.000000000  -0.002522956 -0.001588288
#> 29  0.000000000  0.000000000    0.00000000  0.000000000  -0.005749861 -0.012991196
#> 30  0.000000000  0.000000000    0.00000000  0.000000000   0.085155239 -0.074507054
## Monte Carlo approach, simple trend test example
m <- categorical(lvm(),~x,K=5)
regression(m,additive=TRUE) <- y~x
d <- simulate(m,100,seed=1,'y~x'=0.1)
l <- lm(y~-1+factor(x),data=d)
f <- function(x) coef(lm(x~seq_along(x)))[2]
null <- rep(mean(coef(l)),length(coef(l))) ## just need to make sure we simulate under H0: slope=0
estimate(l,f,R=1e2,null.sim=null)
#> 100 replications
#> 
#>          seq_along(x)
#> Mean        -0.011126
#> SD           0.065451
#> 
#> 2.5%        -0.117538
#> 97.5%        0.097405
#> 
#> Estimate     0.080949
#> P-value      0.250000
#> 
estimate(l,f)
#>              Estimate Std.Err     2.5%  97.5% P-value
#> seq_along(x)  0.08095 0.06135 -0.03929 0.2012   0.187