Estimation of functional of parameters. Wald tests, robust standard errors, cluster robust standard errors, LRT (when f is not a function)...

# S3 method for default
estimate(
  x = NULL,
  f = NULL,
  ...,
  data,
  id,
  iddata,
  stack = TRUE,
  average = FALSE,
  subset,
  score.deriv,
  level = 0.95,
  IC = robust,
  type = c("robust", "df", "mbn"),
  keep,
  use,
  regex = FALSE,
  contrast,
  null,
  vcov,
  coef,
  robust = TRUE,
  df = NULL,
  print = NULL,
  labels,
  label.width,
  only.coef = FALSE,
  back.transform = NULL,
  folds = 0,
  cluster,
  R = 0,
  null.sim
)

Arguments

x

model object (glm, lvmfit, ...)

f

transformation of model parameters and (optionally) data, or contrast matrix (or vector)

...

additional arguments to lower level functions

data

data.frame

id

(optional) id-variable corresponding to ic decomposition of model parameters.

iddata

(optional) id-variable for 'data'

stack

if TRUE (default) the i.i.d. decomposition is automatically stacked according to 'id'

average

if TRUE averages are calculated

subset

(optional) subset of data.frame on which to condition (logical expression or variable name)

score.deriv

(optional) derivative of mean score function

level

level of confidence limits

IC

if TRUE (default) the influence function decompositions are also returned (extract with IC method)

type

type of small-sample correction

keep

(optional) index of parameters to keep from final result

use

(optional) index of parameters to use in calculations

regex

If TRUE use regular expression (perl compatible) for keep,use arguments

contrast

(optional) Contrast matrix for final Wald test

null

(optional) null hypothesis to test

vcov

(optional) covariance matrix of parameter estimates (e.g. Wald-test)

coef

(optional) parameter coefficient

robust

if TRUE robust standard errors are calculated. If FALSE p-values for linear models are calculated from t-distribution

df

degrees of freedom (default obtained from 'df.residual')

print

(optional) print function

labels

(optional) names of coefficients

label.width

(optional) max width of labels

only.coef

if TRUE only the coefficient matrix is return

back.transform

(optional) transform of parameters and confidence intervals

folds

(optional) aggregate influence functions (divide and conquer)

cluster

(obsolete) alias for 'id'.

R

Number of simulations (simulated p-values)

null.sim

Mean under the null for simulations

Details

influence function decomposition $$\sqrt{n}(\widehat{\theta}-\theta) = \sum_{i=1}^n\epsilon_i + o_p(1)$$ can be extracted with the IC method.

Examples


## Simulation from logistic regression model
m <- lvm(y~x+z);
distribution(m,y~x) <- binomial.lvm("logit")
d <- sim(m,1000)
g <- glm(y~z+x,data=d,family=binomial())
g0 <- glm(y~1,data=d,family=binomial())

## LRT
estimate(g,g0)
#> 
#> 	- Likelihood ratio test -
#> 
#> data:  
#> chisq = 209.91, df = 2, p-value < 2.2e-16
#> sample estimates:
#> log likelihood (model 1) log likelihood (model 2) 
#>                -567.6493                -672.6041 
#> 

## Plain estimates (robust standard errors)
estimate(g)
#>              Estimate Std.Err    2.5%  97.5%   P-value
#> (Intercept) -0.001888 0.09785 -0.1937 0.1899 9.846e-01
#> z            0.953974 0.08114  0.7949 1.1130 6.481e-32
#> x            1.009058 0.14720  0.7205 1.2976 7.135e-12

## Testing contrasts
estimate(g,null=0)
#>                Estimate Std.Err    2.5%  97.5%   P-value
#> [(Intercept)] -0.001888 0.09785 -0.1937 0.1899 9.846e-01
#> [z]            0.953974 0.08114  0.7949 1.1130 6.481e-32
#> [x]            1.009058 0.14720  0.7205 1.2976 7.135e-12
#> 
#>  Null Hypothesis: 
#>   [(Intercept)] = 0
#>   [z] = 0
#>   [x] = 0 
#>  
#> chisq = 180.732, df = 3, p-value < 2.2e-16
estimate(g,rbind(c(1,1,0),c(1,0,2)))
#>                      Estimate Std.Err   2.5% 97.5%   P-value
#> [(Intercept)] + [z]    0.9521  0.1260 0.7052 1.199 4.075e-14
#> [(Intercept)] + 2[x]   2.0162  0.2404 1.5451 2.487 4.939e-17
#> 
#>  Null Hypothesis: 
#>   [(Intercept)] + [z] = 0
#>   [(Intercept)] + 2[x] = 0 
#>  
#> chisq = 153.6241, df = 2, p-value < 2.2e-16
estimate(g,rbind(c(1,1,0),c(1,0,2)),null=c(1,2))
#>                      Estimate Std.Err   2.5% 97.5% P-value
#> [(Intercept)] + [z]    0.9521  0.1260 0.7052 1.199  0.7037
#> [(Intercept)] + 2[x]   2.0162  0.2404 1.5451 2.487  0.9462
#> 
#>  Null Hypothesis: 
#>   [(Intercept)] + [z] = 1
#>   [(Intercept)] + 2[x] = 2 
#>  
#> chisq = 0.1447, df = 2, p-value = 0.9302
estimate(g,2:3) ## same as cbind(0,1,-1)
#>           Estimate Std.Err    2.5%  97.5% P-value
#> [z] - [x] -0.05508  0.1537 -0.3563 0.2461    0.72
#> 
#>  Null Hypothesis: 
#>   [z] - [x] = 0 
estimate(g,as.list(2:3)) ## same as rbind(c(0,1,0),c(0,0,1))
#>     Estimate Std.Err   2.5% 97.5%   P-value
#> [z]    0.954 0.08114 0.7949 1.113 6.481e-32
#> [x]    1.009 0.14720 0.7205 1.298 7.135e-12
#> 
#>  Null Hypothesis: 
#>   [z] = 0
#>   [x] = 0 
#>  
#> chisq = 159.9584, df = 2, p-value < 2.2e-16
## Alternative syntax
estimate(g,"z","z"-"x",2*"z"-3*"x")
#>             Estimate Std.Err    2.5%   97.5%   P-value
#> [z]          0.95397 0.08114  0.7949  1.1130 6.481e-32
#> [z] - [x]   -0.05508 0.15367 -0.3563  0.2461 7.200e-01
#> 2[z] - 3[x] -1.11922 0.43991 -1.9814 -0.2570 1.095e-02
#> 
#>  Null Hypothesis: 
#>   [z] = 0
#>   [z] - [x] = 0
#>   2[z] - 3[x] = 0 
#>  
#> chisq = 159.9584, df = 2, p-value < 2.2e-16
estimate(g,z,z-x,2*z-3*x)
#>             Estimate Std.Err    2.5%   97.5%   P-value
#> [z]          0.95397 0.08114  0.7949  1.1130 6.481e-32
#> [z] - [x]   -0.05508 0.15367 -0.3563  0.2461 7.200e-01
#> 2[z] - 3[x] -1.11922 0.43991 -1.9814 -0.2570 1.095e-02
#> 
#>  Null Hypothesis: 
#>   [z] = 0
#>   [z] - [x] = 0
#>   2[z] - 3[x] = 0 
#>  
#> chisq = 159.9584, df = 2, p-value < 2.2e-16
estimate(g,"?")  ## Wildcards
#>           Estimate Std.Err    2.5%  97.5% P-value
#> [z] - [x] -0.05508  0.1537 -0.3563 0.2461    0.72
#> 
#>  Null Hypothesis: 
#>   [z] - [x] = 0 
estimate(g,"*Int*","z")
#>                Estimate Std.Err    2.5%  97.5%   P-value
#> [(Intercept)] -0.001888 0.09785 -0.1937 0.1899 9.846e-01
#> [z]            0.953974 0.08114  0.7949 1.1130 6.481e-32
#> 
#>  Null Hypothesis: 
#>   [(Intercept)] = 0
#>   [z] = 0 
#>  
#> chisq = 138.2721, df = 2, p-value < 2.2e-16
estimate(g,"1","2"-"3",null=c(0,1))
#>                Estimate Std.Err    2.5%  97.5%   P-value
#> [(Intercept)] -0.001888 0.09785 -0.1937 0.1899 9.846e-01
#> [z] - [x]     -0.055083 0.15367 -0.3563 0.2461 6.606e-12
#> 
#>  Null Hypothesis: 
#>   [(Intercept)] = 0
#>   [z] - [x] = 1 
#>  
#> chisq = 77.8686, df = 2, p-value < 2.2e-16
estimate(g,2,3)
#>     Estimate Std.Err   2.5% 97.5%   P-value
#> [z]    0.954 0.08114 0.7949 1.113 6.481e-32
#> [x]    1.009 0.14720 0.7205 1.298 7.135e-12
#> 
#>  Null Hypothesis: 
#>   [z] = 0
#>   [x] = 0 
#>  
#> chisq = 159.9584, df = 2, p-value < 2.2e-16

## Usual (non-robust) confidence intervals
estimate(g,robust=FALSE)
#>              Estimate Std.Err    2.5%  97.5%   P-value
#> (Intercept) -0.001888 0.09817 -0.1943 0.1905 9.847e-01
#> z            0.953974 0.08318  0.7909 1.1170 1.892e-30
#> x            1.009058 0.14639  0.7221 1.2960 5.469e-12

## Transformations
estimate(g,function(p) p[1]+p[2])
#>             Estimate Std.Err   2.5% 97.5%   P-value
#> (Intercept)   0.9521   0.126 0.7052 1.199 4.075e-14

## Multiple parameters
e <- estimate(g,function(p) c(p[1]+p[2],p[1]*p[2]))
e
#>                Estimate Std.Err    2.5%  97.5%   P-value
#> (Intercept)    0.952086 0.12596  0.7052 1.1990 4.075e-14
#> (Intercept).1 -0.001801 0.09335 -0.1848 0.1812 9.846e-01
vcov(e)
#>             (Intercept) (Intercept)
#> (Intercept)  0.01586612 0.008982930
#> (Intercept)  0.00898293 0.008714966

## Label new parameters
estimate(g,function(p) list("a1"=p[1]+p[2],"b1"=p[1]*p[2]))
#>     Estimate Std.Err    2.5%  97.5%   P-value
#> a1  0.952086 0.12596  0.7052 1.1990 4.075e-14
#> b1 -0.001801 0.09335 -0.1848 0.1812 9.846e-01
##'
## Multiple group
m <- lvm(y~x)
m <- baptize(m)
d2 <- d1 <- sim(m,50,seed=1)
e <- estimate(list(m,m),list(d1,d2))
estimate(e) ## Wrong
#>        Estimate Std.Err     2.5%  97.5%   P-value
#> y@1      0.1044 0.08277 -0.05785 0.2666 2.073e-01
#> y~x@1    0.9665 0.08727  0.79541 1.1375 1.677e-28
#> y~~y@1   0.6764 0.10629  0.46803 0.8847 1.977e-10
ee <- estimate(e, id=rep(seq(nrow(d1)), 2)) ## Clustered
ee
#>        Estimate Std.Err    2.5%  97.5%   P-value
#> y@1      0.1044  0.1171 -0.1250 0.3338 3.725e-01
#> y~x@1    0.9665  0.1234  0.7246 1.2084 4.859e-15
#> y~~y@1   0.6764  0.1503  0.3817 0.9710 6.814e-06
estimate(lm(y~x,d1))
#>             Estimate Std.Err    2.5%  97.5%   P-value
#> (Intercept)   0.1044  0.1171 -0.1251 0.3338 3.726e-01
#> x             0.9665  0.1234  0.7246 1.2084 4.853e-15

## Marginalize
f <- function(p,data)
  list(p0=lava:::expit(p["(Intercept)"] + p["z"]*data[,"z"]),
       p1=lava:::expit(p["(Intercept)"] + p["x"] + p["z"]*data[,"z"]))
e <- estimate(g, f, average=TRUE)
e
#>    Estimate Std.Err   2.5%  97.5%    P-value
#> p0   0.5010 0.02140 0.4591 0.5429 3.025e-121
#> p1   0.7007 0.01973 0.6620 0.7393 3.516e-276
estimate(e,diff)
#>    Estimate Std.Err  2.5%  97.5%   P-value
#> p1   0.1997  0.0279 0.145 0.2543 8.194e-13
estimate(e,cbind(1,1))
#>             Estimate Std.Err  2.5% 97.5% P-value
#> [p0] + [p1]    1.202 0.03027 1.142 1.261       0
#> 
#>  Null Hypothesis: 
#>   [p0] + [p1] = 0 

## Clusters and subset (conditional marginal effects)
d$id <- rep(seq(nrow(d)/4),each=4)
estimate(g,function(p,data)
         list(p0=lava:::expit(p[1] + p["z"]*data[,"z"])),
         subset=d$z>0, id=d$id, average=TRUE)
#>    Estimate Std.Err   2.5%  97.5%    P-value
#> p0   0.6754 0.02282 0.6307 0.7202 1.558e-192

## More examples with clusters:
m <- lvm(c(y1,y2,y3)~u+x)
d <- sim(m,10)
l1 <- glm(y1~x,data=d)
l2 <- glm(y2~x,data=d)
l3 <- glm(y3~x,data=d)

## Some random id-numbers
id1 <- c(1,1,4,1,3,1,2,3,4,5)
id2 <- c(1,2,3,4,5,6,7,8,1,1)
id3 <- seq(10)

## Un-stacked and stacked i.i.d. decomposition
IC(estimate(l1,id=id1,stack=FALSE))
#> Error in IC(estimate(l1, id = id1, stack = FALSE)): could not find function "IC"
IC(estimate(l1,id=id1))
#> Error in IC(estimate(l1, id = id1)): could not find function "IC"

## Combined i.i.d. decomposition
e1 <- estimate(l1,id=id1)
e2 <- estimate(l2,id=id2)
e3 <- estimate(l3,id=id3)
(a2 <- merge(e1,e2,e3))
#>               Estimate Std.Err     2.5% 97.5%   P-value
#> (Intercept)     0.5418  0.2472  0.05731 1.026 0.0283949
#> x               0.9636  0.2592  0.45568 1.472 0.0002006
#> _____________                                          
#> (Intercept).1   0.2156  0.5105 -0.78505 1.216 0.6728124
#> x.1             1.2369  0.4854  0.28554 2.188 0.0108277
#> _____________                                          
#> (Intercept).2   0.4769  0.2979 -0.10705 1.061 0.1094554
#> x.2             1.1545  0.3714  0.42660 1.882 0.0018795

## If all models were estimated on the same data we could use the
## syntax:
## Reduce(merge,estimate(list(l1,l2,l3)))

## Same:
IC(a1 <- merge(l1,l2,l3,id=list(id1,id2,id3)))
#> Error in IC(a1 <- merge(l1, l2, l3, id = list(id1, id2, id3))): could not find function "IC"

IC(merge(l1,l2,l3,id=TRUE)) # one-to-one (same clusters)
#> Error in IC(merge(l1, l2, l3, id = TRUE)): could not find function "IC"
IC(merge(l1,l2,l3,id=FALSE)) # independence
#> Error in IC(merge(l1, l2, l3, id = FALSE)): could not find function "IC"


## Monte Carlo approach, simple trend test example

m <- categorical(lvm(),~x,K=5)
regression(m,additive=TRUE) <- y~x
d <- simulate(m,100,seed=1,'y~x'=0.1)
l <- lm(y~-1+factor(x),data=d)

f <- function(x) coef(lm(x~seq_along(x)))[2]
null <- rep(mean(coef(l)),length(coef(l))) ## just need to make sure we simulate under H0: slope=0
estimate(l,f,R=1e2,null.sim=null)
#> 100 replications
#> 
#>          seq_along(x)
#> Mean         0.014615
#> SD           0.064600
#>                      
#> 2.5%        -0.094477
#> 97.5%        0.146613
#>                      
#> Estimate     0.080949
#> P-value      0.180000
#> 

estimate(l,f)
#>              Estimate Std.Err     2.5%  97.5% P-value
#> seq_along(x)  0.08095 0.06135 -0.03929 0.2012   0.187