Sample blockwise from clustered data

blocksample(data, size, idvar = NULL, replace = TRUE, ...)

Arguments

data

Data frame

size

Size of the sample (when idvar is given, the number of clusters to draw)

idvar

Column defining the clusters (e.g. given as a one-sided formula such as ~id)

replace

Logical indicating whether to sample with replacement

...

Additional arguments passed on to lower-level functions

Value

data.frame

Details

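The sampled clusters are relabelled with new consecutive ids (1, 2, ...) in the id column; the original ids are stored in the attribute 'id' of the returned data frame.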

Author

Klaus K. Holst

Examples


d <- data.frame(x=rnorm(5), z=rnorm(5), id=c(4,10,10,5,5), v=rnorm(5))
(dd <- blocksample(d,size=20,~id))
#>                x          z id          v
#> 1   -0.199163996 -0.8751277  1 -0.5425570
#> 4   -0.234900827 -1.0110285  2  1.3142519
#> 5    0.806117016 -1.5661159  2 -0.7775337
#> 1.1 -0.199163996 -0.8751277  3 -0.5425570
#> 2    0.007765097  0.7091612  4 -1.0851680
#> 3    0.111327769 -0.3777224  4 -1.5270909
#> 4.1 -0.234900827 -1.0110285  5  1.3142519
#> 5.1  0.806117016 -1.5661159  5 -0.7775337
#> 4.2 -0.234900827 -1.0110285  6  1.3142519
#> 5.2  0.806117016 -1.5661159  6 -0.7775337
#> 2.1  0.007765097  0.7091612  7 -1.0851680
#> 3.1  0.111327769 -0.3777224  7 -1.5270909
#> 4.3 -0.234900827 -1.0110285  8  1.3142519
#> 5.3  0.806117016 -1.5661159  8 -0.7775337
#> 1.2 -0.199163996 -0.8751277  9 -0.5425570
#> 1.3 -0.199163996 -0.8751277 10 -0.5425570
#> 1.4 -0.199163996 -0.8751277 11 -0.5425570
#> 2.2  0.007765097  0.7091612 12 -1.0851680
#> 3.2  0.111327769 -0.3777224 12 -1.5270909
#> 4.4 -0.234900827 -1.0110285 13  1.3142519
#> 5.4  0.806117016 -1.5661159 13 -0.7775337
#> 1.5 -0.199163996 -0.8751277 14 -0.5425570
#> 2.3  0.007765097  0.7091612 15 -1.0851680
#> 3.3  0.111327769 -0.3777224 15 -1.5270909
#> 1.6 -0.199163996 -0.8751277 16 -0.5425570
#> 1.7 -0.199163996 -0.8751277 17 -0.5425570
#> 4.5 -0.234900827 -1.0110285 18  1.3142519
#> 5.5  0.806117016 -1.5661159 18 -0.7775337
#> 4.6 -0.234900827 -1.0110285 19  1.3142519
#> 5.6  0.806117016 -1.5661159 19 -0.7775337
#> 4.7 -0.234900827 -1.0110285 20  1.3142519
#> 5.7  0.806117016 -1.5661159 20 -0.7775337
attributes(dd)$id
#>  [1]  4  5  5  4 10 10  5  5  5  5 10 10  5  5  4  4  4 10 10  5  5  4 10 10  4
#> [26]  4  5  5  5  5  5  5

if (FALSE) { # \dontrun{
blocksample(data.table::data.table(d),1e6,~id)
} # }


d <- data.frame(x=c(1,rnorm(9)),
               z=rnorm(10),
               id=c(4,10,10,5,5,4,4,5,10,5),
               id2=c(1,1,2,1,2,1,1,1,1,2),
               v=rnorm(10))
dsample(d,~id, size=2)
#>            x          z id id2          v id.1
#> 2 -1.3015354  1.8963336 10   1  0.1146672    1
#> 3 -2.2328145 -0.6553967 10   2  1.3539106    1
#> 9  0.7221283  0.2433119 10   1  1.6450700    1
#> 1  1.0000000 -0.5431344  4   1 -0.7914629    2
#> 6 -0.3820466 -0.9365007  4   1  0.8371893    2
#> 7  1.2936727  0.4492428  4   1  2.0438222    2
dsample(d,.~id+id2)
#>              x          z          v id
#> 2   -1.3015354  1.8963336  0.1146672  1
#> 9    0.7221283  0.2433119  1.6450700  1
#> 5    0.2480692 -1.4073548  0.1015301  2
#> 10  -0.6817843 -0.1823694  0.2514141  2
#> 1    1.0000000 -0.5431344 -0.7914629  3
#> 6   -0.3820466 -0.9365007  0.8371893  3
#> 7    1.2936727  0.4492428  2.0438222  3
#> 2.1 -1.3015354  1.8963336  0.1146672  4
#> 9.1  0.7221283  0.2433119  1.6450700  4
#> 1.1  1.0000000 -0.5431344 -0.7914629  5
#> 6.1 -0.3820466 -0.9365007  0.8371893  5
#> 7.1  1.2936727  0.4492428  2.0438222  5
#> 3   -2.2328145 -0.6553967  1.3539106  6
#> 2.2 -1.3015354  1.8963336  0.1146672  7
#> 9.2  0.7221283  0.2433119  1.6450700  7
#> 1.2  1.0000000 -0.5431344 -0.7914629  8
#> 6.2 -0.3820466 -0.9365007  0.8371893  8
#> 7.2  1.2936727  0.4492428  2.0438222  8
#> 3.1 -2.2328145 -0.6553967  1.3539106  9
#> 2.3 -1.3015354  1.8963336  0.1146672 10
#> 9.3  0.7221283  0.2433119  1.6450700 10
dsample(d,x+z~id|x>0,size=5)
#>             x          z id
#> 1   1.0000000 -0.5431344  1
#> 7   1.2936727  0.4492428  1
#> 1.1 1.0000000 -0.5431344  2
#> 7.1 1.2936727  0.4492428  2
#> 4   0.5090305 -0.4130785  3
#> 5   0.2480692 -1.4073548  3
#> 8   0.3212879  1.3654902  3
#> 4.1 0.5090305 -0.4130785  4
#> 5.1 0.2480692 -1.4073548  4
#> 8.1 0.3212879  1.3654902  4
#> 4.2 0.5090305 -0.4130785  5
#> 5.2 0.2480692 -1.4073548  5
#> 8.2 0.3212879  1.3654902  5