Sample blockwise from clustered data

blocksample(data, size, idvar = NULL, replace = TRUE, ...)

Arguments

data

Data frame

size

Number of clusters (blocks) to sample

idvar

Column defining the clusters (may be given as a one-sided formula, e.g. ~id)

replace

Logical indicating whether to sample with replacement

...

Additional arguments passed on to lower-level functions

Value

A data.frame containing the rows of the sampled clusters

Details

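In the returned data the cluster variable is renumbered so that each sampled block gets its own id (see the example below); the original id of each row is stored in the attribute 'id'.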

Author

Klaus K. Holst

Examples


d <- data.frame(x=rnorm(5), z=rnorm(5), id=c(4,10,10,5,5), v=rnorm(5))
(dd <- blocksample(d,size=20,~id))
#>              x          z id          v
#> 2   -0.1860536  0.7643334  1 -0.4191347
#> 3   -1.4318887 -0.1859843  1 -0.7272482
#> 1    1.6798539 -0.1808598  2  0.7690749
#> 1.1  1.6798539 -0.1808598  3  0.7690749
#> 1.2  1.6798539 -0.1808598  4  0.7690749
#> 2.1 -0.1860536  0.7643334  5 -0.4191347
#> 3.1 -1.4318887 -0.1859843  5 -0.7272482
#> 1.3  1.6798539 -0.1808598  6  0.7690749
#> 2.2 -0.1860536  0.7643334  7 -0.4191347
#> 3.2 -1.4318887 -0.1859843  7 -0.7272482
#> 1.4  1.6798539 -0.1808598  8  0.7690749
#> 4    1.0531112  2.1390011  9  0.3442342
#> 5   -1.7536511 -1.8835131  9  0.1622619
#> 4.1  1.0531112  2.1390011 10  0.3442342
#> 5.1 -1.7536511 -1.8835131 10  0.1622619
#> 4.2  1.0531112  2.1390011 11  0.3442342
#> 5.2 -1.7536511 -1.8835131 11  0.1622619
#> 2.3 -0.1860536  0.7643334 12 -0.4191347
#> 3.3 -1.4318887 -0.1859843 12 -0.7272482
#> 1.5  1.6798539 -0.1808598 13  0.7690749
#> 1.6  1.6798539 -0.1808598 14  0.7690749
#> 1.7  1.6798539 -0.1808598 15  0.7690749
#> 4.3  1.0531112  2.1390011 16  0.3442342
#> 5.3 -1.7536511 -1.8835131 16  0.1622619
#> 4.4  1.0531112  2.1390011 17  0.3442342
#> 5.4 -1.7536511 -1.8835131 17  0.1622619
#> 2.4 -0.1860536  0.7643334 18 -0.4191347
#> 3.4 -1.4318887 -0.1859843 18 -0.7272482
#> 4.5  1.0531112  2.1390011 19  0.3442342
#> 5.5 -1.7536511 -1.8835131 19  0.1622619
#> 2.5 -0.1860536  0.7643334 20 -0.4191347
#> 3.5 -1.4318887 -0.1859843 20 -0.7272482
attributes(dd)$id
#>  [1] 10 10  4  4  4 10 10  4 10 10  4  5  5  5  5  5  5 10 10  4  4  4  5  5  5
#> [26]  5 10 10  5  5 10 10

if (FALSE) {
blocksample(data.table::data.table(d),1e6,~id)
}


d <- data.frame(x=c(1,rnorm(9)),
               z=rnorm(10),
               id=c(4,10,10,5,5,4,4,5,10,5),
               id2=c(1,1,2,1,2,1,1,1,1,2),
               v=rnorm(10))
dsample(d,~id, size=2)
#>           x           z id id2           v id.1
#> 1 1.0000000 -0.64228668  4   1  0.05817816    1
#> 6 1.3780091 -0.43690763  4   1  1.45718384    1
#> 7 1.0934867  0.02556918  4   1 -0.09948385    1
#> 2 0.7661782  0.28146337 10   1 -1.11519023    2
#> 3 1.0703792  0.85726708 10   2  1.23138876    2
#> 9 0.6452734 -0.23604570 10   1 -1.61353377    2
dsample(d,.~id+id2)
#>               x           z           v id
#> 5     1.0928953  0.76328170  0.35033757  1
#> 10    0.7771008  1.19918159 -0.40722640  1
#> 1     1.0000000 -0.64228668  0.05817816  2
#> 6     1.3780091 -0.43690763  1.45718384  2
#> 7     1.0934867  0.02556918 -0.09948385  2
#> 4    -0.2381451  0.77353186 -0.51109310  3
#> 8    -0.1986250 -0.46409760 -0.38706264  3
#> 5.1   1.0928953  0.76328170  0.35033757  4
#> 10.1  0.7771008  1.19918159 -0.40722640  4
#> 1.1   1.0000000 -0.64228668  0.05817816  5
#> 6.1   1.3780091 -0.43690763  1.45718384  5
#> 7.1   1.0934867  0.02556918 -0.09948385  5
#> 1.2   1.0000000 -0.64228668  0.05817816  6
#> 6.2   1.3780091 -0.43690763  1.45718384  6
#> 7.2   1.0934867  0.02556918 -0.09948385  6
#> 3     1.0703792  0.85726708  1.23138876  7
#> 5.2   1.0928953  0.76328170  0.35033757  8
#> 10.2  0.7771008  1.19918159 -0.40722640  8
#> 4.1  -0.2381451  0.77353186 -0.51109310  9
#> 8.1  -0.1986250 -0.46409760 -0.38706264  9
#> 4.2  -0.2381451  0.77353186 -0.51109310 10
#> 8.2  -0.1986250 -0.46409760 -0.38706264 10
dsample(d,x+z~id|x>0,size=5)
#>             x           z id
#> 5   1.0928953  0.76328170  1
#> 10  0.7771008  1.19918159  1
#> 2   0.7661782  0.28146337  2
#> 3   1.0703792  0.85726708  2
#> 9   0.6452734 -0.23604570  2
#> 1   1.0000000 -0.64228668  3
#> 6   1.3780091 -0.43690763  3
#> 7   1.0934867  0.02556918  3
#> 1.1 1.0000000 -0.64228668  4
#> 6.1 1.3780091 -0.43690763  4
#> 7.1 1.0934867  0.02556918  4
#> 1.2 1.0000000 -0.64228668  5
#> 6.2 1.3780091 -0.43690763  5
#> 7.2 1.0934867  0.02556918  5