Sample blockwise from clustered data
blocksample(data, size, idvar = NULL, replace = TRUE, ...)
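Returns a data.frame in which the id variable is renumbered consecutively, one new id per sampled block.
The original id values are stored, row by row, in the attribute 'id' of the returned data.frame.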
d <- data.frame(x=rnorm(5), z=rnorm(5), id=c(4,10,10,5,5), v=rnorm(5))
(dd <- blocksample(d,size=20,~id))
#> x z id v
#> 1 -0.199163996 -0.8751277 1 -0.5425570
#> 4 -0.234900827 -1.0110285 2 1.3142519
#> 5 0.806117016 -1.5661159 2 -0.7775337
#> 1.1 -0.199163996 -0.8751277 3 -0.5425570
#> 2 0.007765097 0.7091612 4 -1.0851680
#> 3 0.111327769 -0.3777224 4 -1.5270909
#> 4.1 -0.234900827 -1.0110285 5 1.3142519
#> 5.1 0.806117016 -1.5661159 5 -0.7775337
#> 4.2 -0.234900827 -1.0110285 6 1.3142519
#> 5.2 0.806117016 -1.5661159 6 -0.7775337
#> 2.1 0.007765097 0.7091612 7 -1.0851680
#> 3.1 0.111327769 -0.3777224 7 -1.5270909
#> 4.3 -0.234900827 -1.0110285 8 1.3142519
#> 5.3 0.806117016 -1.5661159 8 -0.7775337
#> 1.2 -0.199163996 -0.8751277 9 -0.5425570
#> 1.3 -0.199163996 -0.8751277 10 -0.5425570
#> 1.4 -0.199163996 -0.8751277 11 -0.5425570
#> 2.2 0.007765097 0.7091612 12 -1.0851680
#> 3.2 0.111327769 -0.3777224 12 -1.5270909
#> 4.4 -0.234900827 -1.0110285 13 1.3142519
#> 5.4 0.806117016 -1.5661159 13 -0.7775337
#> 1.5 -0.199163996 -0.8751277 14 -0.5425570
#> 2.3 0.007765097 0.7091612 15 -1.0851680
#> 3.3 0.111327769 -0.3777224 15 -1.5270909
#> 1.6 -0.199163996 -0.8751277 16 -0.5425570
#> 1.7 -0.199163996 -0.8751277 17 -0.5425570
#> 4.5 -0.234900827 -1.0110285 18 1.3142519
#> 5.5 0.806117016 -1.5661159 18 -0.7775337
#> 4.6 -0.234900827 -1.0110285 19 1.3142519
#> 5.6 0.806117016 -1.5661159 19 -0.7775337
#> 4.7 -0.234900827 -1.0110285 20 1.3142519
#> 5.7 0.806117016 -1.5661159 20 -0.7775337
attributes(dd)$id
#> [1] 4 5 5 4 10 10 5 5 5 5 10 10 5 5 4 4 4 10 10 5 5 4 10 10 4
#> [26] 4 5 5 5 5 5 5
if (FALSE) { # \dontrun{
blocksample(data.table::data.table(d),1e6,~id)
} # }
d <- data.frame(x=c(1,rnorm(9)),
z=rnorm(10),
id=c(4,10,10,5,5,4,4,5,10,5),
id2=c(1,1,2,1,2,1,1,1,1,2),
v=rnorm(10))
dsample(d,~id, size=2)
#> x z id id2 v id.1
#> 2 -1.3015354 1.8963336 10 1 0.1146672 1
#> 3 -2.2328145 -0.6553967 10 2 1.3539106 1
#> 9 0.7221283 0.2433119 10 1 1.6450700 1
#> 1 1.0000000 -0.5431344 4 1 -0.7914629 2
#> 6 -0.3820466 -0.9365007 4 1 0.8371893 2
#> 7 1.2936727 0.4492428 4 1 2.0438222 2
dsample(d,.~id+id2)
#> x z v id
#> 2 -1.3015354 1.8963336 0.1146672 1
#> 9 0.7221283 0.2433119 1.6450700 1
#> 5 0.2480692 -1.4073548 0.1015301 2
#> 10 -0.6817843 -0.1823694 0.2514141 2
#> 1 1.0000000 -0.5431344 -0.7914629 3
#> 6 -0.3820466 -0.9365007 0.8371893 3
#> 7 1.2936727 0.4492428 2.0438222 3
#> 2.1 -1.3015354 1.8963336 0.1146672 4
#> 9.1 0.7221283 0.2433119 1.6450700 4
#> 1.1 1.0000000 -0.5431344 -0.7914629 5
#> 6.1 -0.3820466 -0.9365007 0.8371893 5
#> 7.1 1.2936727 0.4492428 2.0438222 5
#> 3 -2.2328145 -0.6553967 1.3539106 6
#> 2.2 -1.3015354 1.8963336 0.1146672 7
#> 9.2 0.7221283 0.2433119 1.6450700 7
#> 1.2 1.0000000 -0.5431344 -0.7914629 8
#> 6.2 -0.3820466 -0.9365007 0.8371893 8
#> 7.2 1.2936727 0.4492428 2.0438222 8
#> 3.1 -2.2328145 -0.6553967 1.3539106 9
#> 2.3 -1.3015354 1.8963336 0.1146672 10
#> 9.3 0.7221283 0.2433119 1.6450700 10
dsample(d,x+z~id|x>0,size=5)
#> x z id
#> 1 1.0000000 -0.5431344 1
#> 7 1.2936727 0.4492428 1
#> 1.1 1.0000000 -0.5431344 2
#> 7.1 1.2936727 0.4492428 2
#> 4 0.5090305 -0.4130785 3
#> 5 0.2480692 -1.4073548 3
#> 8 0.3212879 1.3654902 3
#> 4.1 0.5090305 -0.4130785 4
#> 5.1 0.2480692 -1.4073548 4
#> 8.1 0.3212879 1.3654902 4
#> 4.2 0.5090305 -0.4130785 5
#> 5.2 0.2480692 -1.4073548 5
#> 8.2 0.3212879 1.3654902 5