Sample blockwise from clustered data
Examples
# Toy data: five rows with a cluster identifier `id` (clusters 4, 10 and 5).
# Draw 20 clusters by `id`. In the printed result the rows of a cluster come
# back together (e.g. rows 4 and 5, rows 2 and 3), the same cluster is drawn
# more than once, and the `id` column is renumbered 1..20, one value per draw.
# The outer parentheses print the assigned result.
d <- data.frame(x=rnorm(5), z=rnorm(5), id=c(4,10,10,5,5), v=rnorm(5))
(dd <- blocksample(d,size=20,~id))
#>                x          z id          v
#> 1   -0.199163996 -0.8751277  1 -0.5425570
#> 4   -0.234900827 -1.0110285  2  1.3142519
#> 5    0.806117016 -1.5661159  2 -0.7775337
#> 1.1 -0.199163996 -0.8751277  3 -0.5425570
#> 2    0.007765097  0.7091612  4 -1.0851680
#> 3    0.111327769 -0.3777224  4 -1.5270909
#> 4.1 -0.234900827 -1.0110285  5  1.3142519
#> 5.1  0.806117016 -1.5661159  5 -0.7775337
#> 4.2 -0.234900827 -1.0110285  6  1.3142519
#> 5.2  0.806117016 -1.5661159  6 -0.7775337
#> 2.1  0.007765097  0.7091612  7 -1.0851680
#> 3.1  0.111327769 -0.3777224  7 -1.5270909
#> 4.3 -0.234900827 -1.0110285  8  1.3142519
#> 5.3  0.806117016 -1.5661159  8 -0.7775337
#> 1.2 -0.199163996 -0.8751277  9 -0.5425570
#> 1.3 -0.199163996 -0.8751277 10 -0.5425570
#> 1.4 -0.199163996 -0.8751277 11 -0.5425570
#> 2.2  0.007765097  0.7091612 12 -1.0851680
#> 3.2  0.111327769 -0.3777224 12 -1.5270909
#> 4.4 -0.234900827 -1.0110285 13  1.3142519
#> 5.4  0.806117016 -1.5661159 13 -0.7775337
#> 1.5 -0.199163996 -0.8751277 14 -0.5425570
#> 2.3  0.007765097  0.7091612 15 -1.0851680
#> 3.3  0.111327769 -0.3777224 15 -1.5270909
#> 1.6 -0.199163996 -0.8751277 16 -0.5425570
#> 1.7 -0.199163996 -0.8751277 17 -0.5425570
#> 4.5 -0.234900827 -1.0110285 18  1.3142519
#> 5.5  0.806117016 -1.5661159 18 -0.7775337
#> 4.6 -0.234900827 -1.0110285 19  1.3142519
#> 5.6  0.806117016 -1.5661159 19 -0.7775337
#> 4.7 -0.234900827 -1.0110285 20  1.3142519
#> 5.7  0.806117016 -1.5661159 20 -0.7775337
# The result carries an `id` attribute with one entry per sampled row; in the
# output below it holds the original cluster labels (4, 5, 10) of those rows.
attributes(dd)$id
#>  [1]  4  5  5  4 10 10  5  5  5  5 10 10  5  5  4  4  4 10 10  5  5  4 10 10  4
#> [26]  4  5  5  5  5  5  5
# Not run (wrapped in `if (FALSE)`): the same kind of call on a data.table.
# NOTE(review): 1e6 is presumably the `size` argument passed positionally --
# confirm against the blocksample() signature.
if (FALSE) { # \dontrun{
blocksample(data.table::data.table(d),1e6,~id)
} # }
# Second toy data set: ten rows, two grouping columns (`id`, `id2`), and the
# first value of `x` fixed at 1 (the remaining values are random draws).
d <- data.frame(x=c(1,rnorm(9)),
               z=rnorm(10),
               id=c(4,10,10,5,5,4,4,5,10,5),
               id2=c(1,1,2,1,2,1,1,1,1,2),
               v=rnorm(10))
# Draw 2 clusters defined by `id`. The printed result keeps every original
# column and adds `id.1`, which numbers the drawn clusters (1, 2).
dsample(d,~id, size=2)
#>            x          z id id2          v id.1
#> 2 -1.3015354  1.8963336 10   1  0.1146672    1
#> 3 -2.2328145 -0.6553967 10   2  1.3539106    1
#> 9  0.7221283  0.2433119 10   1  1.6450700    1
#> 1  1.0000000 -0.5431344  4   1 -0.7914629    2
#> 6 -0.3820466 -0.9365007  4   1  0.8371893    2
#> 7  1.2936727  0.4492428  4   1  2.0438222    2
# Two-sided formula: `.` on the left, two cluster variables on the right.
# In the printed result rows are grouped by the id/id2 combination (e.g. rows
# 2 and 9 stay together while row 3 forms its own cluster), the original `id`
# and `id2` columns are gone, and a new `id` column numbers the drawn clusters.
# NOTE(review): no `size` is given and 10 clusters come back -- presumably a
# default; confirm in the dsample() documentation.
dsample(d,.~id+id2)
#>              x          z          v id
#> 2   -1.3015354  1.8963336  0.1146672  1
#> 9    0.7221283  0.2433119  1.6450700  1
#> 5    0.2480692 -1.4073548  0.1015301  2
#> 10  -0.6817843 -0.1823694  0.2514141  2
#> 1    1.0000000 -0.5431344 -0.7914629  3
#> 6   -0.3820466 -0.9365007  0.8371893  3
#> 7    1.2936727  0.4492428  2.0438222  3
#> 2.1 -1.3015354  1.8963336  0.1146672  4
#> 9.1  0.7221283  0.2433119  1.6450700  4
#> 1.1  1.0000000 -0.5431344 -0.7914629  5
#> 6.1 -0.3820466 -0.9365007  0.8371893  5
#> 7.1  1.2936727  0.4492428  2.0438222  5
#> 3   -2.2328145 -0.6553967  1.3539106  6
#> 2.2 -1.3015354  1.8963336  0.1146672  7
#> 9.2  0.7221283  0.2433119  1.6450700  7
#> 1.2  1.0000000 -0.5431344 -0.7914629  8
#> 6.2 -0.3820466 -0.9365007  0.8371893  8
#> 7.2  1.2936727  0.4492428  2.0438222  8
#> 3.1 -2.2328145 -0.6553967  1.3539106  9
#> 2.3 -1.3015354  1.8963336  0.1146672 10
#> 9.3  0.7221283  0.2433119  1.6450700 10
# Formula `x+z ~ id | x>0` with 5 draws. The printed result contains only the
# columns x and z plus a renumbered `id`, and only rows with x > 0 (e.g. row 6
# of cluster 4, whose x is negative, does not appear).
dsample(d,x+z~id|x>0,size=5)
#>             x          z id
#> 1   1.0000000 -0.5431344  1
#> 7   1.2936727  0.4492428  1
#> 1.1 1.0000000 -0.5431344  2
#> 7.1 1.2936727  0.4492428  2
#> 4   0.5090305 -0.4130785  3
#> 5   0.2480692 -1.4073548  3
#> 8   0.3212879  1.3654902  3
#> 4.1 0.5090305 -0.4130785  4
#> 5.1 0.2480692 -1.4073548  4
#> 8.1 0.3212879  1.3654902  4
#> 4.2 0.5090305 -0.4130785  5
#> 5.2 0.2480692 -1.4073548  5
#> 8.2 0.3212879  1.3654902  5