Sample blockwise from clustered data
Examples
# Toy data: five rows with random columns x, z and v, plus `id`, which the
# calls below use as the cluster variable (values 4, 10 and 5).
d <- data.frame(
  x = rnorm(5),
  z = rnorm(5),
  id = c(4, 10, 10, 5, 5),
  v = rnorm(5)
)
# Draw 20 clusters (blocks of rows sharing the same `id`) and store the result
# in `dd`; the outer parentheses also print it. In the printed result the `id`
# column numbers the drawn clusters 1..20, and rows of a cluster that is drawn
# more than once reappear with suffixed row names (1.1, 2.3, ...).
(dd <- blocksample(d, size = 20, ~id))
#> x z id v
#> 4 0.4821452 -0.1545095 1 -1.53337315
#> 5 0.3959834 -1.0453677 1 0.01150068
#> 1 -0.7825994 0.6715858 2 1.14649089
#> 1.1 -0.7825994 0.6715858 3 1.14649089
#> 2 -0.7561844 2.2199949 4 0.48228630
#> 3 -0.8763060 0.5204616 4 1.26038336
#> 2.1 -0.7561844 2.2199949 5 0.48228630
#> 3.1 -0.8763060 0.5204616 5 1.26038336
#> 2.2 -0.7561844 2.2199949 6 0.48228630
#> 3.2 -0.8763060 0.5204616 6 1.26038336
#> 4.1 0.4821452 -0.1545095 7 -1.53337315
#> 5.1 0.3959834 -1.0453677 7 0.01150068
#> 2.3 -0.7561844 2.2199949 8 0.48228630
#> 3.3 -0.8763060 0.5204616 8 1.26038336
#> 4.2 0.4821452 -0.1545095 9 -1.53337315
#> 5.2 0.3959834 -1.0453677 9 0.01150068
#> 1.2 -0.7825994 0.6715858 10 1.14649089
#> 2.4 -0.7561844 2.2199949 11 0.48228630
#> 3.4 -0.8763060 0.5204616 11 1.26038336
#> 1.3 -0.7825994 0.6715858 12 1.14649089
#> 2.5 -0.7561844 2.2199949 13 0.48228630
#> 3.5 -0.8763060 0.5204616 13 1.26038336
#> 4.3 0.4821452 -0.1545095 14 -1.53337315
#> 5.3 0.3959834 -1.0453677 14 0.01150068
#> 1.4 -0.7825994 0.6715858 15 1.14649089
#> 1.5 -0.7825994 0.6715858 16 1.14649089
#> 4.4 0.4821452 -0.1545095 17 -1.53337315
#> 5.4 0.3959834 -1.0453677 17 0.01150068
#> 4.5 0.4821452 -0.1545095 18 -1.53337315
#> 5.5 0.3959834 -1.0453677 18 0.01150068
#> 4.6 0.4821452 -0.1545095 19 -1.53337315
#> 5.6 0.3959834 -1.0453677 19 0.01150068
#> 1.6 -0.7825994 0.6715858 20 1.14649089
# The `id` attribute of the sample holds one value per sampled row; in the
# output below these are the original cluster labels (4, 5, 10) that the
# renumbered `id` column replaced.
attr(dd, "id")
#> [1] 5 5 4 4 10 10 10 10 10 10 5 5 10 10 5 5 4 10 10 4 10 10 5 5 4
#> [26] 4 5 5 5 5 5 5 4
# Not executed (guarded by `if (FALSE)`): the same blockwise sampling applied
# to a data.table copy of `d`, with 1e6 as the second positional argument
# (presumably the number of clusters to draw -- confirm against the
# blocksample() signature).
if (FALSE) { # \dontrun{
  blocksample(data.table::data.table(d), 1e6, ~id)
} # }
# Second toy data set: ten rows, two grouping columns (`id` and `id2`) and
# random columns x, z and v; the first value of x is fixed at 1.
d <- data.frame(
  x = c(1, rnorm(9)),
  z = rnorm(10),
  id = c(4, 10, 10, 5, 5, 4, 4, 5, 10, 5),
  id2 = c(1, 1, 2, 1, 2, 1, 1, 1, 1, 2),
  v = rnorm(10)
)
# Sample 2 clusters defined by `id`. The printed result keeps every original
# column (including `id`) and adds `id.1`, which numbers the drawn clusters.
dsample(d, ~id, size = 2)
#> x z id id2 v id.1
#> 1 1.0000000 -0.8086293 4 1 0.3390736 1
#> 6 0.5714767 1.2783414 4 1 -0.3637973 1
#> 7 0.1107420 -1.4864846 4 1 -0.6838435 1
#> 1.1 1.0000000 -0.8086293 4 1 0.3390736 2
#> 6.1 0.5714767 1.2783414 4 1 -0.3637973 2
#> 7.1 0.1107420 -1.4864846 4 1 -0.6838435 2
# Clusters are defined jointly by `id` and `id2`. With `.` on the left-hand
# side, the printed result shows the remaining columns (x, z, v) plus an `id`
# column numbering the drawn clusters; no `size` is given and 10 clusters
# appear in the output below.
dsample(d, . ~ id + id2)
#> x z v id
#> 3 0.7926693 -0.2475841 -0.1276268 1
#> 1 1.0000000 -0.8086293 0.3390736 2
#> 6 0.5714767 1.2783414 -0.3637973 2
#> 7 0.1107420 -1.4864846 -0.6838435 2
#> 4 0.8491297 1.6862562 -1.9193590 3
#> 8 -1.3252587 2.0680256 0.5901875 3
#> 4.1 0.8491297 1.6862562 -1.9193590 4
#> 8.1 -1.3252587 2.0680256 0.5901875 4
#> 1.1 1.0000000 -0.8086293 0.3390736 5
#> 6.1 0.5714767 1.2783414 -0.3637973 5
#> 7.1 0.1107420 -1.4864846 -0.6838435 5
#> 4.2 0.8491297 1.6862562 -1.9193590 6
#> 8.2 -1.3252587 2.0680256 0.5901875 6
#> 1.2 1.0000000 -0.8086293 0.3390736 7
#> 6.2 0.5714767 1.2783414 -0.3637973 7
#> 7.2 0.1107420 -1.4864846 -0.6838435 7
#> 2 2.2024418 -0.0356119 -1.0427741 8
#> 9 -1.1820866 0.8510684 1.2092282 8
#> 5 1.9451423 -0.2893210 -0.3585603 9
#> 10 -0.2516282 0.8101096 0.3914163 9
#> 2.1 2.2024418 -0.0356119 -1.0427741 10
#> 9.1 -1.1820866 0.8510684 1.2092282 10
# Keep only x and z, cluster by `id`, and restrict to rows with x > 0 (the
# condition after `|`), drawing 5 clusters. In the output below, row 9
# (x < 0) never accompanies rows 2 and 3 from the same `id` cluster.
dsample(d, x + z ~ id | x > 0, size = 5)
#> x z id
#> 2 2.2024418 -0.0356119 1
#> 3 0.7926693 -0.2475841 1
#> 1 1.0000000 -0.8086293 2
#> 6 0.5714767 1.2783414 2
#> 7 0.1107420 -1.4864846 2
#> 2.1 2.2024418 -0.0356119 3
#> 3.1 0.7926693 -0.2475841 3
#> 1.1 1.0000000 -0.8086293 4
#> 6.1 0.5714767 1.2783414 4
#> 7.1 0.1107420 -1.4864846 4
#> 2.2 2.2024418 -0.0356119 5
#> 3.2 0.7926693 -0.2475841 5
