Sample blockwise from clustered data
Examples
# Toy clustered data: five rows whose `id` column defines three clusters
# (4, 10 and 5). x, z and v are random draws, so the printed numbers differ
# from run to run.
d <- data.frame(x=rnorm(5), z=rnorm(5), id=c(4,10,10,5,5), v=rnorm(5))
# Draw 20 blocks by `id`. With only three clusters available, clusters are
# reused: in the printed result all rows of a drawn cluster appear together,
# repeated rows get suffixed row names (1.1, 4.1, ...), and the `id` column
# is relabelled 1..20, one label per drawn block.
(dd <- blocksample(d,size=20,~id))
#> x z id v
#> 1 -0.2098647 -0.966818043 1 -0.2770055
#> 1.1 -0.2098647 -0.966818043 2 -0.2770055
#> 4 0.5540451 -0.001732368 3 -1.3458555
#> 5 -0.5581936 -0.351628450 3 -0.2126497
#> 2 -0.7869657 -2.057152528 4 1.1694029
#> 3 1.3636367 0.305695423 4 -0.9603945
#> 4.1 0.5540451 -0.001732368 5 -1.3458555
#> 5.1 -0.5581936 -0.351628450 5 -0.2126497
#> 2.1 -0.7869657 -2.057152528 6 1.1694029
#> 3.1 1.3636367 0.305695423 6 -0.9603945
#> 1.2 -0.2098647 -0.966818043 7 -0.2770055
#> 2.2 -0.7869657 -2.057152528 8 1.1694029
#> 3.2 1.3636367 0.305695423 8 -0.9603945
#> 4.2 0.5540451 -0.001732368 9 -1.3458555
#> 5.2 -0.5581936 -0.351628450 9 -0.2126497
#> 4.3 0.5540451 -0.001732368 10 -1.3458555
#> 5.3 -0.5581936 -0.351628450 10 -0.2126497
#> 1.3 -0.2098647 -0.966818043 11 -0.2770055
#> 1.4 -0.2098647 -0.966818043 12 -0.2770055
#> 2.3 -0.7869657 -2.057152528 13 1.1694029
#> 3.3 1.3636367 0.305695423 13 -0.9603945
#> 1.5 -0.2098647 -0.966818043 14 -0.2770055
#> 4.4 0.5540451 -0.001732368 15 -1.3458555
#> 5.4 -0.5581936 -0.351628450 15 -0.2126497
#> 1.6 -0.2098647 -0.966818043 16 -0.2770055
#> 1.7 -0.2098647 -0.966818043 17 -0.2770055
#> 1.8 -0.2098647 -0.966818043 18 -0.2770055
#> 1.9 -0.2098647 -0.966818043 19 -0.2770055
#> 1.10 -0.2098647 -0.966818043 20 -0.2770055
# The original cluster identifiers (4, 5, 10) are kept in the "id" attribute
# of the result, one entry per returned row.
attributes(dd)$id
#> [1] 4 4 5 5 10 10 5 5 10 10 4 10 10 5 5 5 5 4 4 10 10 4 5 5 4
#> [26] 4 4 4 4
# Not executed when the examples are run (wrapped in \dontrun{}): the same
# kind of call on a data.table version of `d`.
# NOTE(review): 1e6 is passed positionally; presumably it is the number of
# blocks to draw (the `size` argument) -- confirm against the function signature.
if (FALSE) { # \dontrun{
blocksample(data.table::data.table(d),1e6,~id)
} # }
# Second toy data set: ten rows, the same three `id` clusters (4, 10, 5) and
# an additional grouping variable `id2`. The first x value is fixed at 1; the
# remaining x, z and v values are random draws.
d <- data.frame(x=c(1,rnorm(9)),
z=rnorm(10),
id=c(4,10,10,5,5,4,4,5,10,5),
id2=c(1,1,2,1,2,1,1,1,1,2),
v=rnorm(10))
# Sample 2 clusters by `id`. The printed result keeps every original column
# (including the original `id`) and appends the new block label as `id.1`.
dsample(d,~id, size=2)
#> x z id id2 v id.1
#> 1 1.0000000 -0.4024641 4 1 0.09811391 1
#> 6 -0.9481249 0.8533950 4 1 -1.53366661 1
#> 7 0.3808644 0.3585276 4 1 -0.71283679 1
#> 4 -0.7826530 0.5446049 5 1 0.35253922 2
#> 5 0.3506016 -0.2281963 5 2 -0.65197620 2
#> 8 -0.3301687 -0.2970404 5 1 0.74729117 2
#> 10 -0.3731517 -1.7023895 5 2 1.54708600 2
# `.` on the left-hand side with two variables on the right: in the printed
# result the rows of each block share one (id, id2) combination, the `id` and
# `id2` columns are dropped, and a fresh `id` column labels the drawn blocks.
# NOTE(review): no size is given and ten blocks are returned; presumably the
# default block count is derived from the data -- confirm in the function docs.
dsample(d,.~id+id2)
#> x z v id
#> 3 -2.1508496 -0.1699301 0.38173893 1
#> 1 1.0000000 -0.4024641 0.09811391 2
#> 6 -0.9481249 0.8533950 -1.53366661 2
#> 7 0.3808644 0.3585276 -0.71283679 2
#> 3.1 -2.1508496 -0.1699301 0.38173893 3
#> 4 -0.7826530 0.5446049 0.35253922 4
#> 8 -0.3301687 -0.2970404 0.74729117 4
#> 5 0.3506016 -0.2281963 -0.65197620 5
#> 10 -0.3731517 -1.7023895 1.54708600 5
#> 1.1 1.0000000 -0.4024641 0.09811391 6
#> 6.1 -0.9481249 0.8533950 -1.53366661 6
#> 7.1 0.3808644 0.3585276 -0.71283679 6
#> 1.2 1.0000000 -0.4024641 0.09811391 7
#> 6.2 -0.9481249 0.8533950 -1.53366661 7
#> 7.2 0.3808644 0.3585276 -0.71283679 7
#> 2 -2.0273286 1.3327415 0.27081692 8
#> 9 -0.5036722 1.6237297 0.24582474 8
#> 5.1 0.3506016 -0.2281963 -0.65197620 9
#> 10.1 -0.3731517 -1.7023895 1.54708600 9
#> 3.2 -2.1508496 -0.1699301 0.38173893 10
# Formula of the form `x+z ~ id | x>0`: the printed result contains only the
# columns x and z plus the block label `id`, and every returned row has
# x > 0, so the part after `|` acts as a row filter here. size=5 gives block
# labels 1..5.
dsample(d,x+z~id|x>0,size=5)
#> x z id
#> 5 0.3506016 -0.2281963 1
#> 1 1.0000000 -0.4024641 2
#> 7 0.3808644 0.3585276 2
#> 5.1 0.3506016 -0.2281963 3
#> 5.2 0.3506016 -0.2281963 4
#> 5.3 0.3506016 -0.2281963 5
