Sample blockwise from clustered data
Examples
# Toy clustered data: five rows in three clusters labelled by `id` (4, 10, 5).
d <- data.frame(
  x = rnorm(5),
  z = rnorm(5),
  id = c(4, 10, 10, 5, 5),
  v = rnorm(5)
)
# Draw 20 clusters (blocks of rows sharing `id`); the outer parentheses print
# the result. In the printed output each draw gets a fresh `id` (1..20) and
# all rows of a drawn cluster stay together.
(dd <- blocksample(d, ~id, size = 20))
#> x z id v
#> 4 -1.343691 0.8570311 1 0.7443112
#> 5 -1.048993 0.1789570 1 -0.3518537
#> 4.1 -1.343691 0.8570311 2 0.7443112
#> 5.1 -1.048993 0.1789570 2 -0.3518537
#> 2 1.463933 -1.0927775 3 0.4502932
#> 3 1.568890 -0.1499354 3 0.6133362
#> 2.1 1.463933 -1.0927775 4 0.4502932
#> 3.1 1.568890 -0.1499354 4 0.6133362
#> 1 -2.260427 0.1930874 5 0.8026301
#> 1.1 -2.260427 0.1930874 6 0.8026301
#> 2.2 1.463933 -1.0927775 7 0.4502932
#> 3.2 1.568890 -0.1499354 7 0.6133362
#> 4.2 -1.343691 0.8570311 8 0.7443112
#> 5.2 -1.048993 0.1789570 8 -0.3518537
#> 4.3 -1.343691 0.8570311 9 0.7443112
#> 5.3 -1.048993 0.1789570 9 -0.3518537
#> 4.4 -1.343691 0.8570311 10 0.7443112
#> 5.4 -1.048993 0.1789570 10 -0.3518537
#> 1.2 -2.260427 0.1930874 11 0.8026301
#> 1.3 -2.260427 0.1930874 12 0.8026301
#> 4.5 -1.343691 0.8570311 13 0.7443112
#> 5.5 -1.048993 0.1789570 13 -0.3518537
#> 2.3 1.463933 -1.0927775 14 0.4502932
#> 3.3 1.568890 -0.1499354 14 0.6133362
#> 1.4 -2.260427 0.1930874 15 0.8026301
#> 1.5 -2.260427 0.1930874 16 0.8026301
#> 4.6 -1.343691 0.8570311 17 0.7443112
#> 5.6 -1.048993 0.1789570 17 -0.3518537
#> 1.6 -2.260427 0.1930874 18 0.8026301
#> 1.7 -2.260427 0.1930874 19 0.8026301
#> 4.7 -1.343691 0.8570311 20 0.7443112
#> 5.7 -1.048993 0.1789570 20 -0.3518537
# The original cluster label of every sampled row is kept in the `id` attribute.
attr(dd, "id")
#> [1] 5 5 5 5 10 10 10 10 4 4 10 10 5 5 5 5 5 5 4 4 5 5 10 10 4
#> [26] 4 5 5 4 4 5 5
if (FALSE) { # \dontrun{
  # Not run: the same kind of call on a data.table input with 1e6 as the
  # second argument (presumably `size` -- confirm against the signature).
  blocksample(data.table::data.table(d), 1e6, ~id)
} # }
# Larger toy data: ten rows, cluster label `id` plus a second grouping
# variable `id2`; the first `x` value is fixed at 1.
d <- data.frame(
  x = c(1, rnorm(9)),
  z = rnorm(10),
  id = c(4, 10, 10, 5, 5, 4, 4, 5, 10, 5),
  id2 = c(1, 1, 2, 1, 2, 1, 1, 1, 1, 2),
  v = rnorm(10)
)
# Sample 2 clusters defined by `id`. The printed result keeps every original
# column and appends the new cluster label as `id.1`.
dsample(d, ~id, size = 2)
#> x z id id2 v id.1
#> 1 1.00000000 -2.0086151 4 1 0.92172701 1
#> 6 -0.09405042 -0.5680621 4 1 0.01204515 1
#> 7 -0.14746874 -0.2470949 4 1 1.18385699 1
#> 4 3.08775057 -0.4998917 5 1 -0.85054942 2
#> 5 1.58046816 -0.4244225 5 2 2.09775398 2
#> 8 -0.74460167 0.9049383 5 1 0.78316393 2
#> 10 -1.09740779 0.8278474 5 2 0.22017679 2
# Clusters are the combinations of `id` and `id2`. With `.` on the left-hand
# side the printed result keeps the remaining columns (x, z, v) and adds a
# fresh `id`. No `size` is given; the output below shows 10 sampled clusters.
dsample(d, . ~ id + id2)
#> x z v id
#> 2 -0.45626536 0.9083831 -0.34610735 1
#> 9 -0.84850307 1.1791014 1.31676742 1
#> 5 1.58046816 -0.4244225 2.09775398 2
#> 10 -1.09740779 0.8278474 0.22017679 2
#> 5.1 1.58046816 -0.4244225 2.09775398 3
#> 10.1 -1.09740779 0.8278474 0.22017679 3
#> 3 -0.53259681 -0.1478809 -0.70094140 4
#> 1 1.00000000 -2.0086151 0.92172701 5
#> 6 -0.09405042 -0.5680621 0.01204515 5
#> 7 -0.14746874 -0.2470949 1.18385699 5
#> 2.1 -0.45626536 0.9083831 -0.34610735 6
#> 9.1 -0.84850307 1.1791014 1.31676742 6
#> 3.1 -0.53259681 -0.1478809 -0.70094140 7
#> 1.1 1.00000000 -2.0086151 0.92172701 8
#> 6.1 -0.09405042 -0.5680621 0.01204515 8
#> 7.1 -0.14746874 -0.2470949 1.18385699 8
#> 3.2 -0.53259681 -0.1478809 -0.70094140 9
#> 4 3.08775057 -0.4998917 -0.85054942 10
#> 8 -0.74460167 0.9049383 0.78316393 10
# Keep only columns x and z, restrict to rows with x > 0 (the part after `|`),
# and sample 5 clusters defined by `id`.
dsample(d, x + z ~ id | x > 0, size = 5)
#> x z id
#> 1 1.000000 -2.0086151 1
#> 4 3.087751 -0.4998917 2
#> 5 1.580468 -0.4244225 2
#> 4.1 3.087751 -0.4998917 3
#> 5.1 1.580468 -0.4244225 3
#> 1.1 1.000000 -2.0086151 4
#> 4.2 3.087751 -0.4998917 5
#> 5.2 1.580468 -0.4244225 5
