Sample blockwise from clustered data
blocksample(data, size, idvar = NULL, replace = TRUE, ...)
Data frame
Size of the sample, i.e. the number of clusters (blocks) to draw
Column defining the clusters (may be given as a one-sided formula, e.g. ~id)
Logical indicating whether to sample with replacement
Additional arguments passed on to lower-level functions
data.frame
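The cluster id column of the returned data frame is renumbered consecutively (one new id per sampled cluster); the original ids are stored in the attribute 'id'.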
d <- data.frame(x=rnorm(5), z=rnorm(5), id=c(4,10,10,5,5), v=rnorm(5))
(dd <- blocksample(d,size=20,~id))
#> x z id v
#> 2 -0.1860536 0.7643334 1 -0.4191347
#> 3 -1.4318887 -0.1859843 1 -0.7272482
#> 1 1.6798539 -0.1808598 2 0.7690749
#> 1.1 1.6798539 -0.1808598 3 0.7690749
#> 1.2 1.6798539 -0.1808598 4 0.7690749
#> 2.1 -0.1860536 0.7643334 5 -0.4191347
#> 3.1 -1.4318887 -0.1859843 5 -0.7272482
#> 1.3 1.6798539 -0.1808598 6 0.7690749
#> 2.2 -0.1860536 0.7643334 7 -0.4191347
#> 3.2 -1.4318887 -0.1859843 7 -0.7272482
#> 1.4 1.6798539 -0.1808598 8 0.7690749
#> 4 1.0531112 2.1390011 9 0.3442342
#> 5 -1.7536511 -1.8835131 9 0.1622619
#> 4.1 1.0531112 2.1390011 10 0.3442342
#> 5.1 -1.7536511 -1.8835131 10 0.1622619
#> 4.2 1.0531112 2.1390011 11 0.3442342
#> 5.2 -1.7536511 -1.8835131 11 0.1622619
#> 2.3 -0.1860536 0.7643334 12 -0.4191347
#> 3.3 -1.4318887 -0.1859843 12 -0.7272482
#> 1.5 1.6798539 -0.1808598 13 0.7690749
#> 1.6 1.6798539 -0.1808598 14 0.7690749
#> 1.7 1.6798539 -0.1808598 15 0.7690749
#> 4.3 1.0531112 2.1390011 16 0.3442342
#> 5.3 -1.7536511 -1.8835131 16 0.1622619
#> 4.4 1.0531112 2.1390011 17 0.3442342
#> 5.4 -1.7536511 -1.8835131 17 0.1622619
#> 2.4 -0.1860536 0.7643334 18 -0.4191347
#> 3.4 -1.4318887 -0.1859843 18 -0.7272482
#> 4.5 1.0531112 2.1390011 19 0.3442342
#> 5.5 -1.7536511 -1.8835131 19 0.1622619
#> 2.5 -0.1860536 0.7643334 20 -0.4191347
#> 3.5 -1.4318887 -0.1859843 20 -0.7272482
attributes(dd)$id
#> [1] 10 10 4 4 4 10 10 4 10 10 4 5 5 5 5 5 5 10 10 4 4 4 5 5 5
#> [26] 5 10 10 5 5 10 10
if (FALSE) {
blocksample(data.table::data.table(d),1e6,~id)
}
d <- data.frame(x=c(1,rnorm(9)),
z=rnorm(10),
id=c(4,10,10,5,5,4,4,5,10,5),
id2=c(1,1,2,1,2,1,1,1,1,2),
v=rnorm(10))
dsample(d,~id, size=2)
#> x z id id2 v id.1
#> 1 1.0000000 -0.64228668 4 1 0.05817816 1
#> 6 1.3780091 -0.43690763 4 1 1.45718384 1
#> 7 1.0934867 0.02556918 4 1 -0.09948385 1
#> 2 0.7661782 0.28146337 10 1 -1.11519023 2
#> 3 1.0703792 0.85726708 10 2 1.23138876 2
#> 9 0.6452734 -0.23604570 10 1 -1.61353377 2
dsample(d,.~id+id2)
#> x z v id
#> 5 1.0928953 0.76328170 0.35033757 1
#> 10 0.7771008 1.19918159 -0.40722640 1
#> 1 1.0000000 -0.64228668 0.05817816 2
#> 6 1.3780091 -0.43690763 1.45718384 2
#> 7 1.0934867 0.02556918 -0.09948385 2
#> 4 -0.2381451 0.77353186 -0.51109310 3
#> 8 -0.1986250 -0.46409760 -0.38706264 3
#> 5.1 1.0928953 0.76328170 0.35033757 4
#> 10.1 0.7771008 1.19918159 -0.40722640 4
#> 1.1 1.0000000 -0.64228668 0.05817816 5
#> 6.1 1.3780091 -0.43690763 1.45718384 5
#> 7.1 1.0934867 0.02556918 -0.09948385 5
#> 1.2 1.0000000 -0.64228668 0.05817816 6
#> 6.2 1.3780091 -0.43690763 1.45718384 6
#> 7.2 1.0934867 0.02556918 -0.09948385 6
#> 3 1.0703792 0.85726708 1.23138876 7
#> 5.2 1.0928953 0.76328170 0.35033757 8
#> 10.2 0.7771008 1.19918159 -0.40722640 8
#> 4.1 -0.2381451 0.77353186 -0.51109310 9
#> 8.1 -0.1986250 -0.46409760 -0.38706264 9
#> 4.2 -0.2381451 0.77353186 -0.51109310 10
#> 8.2 -0.1986250 -0.46409760 -0.38706264 10
dsample(d,x+z~id|x>0,size=5)
#> x z id
#> 5 1.0928953 0.76328170 1
#> 10 0.7771008 1.19918159 1
#> 2 0.7661782 0.28146337 2
#> 3 1.0703792 0.85726708 2
#> 9 0.6452734 -0.23604570 2
#> 1 1.0000000 -0.64228668 3
#> 6 1.3780091 -0.43690763 3
#> 7 1.0934867 0.02556918 3
#> 1.1 1.0000000 -0.64228668 4
#> 6.1 1.3780091 -0.43690763 4
#> 7.1 1.0934867 0.02556918 4
#> 1.2 1.0000000 -0.64228668 5
#> 6.2 1.3780091 -0.43690763 5
#> 7.2 1.0934867 0.02556918 5