Sample blockwise from clustered data
blocksample(data, size, idvar = NULL, replace = TRUE, ...)
A data.frame containing the sampled blocks of rows.
The original id of each sampled row is stored in the attribute 'id' of the returned data.frame.
d <- data.frame(x=rnorm(5), z=rnorm(5), id=c(4,10,10,5,5), v=rnorm(5))
(dd <- blocksample(d,size=20,~id))
#> x z id v
#> 1 1.2846635 -1.2486962 1 0.6486553
#> 1.1 1.2846635 -1.2486962 2 0.6486553
#> 1.2 1.2846635 -1.2486962 3 0.6486553
#> 2 0.2623388 1.2735745 4 -0.7044930
#> 3 0.7316727 -0.4036855 4 1.1835723
#> 4 -1.2000197 0.1349840 5 -1.4012900
#> 5 0.2727637 -1.1101618 5 -0.1566187
#> 1.3 1.2846635 -1.2486962 6 0.6486553
#> 1.4 1.2846635 -1.2486962 7 0.6486553
#> 4.1 -1.2000197 0.1349840 8 -1.4012900
#> 5.1 0.2727637 -1.1101618 8 -0.1566187
#> 1.5 1.2846635 -1.2486962 9 0.6486553
#> 4.2 -1.2000197 0.1349840 10 -1.4012900
#> 5.2 0.2727637 -1.1101618 10 -0.1566187
#> 4.3 -1.2000197 0.1349840 11 -1.4012900
#> 5.3 0.2727637 -1.1101618 11 -0.1566187
#> 4.4 -1.2000197 0.1349840 12 -1.4012900
#> 5.4 0.2727637 -1.1101618 12 -0.1566187
#> 1.6 1.2846635 -1.2486962 13 0.6486553
#> 4.5 -1.2000197 0.1349840 14 -1.4012900
#> 5.5 0.2727637 -1.1101618 14 -0.1566187
#> 4.6 -1.2000197 0.1349840 15 -1.4012900
#> 5.6 0.2727637 -1.1101618 15 -0.1566187
#> 2.1 0.2623388 1.2735745 16 -0.7044930
#> 3.1 0.7316727 -0.4036855 16 1.1835723
#> 1.7 1.2846635 -1.2486962 17 0.6486553
#> 4.7 -1.2000197 0.1349840 18 -1.4012900
#> 5.7 0.2727637 -1.1101618 18 -0.1566187
#> 2.2 0.2623388 1.2735745 19 -0.7044930
#> 3.2 0.7316727 -0.4036855 19 1.1835723
#> 1.8 1.2846635 -1.2486962 20 0.6486553
attributes(dd)$id
#> [1] 4 4 4 10 10 5 5 4 4 5 5 4 5 5 5 5 5 5 4 5 5 5 5 10 10
#> [26] 4 5 5 10 10 4
if (FALSE) { # \dontrun{
blocksample(data.table::data.table(d),1e6,~id)
} # }
d <- data.frame(x=c(1,rnorm(9)),
z=rnorm(10),
id=c(4,10,10,5,5,4,4,5,10,5),
id2=c(1,1,2,1,2,1,1,1,1,2),
v=rnorm(10))
dsample(d,~id, size=2)
#> x z id id2 v id.1
#> 2 -1.0032037 0.62589483 10 1 0.5041811 1
#> 3 -0.7825649 -0.83353084 10 2 -0.1187519 1
#> 9 0.6801000 -0.76067696 10 1 -0.5918016 1
#> 1 1.0000000 -0.09739721 4 1 -0.4282120 2
#> 6 0.8396141 -0.70526662 4 1 0.1479184 2
#> 7 -2.1060205 -0.45457954 4 1 -1.1093351 2
dsample(d,.~id+id2)
#> x z v id
#> 5 -0.51484661 0.6825645 0.8557416 1
#> 10 -0.06015005 0.6603094 0.3145779 1
#> 3 -0.78256491 -0.8335308 -0.1187519 2
#> 4 -0.13370571 0.4886833 0.6540485 3
#> 8 0.10607163 0.0637473 -1.1253394 3
#> 2 -1.00320368 0.6258948 0.5041811 4
#> 9 0.68010004 -0.7606770 -0.5918016 4
#> 2.1 -1.00320368 0.6258948 0.5041811 5
#> 9.1 0.68010004 -0.7606770 -0.5918016 5
#> 3.1 -0.78256491 -0.8335308 -0.1187519 6
#> 5.1 -0.51484661 0.6825645 0.8557416 7
#> 10.1 -0.06015005 0.6603094 0.3145779 7
#> 5.2 -0.51484661 0.6825645 0.8557416 8
#> 10.2 -0.06015005 0.6603094 0.3145779 8
#> 4.1 -0.13370571 0.4886833 0.6540485 9
#> 8.1 0.10607163 0.0637473 -1.1253394 9
#> 2.2 -1.00320368 0.6258948 0.5041811 10
#> 9.2 0.68010004 -0.7606770 -0.5918016 10
dsample(d,x+z~id|x>0,size=5)
#> x z id
#> 1 1.0000000 -0.09739721 1
#> 6 0.8396141 -0.70526662 1
#> 1.1 1.0000000 -0.09739721 2
#> 6.1 0.8396141 -0.70526662 2
#> 1.2 1.0000000 -0.09739721 3
#> 6.2 0.8396141 -0.70526662 3
#> 8 0.1060716 0.06374730 4
#> 8.1 0.1060716 0.06374730 5