Sample blockwise from clustered data

blocksample(data, size, idvar = NULL, replace = TRUE, ...)

Arguments

data

Data frame

size

Number of clusters (blocks) to sample

idvar

Column defining the clusters (may be given as a one-sided formula, e.g. ~id)

replace

Logical indicating whether to sample with replacement

...

Additional arguments passed to lower-level functions

Value

data.frame

Details

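In the returned data the cluster column is renumbered consecutively (1, 2, ..., one value per sampled cluster). The original cluster id of each returned row is stored in the attribute 'id'.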

Author

Klaus K. Holst

Examples


d <- data.frame(x=rnorm(5), z=rnorm(5), id=c(4,10,10,5,5), v=rnorm(5))
(dd <- blocksample(d,size=20,~id))
#>              x          z id          v
#> 1    1.2846635 -1.2486962  1  0.6486553
#> 1.1  1.2846635 -1.2486962  2  0.6486553
#> 1.2  1.2846635 -1.2486962  3  0.6486553
#> 2    0.2623388  1.2735745  4 -0.7044930
#> 3    0.7316727 -0.4036855  4  1.1835723
#> 4   -1.2000197  0.1349840  5 -1.4012900
#> 5    0.2727637 -1.1101618  5 -0.1566187
#> 1.3  1.2846635 -1.2486962  6  0.6486553
#> 1.4  1.2846635 -1.2486962  7  0.6486553
#> 4.1 -1.2000197  0.1349840  8 -1.4012900
#> 5.1  0.2727637 -1.1101618  8 -0.1566187
#> 1.5  1.2846635 -1.2486962  9  0.6486553
#> 4.2 -1.2000197  0.1349840 10 -1.4012900
#> 5.2  0.2727637 -1.1101618 10 -0.1566187
#> 4.3 -1.2000197  0.1349840 11 -1.4012900
#> 5.3  0.2727637 -1.1101618 11 -0.1566187
#> 4.4 -1.2000197  0.1349840 12 -1.4012900
#> 5.4  0.2727637 -1.1101618 12 -0.1566187
#> 1.6  1.2846635 -1.2486962 13  0.6486553
#> 4.5 -1.2000197  0.1349840 14 -1.4012900
#> 5.5  0.2727637 -1.1101618 14 -0.1566187
#> 4.6 -1.2000197  0.1349840 15 -1.4012900
#> 5.6  0.2727637 -1.1101618 15 -0.1566187
#> 2.1  0.2623388  1.2735745 16 -0.7044930
#> 3.1  0.7316727 -0.4036855 16  1.1835723
#> 1.7  1.2846635 -1.2486962 17  0.6486553
#> 4.7 -1.2000197  0.1349840 18 -1.4012900
#> 5.7  0.2727637 -1.1101618 18 -0.1566187
#> 2.2  0.2623388  1.2735745 19 -0.7044930
#> 3.2  0.7316727 -0.4036855 19  1.1835723
#> 1.8  1.2846635 -1.2486962 20  0.6486553
attributes(dd)$id
#>  [1]  4  4  4 10 10  5  5  4  4  5  5  4  5  5  5  5  5  5  4  5  5  5  5 10 10
#> [26]  4  5  5 10 10  4

if (FALSE) { # \dontrun{
blocksample(data.table::data.table(d),1e6,~id)
} # }


d <- data.frame(x=c(1,rnorm(9)),
               z=rnorm(10),
               id=c(4,10,10,5,5,4,4,5,10,5),
               id2=c(1,1,2,1,2,1,1,1,1,2),
               v=rnorm(10))
dsample(d,~id, size=2)
#>            x           z id id2          v id.1
#> 2 -1.0032037  0.62589483 10   1  0.5041811    1
#> 3 -0.7825649 -0.83353084 10   2 -0.1187519    1
#> 9  0.6801000 -0.76067696 10   1 -0.5918016    1
#> 1  1.0000000 -0.09739721  4   1 -0.4282120    2
#> 6  0.8396141 -0.70526662  4   1  0.1479184    2
#> 7 -2.1060205 -0.45457954  4   1 -1.1093351    2
dsample(d,.~id+id2)
#>                x          z          v id
#> 5    -0.51484661  0.6825645  0.8557416  1
#> 10   -0.06015005  0.6603094  0.3145779  1
#> 3    -0.78256491 -0.8335308 -0.1187519  2
#> 4    -0.13370571  0.4886833  0.6540485  3
#> 8     0.10607163  0.0637473 -1.1253394  3
#> 2    -1.00320368  0.6258948  0.5041811  4
#> 9     0.68010004 -0.7606770 -0.5918016  4
#> 2.1  -1.00320368  0.6258948  0.5041811  5
#> 9.1   0.68010004 -0.7606770 -0.5918016  5
#> 3.1  -0.78256491 -0.8335308 -0.1187519  6
#> 5.1  -0.51484661  0.6825645  0.8557416  7
#> 10.1 -0.06015005  0.6603094  0.3145779  7
#> 5.2  -0.51484661  0.6825645  0.8557416  8
#> 10.2 -0.06015005  0.6603094  0.3145779  8
#> 4.1  -0.13370571  0.4886833  0.6540485  9
#> 8.1   0.10607163  0.0637473 -1.1253394  9
#> 2.2  -1.00320368  0.6258948  0.5041811 10
#> 9.2   0.68010004 -0.7606770 -0.5918016 10
dsample(d,x+z~id|x>0,size=5)
#>             x           z id
#> 1   1.0000000 -0.09739721  1
#> 6   0.8396141 -0.70526662  1
#> 1.1 1.0000000 -0.09739721  2
#> 6.1 0.8396141 -0.70526662  2
#> 1.2 1.0000000 -0.09739721  3
#> 6.2 0.8396141 -0.70526662  3
#> 8   0.1060716  0.06374730  4
#> 8.1 0.1060716  0.06374730  5