Block sampling — blocksample • mets

Sample blockwise from clustered data

blocksample(data, size, idvar = NULL, replace = TRUE, ...)

Arguments

data: Data frame
size: Number of clusters (blocks) to sample
idvar: Column defining the clusters (may be given as a one-sided formula, e.g. ~id, as in the examples)
replace: Logical indicating whether to sample with replacement
...: Additional arguments passed to lower-level functions

Value

data.frame

Details

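In the returned data the cluster variable is relabelled 1, 2, ... (one new label per sampled block); the original id of each row is stored in the attribute 'id'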

Author

Klaus K. Holst

Examples


d <- data.frame(x=rnorm(5), z=rnorm(5), id=c(4,10,10,5,5), v=rnorm(5))
(dd <- blocksample(d,size=20,~id))
#>              x          z id          v
#> 1    1.2846635 -1.2486962  1  0.6486553
#> 1.1  1.2846635 -1.2486962  2  0.6486553
#> 1.2  1.2846635 -1.2486962  3  0.6486553
#> 2    0.2623388  1.2735745  4 -0.7044930
#> 3    0.7316727 -0.4036855  4  1.1835723
#> 4   -1.2000197  0.1349840  5 -1.4012900
#> 5    0.2727637 -1.1101618  5 -0.1566187
#> 1.3  1.2846635 -1.2486962  6  0.6486553
#> 1.4  1.2846635 -1.2486962  7  0.6486553
#> 4.1 -1.2000197  0.1349840  8 -1.4012900
#> 5.1  0.2727637 -1.1101618  8 -0.1566187
#> 1.5  1.2846635 -1.2486962  9  0.6486553
#> 4.2 -1.2000197  0.1349840 10 -1.4012900
#> 5.2  0.2727637 -1.1101618 10 -0.1566187
#> 4.3 -1.2000197  0.1349840 11 -1.4012900
#> 5.3  0.2727637 -1.1101618 11 -0.1566187
#> 4.4 -1.2000197  0.1349840 12 -1.4012900
#> 5.4  0.2727637 -1.1101618 12 -0.1566187
#> 1.6  1.2846635 -1.2486962 13  0.6486553
#> 4.5 -1.2000197  0.1349840 14 -1.4012900
#> 5.5  0.2727637 -1.1101618 14 -0.1566187
#> 4.6 -1.2000197  0.1349840 15 -1.4012900
#> 5.6  0.2727637 -1.1101618 15 -0.1566187
#> 2.1  0.2623388  1.2735745 16 -0.7044930
#> 3.1  0.7316727 -0.4036855 16  1.1835723
#> 1.7  1.2846635 -1.2486962 17  0.6486553
#> 4.7 -1.2000197  0.1349840 18 -1.4012900
#> 5.7  0.2727637 -1.1101618 18 -0.1566187
#> 2.2  0.2623388  1.2735745 19 -0.7044930
#> 3.2  0.7316727 -0.4036855 19  1.1835723
#> 1.8  1.2846635 -1.2486962 20  0.6486553
attributes(dd)$id
#>  [1]  4  4  4 10 10  5  5  4  4  5  5  4  5  5  5  5  5  5  4  5  5  5  5 10 10
#> [26]  4  5  5 10 10  4

if (FALSE) { # \dontrun{
blocksample(data.table::data.table(d),1e6,~id)
} # }


d <- data.frame(x=c(1,rnorm(9)),
               z=rnorm(10),
               id=c(4,10,10,5,5,4,4,5,10,5),
               id2=c(1,1,2,1,2,1,1,1,1,2),
               v=rnorm(10))
dsample(d,~id, size=2)
#>            x           z id id2          v id.1
#> 2 -1.0032037  0.62589483 10   1  0.5041811    1
#> 3 -0.7825649 -0.83353084 10   2 -0.1187519    1
#> 9  0.6801000 -0.76067696 10   1 -0.5918016    1
#> 1  1.0000000 -0.09739721  4   1 -0.4282120    2
#> 6  0.8396141 -0.70526662  4   1  0.1479184    2
#> 7 -2.1060205 -0.45457954  4   1 -1.1093351    2
dsample(d,.~id+id2)
#>                x          z          v id
#> 5    -0.51484661  0.6825645  0.8557416  1
#> 10   -0.06015005  0.6603094  0.3145779  1
#> 3    -0.78256491 -0.8335308 -0.1187519  2
#> 4    -0.13370571  0.4886833  0.6540485  3
#> 8     0.10607163  0.0637473 -1.1253394  3
#> 2    -1.00320368  0.6258948  0.5041811  4
#> 9     0.68010004 -0.7606770 -0.5918016  4
#> 2.1  -1.00320368  0.6258948  0.5041811  5
#> 9.1   0.68010004 -0.7606770 -0.5918016  5
#> 3.1  -0.78256491 -0.8335308 -0.1187519  6
#> 5.1  -0.51484661  0.6825645  0.8557416  7
#> 10.1 -0.06015005  0.6603094  0.3145779  7
#> 5.2  -0.51484661  0.6825645  0.8557416  8
#> 10.2 -0.06015005  0.6603094  0.3145779  8
#> 4.1  -0.13370571  0.4886833  0.6540485  9
#> 8.1   0.10607163  0.0637473 -1.1253394  9
#> 2.2  -1.00320368  0.6258948  0.5041811 10
#> 9.2   0.68010004 -0.7606770 -0.5918016 10
dsample(d,x+z~id|x>0,size=5)
#>             x           z id
#> 1   1.0000000 -0.09739721  1
#> 6   0.8396141 -0.70526662  1
#> 1.1 1.0000000 -0.09739721  2
#> 6.1 0.8396141 -0.70526662  2
#> 1.2 1.0000000 -0.09739721  3
#> 6.2 0.8396141 -0.70526662  3
#> 8   0.1060716  0.06374730  4
#> 8.1 0.1060716  0.06374730  5