Skip to contents

Sample blockwise from clustered data

Usage

blocksample(data, size, idvar = NULL, replace = TRUE, ...)

Arguments

data

Data frame

size

Size of the sample, i.e. the number of clusters (blocks) to draw

idvar

Column defining the clusters (e.g. given as a one-sided formula such as ~id)

replace

Logical indicating whether to sample with replacement

...

Additional arguments passed on to lower-level functions

Value

data.frame

Details

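The sampled clusters are relabelled consecutively (1, 2, ...) in the cluster column of the returned data; the original cluster ids are stored in the attribute 'id'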

Author

Klaus K. Holst

Examples


d <- data.frame(x=rnorm(5), z=rnorm(5), id=c(4,10,10,5,5), v=rnorm(5))
(dd <- blocksample(d,size=20,~id))
#>             x          z id          v
#> 4   -1.343691  0.8570311  1  0.7443112
#> 5   -1.048993  0.1789570  1 -0.3518537
#> 4.1 -1.343691  0.8570311  2  0.7443112
#> 5.1 -1.048993  0.1789570  2 -0.3518537
#> 2    1.463933 -1.0927775  3  0.4502932
#> 3    1.568890 -0.1499354  3  0.6133362
#> 2.1  1.463933 -1.0927775  4  0.4502932
#> 3.1  1.568890 -0.1499354  4  0.6133362
#> 1   -2.260427  0.1930874  5  0.8026301
#> 1.1 -2.260427  0.1930874  6  0.8026301
#> 2.2  1.463933 -1.0927775  7  0.4502932
#> 3.2  1.568890 -0.1499354  7  0.6133362
#> 4.2 -1.343691  0.8570311  8  0.7443112
#> 5.2 -1.048993  0.1789570  8 -0.3518537
#> 4.3 -1.343691  0.8570311  9  0.7443112
#> 5.3 -1.048993  0.1789570  9 -0.3518537
#> 4.4 -1.343691  0.8570311 10  0.7443112
#> 5.4 -1.048993  0.1789570 10 -0.3518537
#> 1.2 -2.260427  0.1930874 11  0.8026301
#> 1.3 -2.260427  0.1930874 12  0.8026301
#> 4.5 -1.343691  0.8570311 13  0.7443112
#> 5.5 -1.048993  0.1789570 13 -0.3518537
#> 2.3  1.463933 -1.0927775 14  0.4502932
#> 3.3  1.568890 -0.1499354 14  0.6133362
#> 1.4 -2.260427  0.1930874 15  0.8026301
#> 1.5 -2.260427  0.1930874 16  0.8026301
#> 4.6 -1.343691  0.8570311 17  0.7443112
#> 5.6 -1.048993  0.1789570 17 -0.3518537
#> 1.6 -2.260427  0.1930874 18  0.8026301
#> 1.7 -2.260427  0.1930874 19  0.8026301
#> 4.7 -1.343691  0.8570311 20  0.7443112
#> 5.7 -1.048993  0.1789570 20 -0.3518537
attributes(dd)$id
#>  [1]  5  5  5  5 10 10 10 10  4  4 10 10  5  5  5  5  5  5  4  4  5  5 10 10  4
#> [26]  4  5  5  4  4  5  5

if (FALSE) { # \dontrun{
blocksample(data.table::data.table(d),1e6,~id)
} # }


d <- data.frame(x=c(1,rnorm(9)),
               z=rnorm(10),
               id=c(4,10,10,5,5,4,4,5,10,5),
               id2=c(1,1,2,1,2,1,1,1,1,2),
               v=rnorm(10))
dsample(d,~id, size=2)
#>              x          z id id2           v id.1
#> 1   1.00000000 -2.0086151  4   1  0.92172701    1
#> 6  -0.09405042 -0.5680621  4   1  0.01204515    1
#> 7  -0.14746874 -0.2470949  4   1  1.18385699    1
#> 4   3.08775057 -0.4998917  5   1 -0.85054942    2
#> 5   1.58046816 -0.4244225  5   2  2.09775398    2
#> 8  -0.74460167  0.9049383  5   1  0.78316393    2
#> 10 -1.09740779  0.8278474  5   2  0.22017679    2
dsample(d,.~id+id2)
#>                x          z           v id
#> 2    -0.45626536  0.9083831 -0.34610735  1
#> 9    -0.84850307  1.1791014  1.31676742  1
#> 5     1.58046816 -0.4244225  2.09775398  2
#> 10   -1.09740779  0.8278474  0.22017679  2
#> 5.1   1.58046816 -0.4244225  2.09775398  3
#> 10.1 -1.09740779  0.8278474  0.22017679  3
#> 3    -0.53259681 -0.1478809 -0.70094140  4
#> 1     1.00000000 -2.0086151  0.92172701  5
#> 6    -0.09405042 -0.5680621  0.01204515  5
#> 7    -0.14746874 -0.2470949  1.18385699  5
#> 2.1  -0.45626536  0.9083831 -0.34610735  6
#> 9.1  -0.84850307  1.1791014  1.31676742  6
#> 3.1  -0.53259681 -0.1478809 -0.70094140  7
#> 1.1   1.00000000 -2.0086151  0.92172701  8
#> 6.1  -0.09405042 -0.5680621  0.01204515  8
#> 7.1  -0.14746874 -0.2470949  1.18385699  8
#> 3.2  -0.53259681 -0.1478809 -0.70094140  9
#> 4     3.08775057 -0.4998917 -0.85054942 10
#> 8    -0.74460167  0.9049383  0.78316393 10
dsample(d,x+z~id|x>0,size=5)
#>            x          z id
#> 1   1.000000 -2.0086151  1
#> 4   3.087751 -0.4998917  2
#> 5   1.580468 -0.4244225  2
#> 4.1 3.087751 -0.4998917  3
#> 5.1 1.580468 -0.4244225  3
#> 1.1 1.000000 -2.0086151  4
#> 4.2 3.087751 -0.4998917  5
#> 5.2 1.580468 -0.4244225  5