Skip to contents

Sample blockwise from clustered data

Usage

blocksample(data, size, idvar = NULL, replace = TRUE, ...)

Arguments

data

Data frame

size

Number of clusters (blocks) to sample

idvar

Column defining the clusters (may be given as a one-sided formula, e.g. ~id)

replace

Logical indicating whether to sample with replacement

...

Additional arguments to lower level functions

Value

data.frame

Details

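In the returned data frame the cluster column is relabelled with consecutive integers, one per sampled block. The original id of each row is stored in the attribute 'id'.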

Author

Klaus K. Holst

Examples


d <- data.frame(x=rnorm(5), z=rnorm(5), id=c(4,10,10,5,5), v=rnorm(5))
(dd <- blocksample(d,size=20,~id))
#>              x          z id           v
#> 4    0.4821452 -0.1545095  1 -1.53337315
#> 5    0.3959834 -1.0453677  1  0.01150068
#> 1   -0.7825994  0.6715858  2  1.14649089
#> 1.1 -0.7825994  0.6715858  3  1.14649089
#> 2   -0.7561844  2.2199949  4  0.48228630
#> 3   -0.8763060  0.5204616  4  1.26038336
#> 2.1 -0.7561844  2.2199949  5  0.48228630
#> 3.1 -0.8763060  0.5204616  5  1.26038336
#> 2.2 -0.7561844  2.2199949  6  0.48228630
#> 3.2 -0.8763060  0.5204616  6  1.26038336
#> 4.1  0.4821452 -0.1545095  7 -1.53337315
#> 5.1  0.3959834 -1.0453677  7  0.01150068
#> 2.3 -0.7561844  2.2199949  8  0.48228630
#> 3.3 -0.8763060  0.5204616  8  1.26038336
#> 4.2  0.4821452 -0.1545095  9 -1.53337315
#> 5.2  0.3959834 -1.0453677  9  0.01150068
#> 1.2 -0.7825994  0.6715858 10  1.14649089
#> 2.4 -0.7561844  2.2199949 11  0.48228630
#> 3.4 -0.8763060  0.5204616 11  1.26038336
#> 1.3 -0.7825994  0.6715858 12  1.14649089
#> 2.5 -0.7561844  2.2199949 13  0.48228630
#> 3.5 -0.8763060  0.5204616 13  1.26038336
#> 4.3  0.4821452 -0.1545095 14 -1.53337315
#> 5.3  0.3959834 -1.0453677 14  0.01150068
#> 1.4 -0.7825994  0.6715858 15  1.14649089
#> 1.5 -0.7825994  0.6715858 16  1.14649089
#> 4.4  0.4821452 -0.1545095 17 -1.53337315
#> 5.4  0.3959834 -1.0453677 17  0.01150068
#> 4.5  0.4821452 -0.1545095 18 -1.53337315
#> 5.5  0.3959834 -1.0453677 18  0.01150068
#> 4.6  0.4821452 -0.1545095 19 -1.53337315
#> 5.6  0.3959834 -1.0453677 19  0.01150068
#> 1.6 -0.7825994  0.6715858 20  1.14649089
attributes(dd)$id
#>  [1]  5  5  4  4 10 10 10 10 10 10  5  5 10 10  5  5  4 10 10  4 10 10  5  5  4
#> [26]  4  5  5  5  5  5  5  4

if (FALSE) { # \dontrun{
blocksample(data.table::data.table(d),1e6,~id)
} # }


d <- data.frame(x=c(1,rnorm(9)),
               z=rnorm(10),
               id=c(4,10,10,5,5,4,4,5,10,5),
               id2=c(1,1,2,1,2,1,1,1,1,2),
               v=rnorm(10))
dsample(d,~id, size=2)
#>             x          z id id2          v id.1
#> 1   1.0000000 -0.8086293  4   1  0.3390736    1
#> 6   0.5714767  1.2783414  4   1 -0.3637973    1
#> 7   0.1107420 -1.4864846  4   1 -0.6838435    1
#> 1.1 1.0000000 -0.8086293  4   1  0.3390736    2
#> 6.1 0.5714767  1.2783414  4   1 -0.3637973    2
#> 7.1 0.1107420 -1.4864846  4   1 -0.6838435    2
dsample(d,.~id+id2)
#>              x          z          v id
#> 3    0.7926693 -0.2475841 -0.1276268  1
#> 1    1.0000000 -0.8086293  0.3390736  2
#> 6    0.5714767  1.2783414 -0.3637973  2
#> 7    0.1107420 -1.4864846 -0.6838435  2
#> 4    0.8491297  1.6862562 -1.9193590  3
#> 8   -1.3252587  2.0680256  0.5901875  3
#> 4.1  0.8491297  1.6862562 -1.9193590  4
#> 8.1 -1.3252587  2.0680256  0.5901875  4
#> 1.1  1.0000000 -0.8086293  0.3390736  5
#> 6.1  0.5714767  1.2783414 -0.3637973  5
#> 7.1  0.1107420 -1.4864846 -0.6838435  5
#> 4.2  0.8491297  1.6862562 -1.9193590  6
#> 8.2 -1.3252587  2.0680256  0.5901875  6
#> 1.2  1.0000000 -0.8086293  0.3390736  7
#> 6.2  0.5714767  1.2783414 -0.3637973  7
#> 7.2  0.1107420 -1.4864846 -0.6838435  7
#> 2    2.2024418 -0.0356119 -1.0427741  8
#> 9   -1.1820866  0.8510684  1.2092282  8
#> 5    1.9451423 -0.2893210 -0.3585603  9
#> 10  -0.2516282  0.8101096  0.3914163  9
#> 2.1  2.2024418 -0.0356119 -1.0427741 10
#> 9.1 -1.1820866  0.8510684  1.2092282 10
dsample(d,x+z~id|x>0,size=5)
#>             x          z id
#> 2   2.2024418 -0.0356119  1
#> 3   0.7926693 -0.2475841  1
#> 1   1.0000000 -0.8086293  2
#> 6   0.5714767  1.2783414  2
#> 7   0.1107420 -1.4864846  2
#> 2.1 2.2024418 -0.0356119  3
#> 3.1 0.7926693 -0.2475841  3
#> 1.1 1.0000000 -0.8086293  4
#> 6.1 0.5714767  1.2783414  4
#> 7.1 0.1107420 -1.4864846  4
#> 2.2 2.2024418 -0.0356119  5
#> 3.2 0.7926693 -0.2475841  5