Skip to contents

Sample blockwise from clustered data

Usage

blocksample(data, size, idvar = NULL, replace = TRUE, ...)

Arguments

data

Data frame

size

Number of clusters (blocks) to sample

idvar

Column defining the clusters

replace

Logical indicating whether to sample with replacement

...

Additional arguments passed to lower-level functions

Value

data.frame

Details

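The original cluster ids are stored in the attribute 'id' of the returned data frame; the id column itself is replaced by new sequential cluster ids.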

Author

Klaus K. Holst

Examples


d <- data.frame(x=rnorm(5), z=rnorm(5), id=c(4,10,10,5,5), v=rnorm(5))
(dd <- blocksample(d,size=20,~id))
#>               x            z id          v
#> 1    -0.2098647 -0.966818043  1 -0.2770055
#> 1.1  -0.2098647 -0.966818043  2 -0.2770055
#> 4     0.5540451 -0.001732368  3 -1.3458555
#> 5    -0.5581936 -0.351628450  3 -0.2126497
#> 2    -0.7869657 -2.057152528  4  1.1694029
#> 3     1.3636367  0.305695423  4 -0.9603945
#> 4.1   0.5540451 -0.001732368  5 -1.3458555
#> 5.1  -0.5581936 -0.351628450  5 -0.2126497
#> 2.1  -0.7869657 -2.057152528  6  1.1694029
#> 3.1   1.3636367  0.305695423  6 -0.9603945
#> 1.2  -0.2098647 -0.966818043  7 -0.2770055
#> 2.2  -0.7869657 -2.057152528  8  1.1694029
#> 3.2   1.3636367  0.305695423  8 -0.9603945
#> 4.2   0.5540451 -0.001732368  9 -1.3458555
#> 5.2  -0.5581936 -0.351628450  9 -0.2126497
#> 4.3   0.5540451 -0.001732368 10 -1.3458555
#> 5.3  -0.5581936 -0.351628450 10 -0.2126497
#> 1.3  -0.2098647 -0.966818043 11 -0.2770055
#> 1.4  -0.2098647 -0.966818043 12 -0.2770055
#> 2.3  -0.7869657 -2.057152528 13  1.1694029
#> 3.3   1.3636367  0.305695423 13 -0.9603945
#> 1.5  -0.2098647 -0.966818043 14 -0.2770055
#> 4.4   0.5540451 -0.001732368 15 -1.3458555
#> 5.4  -0.5581936 -0.351628450 15 -0.2126497
#> 1.6  -0.2098647 -0.966818043 16 -0.2770055
#> 1.7  -0.2098647 -0.966818043 17 -0.2770055
#> 1.8  -0.2098647 -0.966818043 18 -0.2770055
#> 1.9  -0.2098647 -0.966818043 19 -0.2770055
#> 1.10 -0.2098647 -0.966818043 20 -0.2770055
attributes(dd)$id
#>  [1]  4  4  5  5 10 10  5  5 10 10  4 10 10  5  5  5  5  4  4 10 10  4  5  5  4
#> [26]  4  4  4  4

if (FALSE) { # \dontrun{
blocksample(data.table::data.table(d),1e6,~id)
} # }


d <- data.frame(x=c(1,rnorm(9)),
               z=rnorm(10),
               id=c(4,10,10,5,5,4,4,5,10,5),
               id2=c(1,1,2,1,2,1,1,1,1,2),
               v=rnorm(10))
dsample(d,~id, size=2)
#>             x          z id id2           v id.1
#> 1   1.0000000 -0.4024641  4   1  0.09811391    1
#> 6  -0.9481249  0.8533950  4   1 -1.53366661    1
#> 7   0.3808644  0.3585276  4   1 -0.71283679    1
#> 4  -0.7826530  0.5446049  5   1  0.35253922    2
#> 5   0.3506016 -0.2281963  5   2 -0.65197620    2
#> 8  -0.3301687 -0.2970404  5   1  0.74729117    2
#> 10 -0.3731517 -1.7023895  5   2  1.54708600    2
dsample(d,.~id+id2)
#>               x          z           v id
#> 3    -2.1508496 -0.1699301  0.38173893  1
#> 1     1.0000000 -0.4024641  0.09811391  2
#> 6    -0.9481249  0.8533950 -1.53366661  2
#> 7     0.3808644  0.3585276 -0.71283679  2
#> 3.1  -2.1508496 -0.1699301  0.38173893  3
#> 4    -0.7826530  0.5446049  0.35253922  4
#> 8    -0.3301687 -0.2970404  0.74729117  4
#> 5     0.3506016 -0.2281963 -0.65197620  5
#> 10   -0.3731517 -1.7023895  1.54708600  5
#> 1.1   1.0000000 -0.4024641  0.09811391  6
#> 6.1  -0.9481249  0.8533950 -1.53366661  6
#> 7.1   0.3808644  0.3585276 -0.71283679  6
#> 1.2   1.0000000 -0.4024641  0.09811391  7
#> 6.2  -0.9481249  0.8533950 -1.53366661  7
#> 7.2   0.3808644  0.3585276 -0.71283679  7
#> 2    -2.0273286  1.3327415  0.27081692  8
#> 9    -0.5036722  1.6237297  0.24582474  8
#> 5.1   0.3506016 -0.2281963 -0.65197620  9
#> 10.1 -0.3731517 -1.7023895  1.54708600  9
#> 3.2  -2.1508496 -0.1699301  0.38173893 10
dsample(d,x+z~id|x>0,size=5)
#>             x          z id
#> 5   0.3506016 -0.2281963  1
#> 1   1.0000000 -0.4024641  2
#> 7   0.3808644  0.3585276  2
#> 5.1 0.3506016 -0.2281963  3
#> 5.2 0.3506016 -0.2281963  4
#> 5.3 0.3506016 -0.2281963  5