Fast reshape/transpose of data
Usage
fast.reshape(
data,
varying,
id,
num,
sep = "",
keep,
idname = "id",
numname = "num",
factor = FALSE,
idcombine = TRUE,
labelnum = FALSE,
labels,
regex = mets.options()$regex,
dropid = FALSE,
...
)
Arguments
- data
data.frame or matrix
- varying
Vector of prefix-names of the time varying variables. Optional for Long->Wide reshaping.
- id
id-variable. If omitted then reshape Wide->Long.
- num
Optional number/time variable
- sep
String separating the prefix-name from the number/time
- keep
Vector of column names to keep
- idname
Name of id-variable (Wide->Long)
- numname
Name of number-variable (Wide->Long)
- factor
If true all factors are kept (otherwise treated as character)
- idcombine
If TRUE and id is a vector of several variables, the unique id is combined from all the variables. Otherwise only the first variable is used as identifier.
- labelnum
If TRUE varying variables in wide format (going from long->wide) are labeled 1,2,3,... otherwise use 'num' variable. In long-format (going from wide->long) varying variables matching 'varying' prefix are only selected if their postfix is a number.
- labels
Optional labels for the number variable
- regex
Use regular expressions
- dropid
Drop id in long format (default FALSE)
- ...
Optional additional arguments
Examples
m <- lava::lvm(c(y1,y2,y3,y4)~x)
d <- lava::sim(m,5)
d
#> y1 y2 y3 y4 x
#> 1 -0.57557601 -0.3326475 -0.4457970 0.77282675 0.06016044
#> 2 -1.05053922 -0.9088874 0.7541443 -0.66245889 -0.58889449
#> 3 1.96377843 0.2523829 0.3169168 0.49386202 0.53149619
#> 4 -2.16909044 -1.0242058 -1.6979506 -2.20005456 -1.51839408
#> 5 0.09917712 0.1292274 0.2063671 -0.01771241 0.30655786
fast.reshape(d,"y")
#> x y id num
#> 1 0.06016044 -0.57557601 1 1
#> 2 0.06016044 -0.33264749 1 2
#> 3 0.06016044 -0.44579702 1 3
#> 4 0.06016044 0.77282675 1 4
#> 5 -0.58889449 -1.05053922 2 1
#> 6 -0.58889449 -0.90888735 2 2
#> 7 -0.58889449 0.75414434 2 3
#> 8 -0.58889449 -0.66245889 2 4
#> 9 0.53149619 1.96377843 3 1
#> 10 0.53149619 0.25238289 3 2
#> 11 0.53149619 0.31691678 3 3
#> 12 0.53149619 0.49386202 3 4
#> 13 -1.51839408 -2.16909044 4 1
#> 14 -1.51839408 -1.02420575 4 2
#> 15 -1.51839408 -1.69795061 4 3
#> 16 -1.51839408 -2.20005456 4 4
#> 17 0.30655786 0.09917712 5 1
#> 18 0.30655786 0.12922738 5 2
#> 19 0.30655786 0.20636712 5 3
#> 20 0.30655786 -0.01771241 5 4
fast.reshape(fast.reshape(d,"y"),id="id")
#> x1 y1 id num1 x2 y2 num2 x3
#> 1 0.06016044 -0.57557601 1 1 0.06016044 -0.3326475 2 0.06016044
#> 2 -0.58889449 -1.05053922 2 1 -0.58889449 -0.9088874 2 -0.58889449
#> 3 0.53149619 1.96377843 3 1 0.53149619 0.2523829 2 0.53149619
#> 4 -1.51839408 -2.16909044 4 1 -1.51839408 -1.0242058 2 -1.51839408
#> 5 0.30655786 0.09917712 5 1 0.30655786 0.1292274 2 0.30655786
#> y3 num3 x4 y4 num4
#> 1 -0.4457970 3 0.06016044 0.77282675 4
#> 2 0.7541443 3 -0.58889449 -0.66245889 4
#> 3 0.3169168 3 0.53149619 0.49386202 4
#> 4 -1.6979506 3 -1.51839408 -2.20005456 4
#> 5 0.2063671 3 0.30655786 -0.01771241 4
##### From wide-format
(dd <- fast.reshape(d,"y"))
#> x y id num
#> 1 0.06016044 -0.57557601 1 1
#> 2 0.06016044 -0.33264749 1 2
#> 3 0.06016044 -0.44579702 1 3
#> 4 0.06016044 0.77282675 1 4
#> 5 -0.58889449 -1.05053922 2 1
#> 6 -0.58889449 -0.90888735 2 2
#> 7 -0.58889449 0.75414434 2 3
#> 8 -0.58889449 -0.66245889 2 4
#> 9 0.53149619 1.96377843 3 1
#> 10 0.53149619 0.25238289 3 2
#> 11 0.53149619 0.31691678 3 3
#> 12 0.53149619 0.49386202 3 4
#> 13 -1.51839408 -2.16909044 4 1
#> 14 -1.51839408 -1.02420575 4 2
#> 15 -1.51839408 -1.69795061 4 3
#> 16 -1.51839408 -2.20005456 4 4
#> 17 0.30655786 0.09917712 5 1
#> 18 0.30655786 0.12922738 5 2
#> 19 0.30655786 0.20636712 5 3
#> 20 0.30655786 -0.01771241 5 4
## Same with explicit setting new id and number variable/column names
## and separator "" (default) and dropping x
fast.reshape(d,"y",idname="a",timevar="b",sep="",keep=c())
#> y a num
#> 1 -0.57557601 1 1
#> 2 -0.33264749 1 2
#> 3 -0.44579702 1 3
#> 4 0.77282675 1 4
#> 5 -1.05053922 2 1
#> 6 -0.90888735 2 2
#> 7 0.75414434 2 3
#> 8 -0.66245889 2 4
#> 9 1.96377843 3 1
#> 10 0.25238289 3 2
#> 11 0.31691678 3 3
#> 12 0.49386202 3 4
#> 13 -2.16909044 4 1
#> 14 -1.02420575 4 2
#> 15 -1.69795061 4 3
#> 16 -2.20005456 4 4
#> 17 0.09917712 5 1
#> 18 0.12922738 5 2
#> 19 0.20636712 5 3
#> 20 -0.01771241 5 4
## Same with 'reshape' list-syntax
fast.reshape(d,list(c("y1","y2","y3","y4")),labelnum=TRUE)
#> x y1 id num
#> 1 0.06016044 -0.57557601 1 1
#> 2 0.06016044 -0.33264749 1 2
#> 3 0.06016044 -0.44579702 1 3
#> 4 0.06016044 0.77282675 1 4
#> 5 -0.58889449 -1.05053922 2 1
#> 6 -0.58889449 -0.90888735 2 2
#> 7 -0.58889449 0.75414434 2 3
#> 8 -0.58889449 -0.66245889 2 4
#> 9 0.53149619 1.96377843 3 1
#> 10 0.53149619 0.25238289 3 2
#> 11 0.53149619 0.31691678 3 3
#> 12 0.53149619 0.49386202 3 4
#> 13 -1.51839408 -2.16909044 4 1
#> 14 -1.51839408 -1.02420575 4 2
#> 15 -1.51839408 -1.69795061 4 3
#> 16 -1.51839408 -2.20005456 4 4
#> 17 0.30655786 0.09917712 5 1
#> 18 0.30655786 0.12922738 5 2
#> 19 0.30655786 0.20636712 5 3
#> 20 0.30655786 -0.01771241 5 4
##### From long-format
fast.reshape(dd,id="id")
#> x1 y1 id num1 x2 y2 num2 x3
#> 1 0.06016044 -0.57557601 1 1 0.06016044 -0.3326475 2 0.06016044
#> 2 -0.58889449 -1.05053922 2 1 -0.58889449 -0.9088874 2 -0.58889449
#> 3 0.53149619 1.96377843 3 1 0.53149619 0.2523829 2 0.53149619
#> 4 -1.51839408 -2.16909044 4 1 -1.51839408 -1.0242058 2 -1.51839408
#> 5 0.30655786 0.09917712 5 1 0.30655786 0.1292274 2 0.30655786
#> y3 num3 x4 y4 num4
#> 1 -0.4457970 3 0.06016044 0.77282675 4
#> 2 0.7541443 3 -0.58889449 -0.66245889 4
#> 3 0.3169168 3 0.53149619 0.49386202 4
#> 4 -1.6979506 3 -1.51839408 -2.20005456 4
#> 5 0.2063671 3 0.30655786 -0.01771241 4
## Restrict the set of within-cluster varying variables
fast.reshape(dd,"y",id="id")
#> x y1 id num y2 y3 y4
#> 1 0.06016044 -0.57557601 1 1 -0.3326475 -0.4457970 0.77282675
#> 2 -0.58889449 -1.05053922 2 1 -0.9088874 0.7541443 -0.66245889
#> 3 0.53149619 1.96377843 3 1 0.2523829 0.3169168 0.49386202
#> 4 -1.51839408 -2.16909044 4 1 -1.0242058 -1.6979506 -2.20005456
#> 5 0.30655786 0.09917712 5 1 0.1292274 0.2063671 -0.01771241
fast.reshape(dd,"y",id="id",keep="x",sep=".")
#> x y.1 id y.2 y.3 y.4
#> 1 0.06016044 -0.57557601 1 -0.3326475 -0.4457970 0.77282675
#> 2 -0.58889449 -1.05053922 2 -0.9088874 0.7541443 -0.66245889
#> 3 0.53149619 1.96377843 3 0.2523829 0.3169168 0.49386202
#> 4 -1.51839408 -2.16909044 4 -1.0242058 -1.6979506 -2.20005456
#> 5 0.30655786 0.09917712 5 0.1292274 0.2063671 -0.01771241
#####
x <- data.frame(id=c(5,5,6,6,7),y=1:5,x=1:5,tv=c(1,2,2,1,2))
x
#> id y x tv
#> 1 5 1 1 1
#> 2 5 2 2 2
#> 3 6 3 3 2
#> 4 6 4 4 1
#> 5 7 5 5 2
(xw <- fast.reshape(x,id="id"))
#> id y1 x1 tv1 y2 x2 tv2
#> 1 5 1 1 1 2 2 2
#> 2 6 3 3 2 4 4 1
#> 3 7 5 5 2 NA NA NA
(xl <- fast.reshape(xw,c("y","x"),idname="id2",keep=c()))
#> y x id2 num
#> 1 1 1 1 1
#> 2 2 2 1 2
#> 3 3 3 2 1
#> 4 4 4 2 2
#> 5 5 5 3 1
#> 6 NA NA 3 2
(xl <- fast.reshape(xw,c("y","x","tv")))
#> id y x tv num
#> 1 5 1 1 1 1
#> 2 5 2 2 2 2
#> 3 6 3 3 2 1
#> 4 6 4 4 1 2
#> 5 7 5 5 2 1
#> 6 7 NA NA NA 2
(xw2 <- fast.reshape(xl,id="id",num="num"))
#> id y1 x1 tv1 y2 x2 tv2
#> 1 5 1 1 1 2 2 2
#> 2 6 3 3 2 4 4 1
#> 3 7 5 5 2 NA NA NA
fast.reshape(xw2,c("y","x"),idname="id")
#> id tv1 tv2 y x num
#> 1 5 1 2 1 1 1
#> 2 5 1 2 2 2 2
#> 3 6 2 1 3 3 1
#> 4 6 2 1 4 4 2
#> 5 7 2 NA 5 5 1
#> 6 7 2 NA NA NA 2
### more generally:
### varying=list(c("ym","yf","yb1","yb2"), c("zm","zf","zb1","zb2"))
### varying=list(c("ym","yf","yb1","yb2")))
##### Family cluster example
d <- mets:::sim_BinFam(3)
d
#> agem agef ageb1 ageb2 xm xf xb1 xb2 ym yf yb1 yb2 id
#> 1 23.12490 27.68406 13.55082 16.98722 1 1 1 1 1 0 1 0 1
#> 2 24.05690 28.48338 14.53548 15.76694 1 1 0 0 1 1 1 1 2
#> 3 29.96077 31.87501 13.12305 15.21414 1 1 1 0 1 1 1 1 3
fast.reshape(d,var="y")
#> agem agef ageb1 ageb2 xm xf xb1 xb2 id y num
#> 1 23.12490 27.68406 13.55082 16.98722 1 1 1 1 1 1 m
#> 2 23.12490 27.68406 13.55082 16.98722 1 1 1 1 1 0 f
#> 3 23.12490 27.68406 13.55082 16.98722 1 1 1 1 1 1 b1
#> 4 23.12490 27.68406 13.55082 16.98722 1 1 1 1 1 0 b2
#> 5 24.05690 28.48338 14.53548 15.76694 1 1 0 0 2 1 m
#> 6 24.05690 28.48338 14.53548 15.76694 1 1 0 0 2 1 f
#> 7 24.05690 28.48338 14.53548 15.76694 1 1 0 0 2 1 b1
#> 8 24.05690 28.48338 14.53548 15.76694 1 1 0 0 2 1 b2
#> 9 29.96077 31.87501 13.12305 15.21414 1 1 1 0 3 1 m
#> 10 29.96077 31.87501 13.12305 15.21414 1 1 1 0 3 1 f
#> 11 29.96077 31.87501 13.12305 15.21414 1 1 1 0 3 1 b1
#> 12 29.96077 31.87501 13.12305 15.21414 1 1 1 0 3 1 b2
fast.reshape(d,varying=list(c("ym","yf","yb1","yb2")))
#> agem agef ageb1 ageb2 xm xf xb1 xb2 id ym num
#> 1 23.12490 27.68406 13.55082 16.98722 1 1 1 1 1 1 ym
#> 2 23.12490 27.68406 13.55082 16.98722 1 1 1 1 1 0 yf
#> 3 23.12490 27.68406 13.55082 16.98722 1 1 1 1 1 1 yb1
#> 4 23.12490 27.68406 13.55082 16.98722 1 1 1 1 1 0 yb2
#> 5 24.05690 28.48338 14.53548 15.76694 1 1 0 0 2 1 ym
#> 6 24.05690 28.48338 14.53548 15.76694 1 1 0 0 2 1 yf
#> 7 24.05690 28.48338 14.53548 15.76694 1 1 0 0 2 1 yb1
#> 8 24.05690 28.48338 14.53548 15.76694 1 1 0 0 2 1 yb2
#> 9 29.96077 31.87501 13.12305 15.21414 1 1 1 0 3 1 ym
#> 10 29.96077 31.87501 13.12305 15.21414 1 1 1 0 3 1 yf
#> 11 29.96077 31.87501 13.12305 15.21414 1 1 1 0 3 1 yb1
#> 12 29.96077 31.87501 13.12305 15.21414 1 1 1 0 3 1 yb2
d <- lava::sim(lava::lvm(~y1+y2+ya),10)
d
#> y1 y2 ya
#> 1 -0.14439960 1.0273924 -0.4302118
#> 2 0.20753834 1.2079084 -0.9261095
#> 3 2.30797840 -1.2313234 -0.1771040
#> 4 0.10580237 0.9838956 0.4020118
#> 5 0.45699881 0.2199248 -0.7317482
#> 6 -0.07715294 -1.4672500 0.8303732
#> 7 -0.33400084 0.5210227 -1.2080828
#> 8 -0.03472603 -0.1587546 -1.0479844
#> 9 0.78763961 1.4645873 1.4411577
#> 10 2.07524501 -0.7660820 -1.0158475
(dd <- fast.reshape(d,"y"))
#> y id num
#> 1 -0.14439960 1 1
#> 2 1.02739244 1 2
#> 3 -0.43021175 1 a
#> 4 0.20753834 2 1
#> 5 1.20790840 2 2
#> 6 -0.92610950 2 a
#> 7 2.30797840 3 1
#> 8 -1.23132342 3 2
#> 9 -0.17710396 3 a
#> 10 0.10580237 4 1
#> 11 0.98389557 4 2
#> 12 0.40201178 4 a
#> 13 0.45699881 5 1
#> 14 0.21992480 5 2
#> 15 -0.73174817 5 a
#> 16 -0.07715294 6 1
#> 17 -1.46725003 6 2
#> 18 0.83037317 6 a
#> 19 -0.33400084 7 1
#> 20 0.52102274 7 2
#> 21 -1.20808279 7 a
#> 22 -0.03472603 8 1
#> 23 -0.15875460 8 2
#> 24 -1.04798441 8 a
#> 25 0.78763961 9 1
#> 26 1.46458731 9 2
#> 27 1.44115771 9 a
#> 28 2.07524501 10 1
#> 29 -0.76608200 10 2
#> 30 -1.01584747 10 a
fast.reshape(d,"y",labelnum=TRUE)
#> ya y id num
#> 1 -0.4302118 -0.14439960 1 1
#> 2 -0.4302118 1.02739244 1 2
#> 3 -0.9261095 0.20753834 2 1
#> 4 -0.9261095 1.20790840 2 2
#> 5 -0.1771040 2.30797840 3 1
#> 6 -0.1771040 -1.23132342 3 2
#> 7 0.4020118 0.10580237 4 1
#> 8 0.4020118 0.98389557 4 2
#> 9 -0.7317482 0.45699881 5 1
#> 10 -0.7317482 0.21992480 5 2
#> 11 0.8303732 -0.07715294 6 1
#> 12 0.8303732 -1.46725003 6 2
#> 13 -1.2080828 -0.33400084 7 1
#> 14 -1.2080828 0.52102274 7 2
#> 15 -1.0479844 -0.03472603 8 1
#> 16 -1.0479844 -0.15875460 8 2
#> 17 1.4411577 0.78763961 9 1
#> 18 1.4411577 1.46458731 9 2
#> 19 -1.0158475 2.07524501 10 1
#> 20 -1.0158475 -0.76608200 10 2
fast.reshape(dd,id="id",num="num")
#> y1 id y2 ya
#> 1 -0.14439960 1 1.0273924 -0.4302118
#> 2 0.20753834 2 1.2079084 -0.9261095
#> 3 2.30797840 3 -1.2313234 -0.1771040
#> 4 0.10580237 4 0.9838956 0.4020118
#> 5 0.45699881 5 0.2199248 -0.7317482
#> 6 -0.07715294 6 -1.4672500 0.8303732
#> 7 -0.33400084 7 0.5210227 -1.2080828
#> 8 -0.03472603 8 -0.1587546 -1.0479844
#> 9 0.78763961 9 1.4645873 1.4411577
#> 10 2.07524501 10 -0.7660820 -1.0158475
fast.reshape(dd,id="id",num="num",labelnum=TRUE)
#> y1 id y2 y3
#> 1 -0.14439960 1 1.0273924 -0.4302118
#> 2 0.20753834 2 1.2079084 -0.9261095
#> 3 2.30797840 3 -1.2313234 -0.1771040
#> 4 0.10580237 4 0.9838956 0.4020118
#> 5 0.45699881 5 0.2199248 -0.7317482
#> 6 -0.07715294 6 -1.4672500 0.8303732
#> 7 -0.33400084 7 0.5210227 -1.2080828
#> 8 -0.03472603 8 -0.1587546 -1.0479844
#> 9 0.78763961 9 1.4645873 1.4411577
#> 10 2.07524501 10 -0.7660820 -1.0158475
fast.reshape(d,c(a="y"),labelnum=TRUE) ## New column name
#> ya a id num
#> 1 -0.4302118 -0.14439960 1 1
#> 2 -0.4302118 1.02739244 1 2
#> 3 -0.9261095 0.20753834 2 1
#> 4 -0.9261095 1.20790840 2 2
#> 5 -0.1771040 2.30797840 3 1
#> 6 -0.1771040 -1.23132342 3 2
#> 7 0.4020118 0.10580237 4 1
#> 8 0.4020118 0.98389557 4 2
#> 9 -0.7317482 0.45699881 5 1
#> 10 -0.7317482 0.21992480 5 2
#> 11 0.8303732 -0.07715294 6 1
#> 12 0.8303732 -1.46725003 6 2
#> 13 -1.2080828 -0.33400084 7 1
#> 14 -1.2080828 0.52102274 7 2
#> 15 -1.0479844 -0.03472603 8 1
#> 16 -1.0479844 -0.15875460 8 2
#> 17 1.4411577 0.78763961 9 1
#> 18 1.4411577 1.46458731 9 2
#> 19 -1.0158475 2.07524501 10 1
#> 20 -1.0158475 -0.76608200 10 2
##### Unbalanced data
m <- lava::lvm(c(y1,y2,y3,y4)~ x+z1+z3+z5)
d <- lava::sim(m,3)
d
#> y1 y2 y3 y4 x z1 z3
#> 1 -1.476059 -0.199160129 -4.1732690 -1.346706 -0.1643758 -1.3702079 -0.3087406
#> 2 -1.959051 0.008613182 0.9196863 -1.591375 0.4206946 0.9878383 -1.2532898
#> 3 2.173273 1.432963646 2.4309376 2.273980 -0.4002467 1.5197450 0.6422413
#> z5
#> 1 -0.04470914
#> 2 -1.73321841
#> 3 0.00213186
fast.reshape(d,c("y","z"))
#> x y z id num
#> 1 -0.1643758 -1.476058703 -1.37020788 1 1
#> 2 -0.1643758 -0.199160129 NA 1 2
#> 3 -0.1643758 -4.173268951 -0.30874057 1 3
#> 4 -0.1643758 -1.346706079 NA 1 4
#> 5 -0.1643758 NA -0.04470914 1 5
#> 6 0.4206946 -1.959051303 0.98783827 2 1
#> 7 0.4206946 0.008613182 NA 2 2
#> 8 0.4206946 0.919686338 -1.25328976 2 3
#> 9 0.4206946 -1.591374775 NA 2 4
#> 10 0.4206946 NA -1.73321841 2 5
#> 11 -0.4002467 2.173273287 1.51974503 3 1
#> 12 -0.4002467 1.432963646 NA 3 2
#> 13 -0.4002467 2.430937614 0.64224131 3 3
#> 14 -0.4002467 2.273979870 NA 3 4
#> 15 -0.4002467 NA 0.00213186 3 5
##### not-varying syntax:
fast.reshape(d,-c("x"))
#> x y z id num
#> 1 -0.1643758 -1.476058703 -1.37020788 1 1
#> 2 -0.1643758 -0.199160129 NA 1 2
#> 3 -0.1643758 -4.173268951 -0.30874057 1 3
#> 4 -0.1643758 -1.346706079 NA 1 4
#> 5 -0.1643758 NA -0.04470914 1 5
#> 6 0.4206946 -1.959051303 0.98783827 2 1
#> 7 0.4206946 0.008613182 NA 2 2
#> 8 0.4206946 0.919686338 -1.25328976 2 3
#> 9 0.4206946 -1.591374775 NA 2 4
#> 10 0.4206946 NA -1.73321841 2 5
#> 11 -0.4002467 2.173273287 1.51974503 3 1
#> 12 -0.4002467 1.432963646 NA 3 2
#> 13 -0.4002467 2.430937614 0.64224131 3 3
#> 14 -0.4002467 2.273979870 NA 3 4
#> 15 -0.4002467 NA 0.00213186 3 5
##### Automatically define varying variables from trailing digits
fast.reshape(d)
#> x y z id num
#> 1 -0.1643758 -1.476058703 -1.37020788 1 1
#> 2 -0.1643758 -0.199160129 NA 1 2
#> 3 -0.1643758 -4.173268951 -0.30874057 1 3
#> 4 -0.1643758 -1.346706079 NA 1 4
#> 5 -0.1643758 NA -0.04470914 1 5
#> 6 0.4206946 -1.959051303 0.98783827 2 1
#> 7 0.4206946 0.008613182 NA 2 2
#> 8 0.4206946 0.919686338 -1.25328976 2 3
#> 9 0.4206946 -1.591374775 NA 2 4
#> 10 0.4206946 NA -1.73321841 2 5
#> 11 -0.4002467 2.173273287 1.51974503 3 1
#> 12 -0.4002467 1.432963646 NA 3 2
#> 13 -0.4002467 2.430937614 0.64224131 3 3
#> 14 -0.4002467 2.273979870 NA 3 4
#> 15 -0.4002467 NA 0.00213186 3 5
##### Prostate cancer example
data(prt)
head(prtw <- fast.reshape(prt,"cancer",id="id"))
#> country time status zyg id cancer1 cancer2
#> 31 Denmark 96.98833 1 DZ 1 0 0
#> 39 Denmark 68.04498 1 DZ 3 0 0
#> 51 Denmark 78.78068 1 DZ 5 0 0
#> 70 Denmark 100.95488 1 MZ 9 0 0
#> 83 Denmark 104.55035 1 DZ 12 0 1
#> 95 Denmark 95.65324 1 DZ 15 0 0
ftable(cancer1~cancer2,data=prtw)
#> cancer1 0 1
#> cancer2
#> 0 13405 349
#> 1 362 106
rm(prtw)
