Fast reshape/transpose of data
fast.reshape(
data,
varying,
id,
num,
sep = "",
keep,
idname = "id",
numname = "num",
factor = FALSE,
idcombine = TRUE,
labelnum = FALSE,
labels,
regex = mets.options()$regex,
dropid = FALSE,
...
)
data.frame or matrix
Vector of prefix-names of the time varying variables. Optional for Long->Wide reshaping.
id-variable. If omitted then reshape Wide->Long.
Optional number/time variable
String separating prefix-name from number/time
Vector of column names to keep
Name of id-variable (Wide->Long)
Name of number-variable (Wide->Long)
If true all factors are kept (otherwise treated as character)
If TRUE and id
is a vector of several variables, the unique id is combined from all the variables.
Otherwise only the first variable is used as identifier.
If TRUE varying variables in wide format (going from long->wide) are labeled 1,2,3,... otherwise use 'num' variable. In long-format (going from wide->long) varying variables matching 'varying' prefix are only selected if their postfix is a number.
Optional labels for the number variable
Use regular expressions
Drop id in long format (default FALSE)
Optional additional arguments
m <- lava::lvm(c(y1,y2,y3,y4)~x)
d <- lava::sim(m,5)
d
#> y1 y2 y3 y4 x
#> 1 -1.9292578 -2.0424073 -0.8237835 -1.476289383 -1.53644982
#> 2 -0.6209690 1.0420627 -0.3745405 -0.889870613 -0.30097613
#> 3 -0.8073932 -0.7428593 -0.5659141 0.003216288 -0.52827990
#> 4 -0.1579064 -0.8316513 -1.3337553 -2.170488862 -0.65209478
#> 5 -0.2342273 -0.1570875 -0.3811671 0.249661083 -0.05689678
fast.reshape(d,"y")
#> x y id num
#> 1 -1.53644982 -1.929257753 1 1
#> 2 -1.53644982 -2.042407286 1 2
#> 3 -1.53644982 -0.823783516 1 3
#> 4 -1.53644982 -1.476289383 1 4
#> 5 -0.30097613 -0.620968995 2 1
#> 6 -0.30097613 1.042062698 2 2
#> 7 -0.30097613 -0.374540531 2 3
#> 8 -0.30097613 -0.889870613 2 4
#> 9 -0.52827990 -0.807393207 3 1
#> 10 -0.52827990 -0.742859313 3 2
#> 11 -0.52827990 -0.565914076 3 3
#> 12 -0.52827990 0.003216288 3 4
#> 13 -0.65209478 -0.157906449 4 1
#> 14 -0.65209478 -0.831651311 4 2
#> 15 -0.65209478 -1.333755259 4 3
#> 16 -0.65209478 -2.170488862 4 4
#> 17 -0.05689678 -0.234227260 5 1
#> 18 -0.05689678 -0.157087519 5 2
#> 19 -0.05689678 -0.381167050 5 3
#> 20 -0.05689678 0.249661083 5 4
fast.reshape(fast.reshape(d,"y"),id="id")
#> x1 y1 id num1 x2 y2 num2 x3
#> 1 -1.53644982 -1.9292578 1 1 -1.53644982 -2.0424073 2 -1.53644982
#> 2 -0.30097613 -0.6209690 2 1 -0.30097613 1.0420627 2 -0.30097613
#> 3 -0.52827990 -0.8073932 3 1 -0.52827990 -0.7428593 2 -0.52827990
#> 4 -0.65209478 -0.1579064 4 1 -0.65209478 -0.8316513 2 -0.65209478
#> 5 -0.05689678 -0.2342273 5 1 -0.05689678 -0.1570875 2 -0.05689678
#> y3 num3 x4 y4 num4
#> 1 -0.8237835 3 -1.53644982 -1.476289383 4
#> 2 -0.3745405 3 -0.30097613 -0.889870613 4
#> 3 -0.5659141 3 -0.52827990 0.003216288 4
#> 4 -1.3337553 3 -0.65209478 -2.170488862 4
#> 5 -0.3811671 3 -0.05689678 0.249661083 4
##### From wide-format
(dd <- fast.reshape(d,"y"))
#> x y id num
#> 1 -1.53644982 -1.929257753 1 1
#> 2 -1.53644982 -2.042407286 1 2
#> 3 -1.53644982 -0.823783516 1 3
#> 4 -1.53644982 -1.476289383 1 4
#> 5 -0.30097613 -0.620968995 2 1
#> 6 -0.30097613 1.042062698 2 2
#> 7 -0.30097613 -0.374540531 2 3
#> 8 -0.30097613 -0.889870613 2 4
#> 9 -0.52827990 -0.807393207 3 1
#> 10 -0.52827990 -0.742859313 3 2
#> 11 -0.52827990 -0.565914076 3 3
#> 12 -0.52827990 0.003216288 3 4
#> 13 -0.65209478 -0.157906449 4 1
#> 14 -0.65209478 -0.831651311 4 2
#> 15 -0.65209478 -1.333755259 4 3
#> 16 -0.65209478 -2.170488862 4 4
#> 17 -0.05689678 -0.234227260 5 1
#> 18 -0.05689678 -0.157087519 5 2
#> 19 -0.05689678 -0.381167050 5 3
#> 20 -0.05689678 0.249661083 5 4
## Same with explicit setting new id and number variable/column names
## and separator "" (default) and dropping x
fast.reshape(d,"y",idname="a",timevar="b",sep="",keep=c())
#> y a num
#> 1 -1.929257753 1 1
#> 2 -2.042407286 1 2
#> 3 -0.823783516 1 3
#> 4 -1.476289383 1 4
#> 5 -0.620968995 2 1
#> 6 1.042062698 2 2
#> 7 -0.374540531 2 3
#> 8 -0.889870613 2 4
#> 9 -0.807393207 3 1
#> 10 -0.742859313 3 2
#> 11 -0.565914076 3 3
#> 12 0.003216288 3 4
#> 13 -0.157906449 4 1
#> 14 -0.831651311 4 2
#> 15 -1.333755259 4 3
#> 16 -2.170488862 4 4
#> 17 -0.234227260 5 1
#> 18 -0.157087519 5 2
#> 19 -0.381167050 5 3
#> 20 0.249661083 5 4
## Same with 'reshape' list-syntax
fast.reshape(d,list(c("y1","y2","y3","y4")),labelnum=TRUE)
#> x y1 id num
#> 1 -1.53644982 -1.929257753 1 1
#> 2 -1.53644982 -2.042407286 1 2
#> 3 -1.53644982 -0.823783516 1 3
#> 4 -1.53644982 -1.476289383 1 4
#> 5 -0.30097613 -0.620968995 2 1
#> 6 -0.30097613 1.042062698 2 2
#> 7 -0.30097613 -0.374540531 2 3
#> 8 -0.30097613 -0.889870613 2 4
#> 9 -0.52827990 -0.807393207 3 1
#> 10 -0.52827990 -0.742859313 3 2
#> 11 -0.52827990 -0.565914076 3 3
#> 12 -0.52827990 0.003216288 3 4
#> 13 -0.65209478 -0.157906449 4 1
#> 14 -0.65209478 -0.831651311 4 2
#> 15 -0.65209478 -1.333755259 4 3
#> 16 -0.65209478 -2.170488862 4 4
#> 17 -0.05689678 -0.234227260 5 1
#> 18 -0.05689678 -0.157087519 5 2
#> 19 -0.05689678 -0.381167050 5 3
#> 20 -0.05689678 0.249661083 5 4
##### From long-format
fast.reshape(dd,id="id")
#> x1 y1 id num1 x2 y2 num2 x3
#> 1 -1.53644982 -1.9292578 1 1 -1.53644982 -2.0424073 2 -1.53644982
#> 2 -0.30097613 -0.6209690 2 1 -0.30097613 1.0420627 2 -0.30097613
#> 3 -0.52827990 -0.8073932 3 1 -0.52827990 -0.7428593 2 -0.52827990
#> 4 -0.65209478 -0.1579064 4 1 -0.65209478 -0.8316513 2 -0.65209478
#> 5 -0.05689678 -0.2342273 5 1 -0.05689678 -0.1570875 2 -0.05689678
#> y3 num3 x4 y4 num4
#> 1 -0.8237835 3 -1.53644982 -1.476289383 4
#> 2 -0.3745405 3 -0.30097613 -0.889870613 4
#> 3 -0.5659141 3 -0.52827990 0.003216288 4
#> 4 -1.3337553 3 -0.65209478 -2.170488862 4
#> 5 -0.3811671 3 -0.05689678 0.249661083 4
## Restrict the set of within-cluster varying variables
fast.reshape(dd,"y",id="id")
#> x y1 id num y2 y3 y4
#> 1 -1.53644982 -1.9292578 1 1 -2.0424073 -0.8237835 -1.476289383
#> 2 -0.30097613 -0.6209690 2 1 1.0420627 -0.3745405 -0.889870613
#> 3 -0.52827990 -0.8073932 3 1 -0.7428593 -0.5659141 0.003216288
#> 4 -0.65209478 -0.1579064 4 1 -0.8316513 -1.3337553 -2.170488862
#> 5 -0.05689678 -0.2342273 5 1 -0.1570875 -0.3811671 0.249661083
fast.reshape(dd,"y",id="id",keep="x",sep=".")
#> x y.1 id y.2 y.3 y.4
#> 1 -1.53644982 -1.9292578 1 -2.0424073 -0.8237835 -1.476289383
#> 2 -0.30097613 -0.6209690 2 1.0420627 -0.3745405 -0.889870613
#> 3 -0.52827990 -0.8073932 3 -0.7428593 -0.5659141 0.003216288
#> 4 -0.65209478 -0.1579064 4 -0.8316513 -1.3337553 -2.170488862
#> 5 -0.05689678 -0.2342273 5 -0.1570875 -0.3811671 0.249661083
#####
x <- data.frame(id=c(5,5,6,6,7),y=1:5,x=1:5,tv=c(1,2,2,1,2))
x
#> id y x tv
#> 1 5 1 1 1
#> 2 5 2 2 2
#> 3 6 3 3 2
#> 4 6 4 4 1
#> 5 7 5 5 2
(xw <- fast.reshape(x,id="id"))
#> id y1 x1 tv1 y2 x2 tv2
#> 1 5 1 1 1 2 2 2
#> 2 6 3 3 2 4 4 1
#> 3 7 5 5 2 NA NA NA
(xl <- fast.reshape(xw,c("y","x"),idname="id2",keep=c()))
#> y x id2 num
#> 1 1 1 1 1
#> 2 2 2 1 2
#> 3 3 3 2 1
#> 4 4 4 2 2
#> 5 5 5 3 1
#> 6 NA NA 3 2
(xl <- fast.reshape(xw,c("y","x","tv")))
#> id y x tv num
#> 1 5 1 1 1 1
#> 2 5 2 2 2 2
#> 3 6 3 3 2 1
#> 4 6 4 4 1 2
#> 5 7 5 5 2 1
#> 6 7 NA NA NA 2
(xw2 <- fast.reshape(xl,id="id",num="num"))
#> id y1 x1 tv1 y2 x2 tv2
#> 1 5 1 1 1 2 2 2
#> 2 6 3 3 2 4 4 1
#> 3 7 5 5 2 NA NA NA
fast.reshape(xw2,c("y","x"),idname="id")
#> id tv1 tv2 y x num
#> 1 5 1 2 1 1 1
#> 2 5 1 2 2 2 2
#> 3 6 2 1 3 3 1
#> 4 6 2 1 4 4 2
#> 5 7 2 NA 5 5 1
#> 6 7 2 NA NA NA 2
### more generally:
### varying=list(c("ym","yf","yb1","yb2"), c("zm","zf","zb1","zb2"))
### varying=list(c("ym","yf","yb1","yb2")))
##### Family cluster example
d <- mets:::simBinFam(3)
d
#> agem agef ageb1 ageb2 xm xf xb1 xb2 ym yf yb1 yb2 id
#> 1 22.93606 23.91995 5.771517 7.924428 1 1 0 1 1 1 1 1 1
#> 2 23.99351 30.75988 8.636968 11.665460 1 1 1 1 1 0 0 0 2
#> 3 28.12132 29.26337 9.425925 11.233806 1 1 1 1 1 1 1 0 3
fast.reshape(d,var="y")
#> agem agef ageb1 ageb2 xm xf xb1 xb2 id y num
#> 1 22.93606 23.91995 5.771517 7.924428 1 1 0 1 1 1 m
#> 2 22.93606 23.91995 5.771517 7.924428 1 1 0 1 1 1 f
#> 3 22.93606 23.91995 5.771517 7.924428 1 1 0 1 1 1 b1
#> 4 22.93606 23.91995 5.771517 7.924428 1 1 0 1 1 1 b2
#> 5 23.99351 30.75988 8.636968 11.665460 1 1 1 1 2 1 m
#> 6 23.99351 30.75988 8.636968 11.665460 1 1 1 1 2 0 f
#> 7 23.99351 30.75988 8.636968 11.665460 1 1 1 1 2 0 b1
#> 8 23.99351 30.75988 8.636968 11.665460 1 1 1 1 2 0 b2
#> 9 28.12132 29.26337 9.425925 11.233806 1 1 1 1 3 1 m
#> 10 28.12132 29.26337 9.425925 11.233806 1 1 1 1 3 1 f
#> 11 28.12132 29.26337 9.425925 11.233806 1 1 1 1 3 1 b1
#> 12 28.12132 29.26337 9.425925 11.233806 1 1 1 1 3 0 b2
fast.reshape(d,varying=list(c("ym","yf","yb1","yb2")))
#> agem agef ageb1 ageb2 xm xf xb1 xb2 id ym num
#> 1 22.93606 23.91995 5.771517 7.924428 1 1 0 1 1 1 ym
#> 2 22.93606 23.91995 5.771517 7.924428 1 1 0 1 1 1 yf
#> 3 22.93606 23.91995 5.771517 7.924428 1 1 0 1 1 1 yb1
#> 4 22.93606 23.91995 5.771517 7.924428 1 1 0 1 1 1 yb2
#> 5 23.99351 30.75988 8.636968 11.665460 1 1 1 1 2 1 ym
#> 6 23.99351 30.75988 8.636968 11.665460 1 1 1 1 2 0 yf
#> 7 23.99351 30.75988 8.636968 11.665460 1 1 1 1 2 0 yb1
#> 8 23.99351 30.75988 8.636968 11.665460 1 1 1 1 2 0 yb2
#> 9 28.12132 29.26337 9.425925 11.233806 1 1 1 1 3 1 ym
#> 10 28.12132 29.26337 9.425925 11.233806 1 1 1 1 3 1 yf
#> 11 28.12132 29.26337 9.425925 11.233806 1 1 1 1 3 1 yb1
#> 12 28.12132 29.26337 9.425925 11.233806 1 1 1 1 3 0 yb2
d <- lava::sim(lava::lvm(~y1+y2+ya),10)
d
#> y1 y2 ya
#> 1 -0.07715294 -1.4672500 0.8303732
#> 2 -0.33400084 0.5210227 -1.2080828
#> 3 -0.03472603 -0.1587546 -1.0479844
#> 4 0.78763961 1.4645873 1.4411577
#> 5 2.07524501 -0.7660820 -1.0158475
#> 6 1.02739244 -0.4302118 0.4119747
#> 7 1.20790840 -0.9261095 -0.3810761
#> 8 -1.23132342 -0.1771040 0.4094018
#> 9 0.98389557 0.4020118 1.6888733
#> 10 0.21992480 -0.7317482 1.5865884
(dd <- fast.reshape(d,"y"))
#> y id num
#> 1 -0.07715294 1 1
#> 2 -1.46725003 1 2
#> 3 0.83037317 1 a
#> 4 -0.33400084 2 1
#> 5 0.52102274 2 2
#> 6 -1.20808279 2 a
#> 7 -0.03472603 3 1
#> 8 -0.15875460 3 2
#> 9 -1.04798441 3 a
#> 10 0.78763961 4 1
#> 11 1.46458731 4 2
#> 12 1.44115771 4 a
#> 13 2.07524501 5 1
#> 14 -0.76608200 5 2
#> 15 -1.01584747 5 a
#> 16 1.02739244 6 1
#> 17 -0.43021175 6 2
#> 18 0.41197471 6 a
#> 19 1.20790840 7 1
#> 20 -0.92610950 7 2
#> 21 -0.38107605 7 a
#> 22 -1.23132342 8 1
#> 23 -0.17710396 8 2
#> 24 0.40940184 8 a
#> 25 0.98389557 9 1
#> 26 0.40201178 9 2
#> 27 1.68887329 9 a
#> 28 0.21992480 10 1
#> 29 -0.73174817 10 2
#> 30 1.58658843 10 a
fast.reshape(d,"y",labelnum=TRUE)
#> ya y id num
#> 1 0.8303732 -0.07715294 1 1
#> 2 0.8303732 -1.46725003 1 2
#> 3 -1.2080828 -0.33400084 2 1
#> 4 -1.2080828 0.52102274 2 2
#> 5 -1.0479844 -0.03472603 3 1
#> 6 -1.0479844 -0.15875460 3 2
#> 7 1.4411577 0.78763961 4 1
#> 8 1.4411577 1.46458731 4 2
#> 9 -1.0158475 2.07524501 5 1
#> 10 -1.0158475 -0.76608200 5 2
#> 11 0.4119747 1.02739244 6 1
#> 12 0.4119747 -0.43021175 6 2
#> 13 -0.3810761 1.20790840 7 1
#> 14 -0.3810761 -0.92610950 7 2
#> 15 0.4094018 -1.23132342 8 1
#> 16 0.4094018 -0.17710396 8 2
#> 17 1.6888733 0.98389557 9 1
#> 18 1.6888733 0.40201178 9 2
#> 19 1.5865884 0.21992480 10 1
#> 20 1.5865884 -0.73174817 10 2
fast.reshape(dd,id="id",num="num")
#> y1 id y2 ya
#> 1 -0.07715294 1 -1.4672500 0.8303732
#> 2 -0.33400084 2 0.5210227 -1.2080828
#> 3 -0.03472603 3 -0.1587546 -1.0479844
#> 4 0.78763961 4 1.4645873 1.4411577
#> 5 2.07524501 5 -0.7660820 -1.0158475
#> 6 1.02739244 6 -0.4302118 0.4119747
#> 7 1.20790840 7 -0.9261095 -0.3810761
#> 8 -1.23132342 8 -0.1771040 0.4094018
#> 9 0.98389557 9 0.4020118 1.6888733
#> 10 0.21992480 10 -0.7317482 1.5865884
fast.reshape(dd,id="id",num="num",labelnum=TRUE)
#> y1 id y2 y3
#> 1 -0.07715294 1 -1.4672500 0.8303732
#> 2 -0.33400084 2 0.5210227 -1.2080828
#> 3 -0.03472603 3 -0.1587546 -1.0479844
#> 4 0.78763961 4 1.4645873 1.4411577
#> 5 2.07524501 5 -0.7660820 -1.0158475
#> 6 1.02739244 6 -0.4302118 0.4119747
#> 7 1.20790840 7 -0.9261095 -0.3810761
#> 8 -1.23132342 8 -0.1771040 0.4094018
#> 9 0.98389557 9 0.4020118 1.6888733
#> 10 0.21992480 10 -0.7317482 1.5865884
fast.reshape(d,c(a="y"),labelnum=TRUE) ## New column name
#> ya a id num
#> 1 0.8303732 -0.07715294 1 1
#> 2 0.8303732 -1.46725003 1 2
#> 3 -1.2080828 -0.33400084 2 1
#> 4 -1.2080828 0.52102274 2 2
#> 5 -1.0479844 -0.03472603 3 1
#> 6 -1.0479844 -0.15875460 3 2
#> 7 1.4411577 0.78763961 4 1
#> 8 1.4411577 1.46458731 4 2
#> 9 -1.0158475 2.07524501 5 1
#> 10 -1.0158475 -0.76608200 5 2
#> 11 0.4119747 1.02739244 6 1
#> 12 0.4119747 -0.43021175 6 2
#> 13 -0.3810761 1.20790840 7 1
#> 14 -0.3810761 -0.92610950 7 2
#> 15 0.4094018 -1.23132342 8 1
#> 16 0.4094018 -0.17710396 8 2
#> 17 1.6888733 0.98389557 9 1
#> 18 1.6888733 0.40201178 9 2
#> 19 1.5865884 0.21992480 10 1
#> 20 1.5865884 -0.73174817 10 2
##### Unbalanced data
m <- lava::lvm(c(y1,y2,y3,y4)~ x+z1+z3+z5)
d <- lava::sim(m,3)
d
#> y1 y2 y3 y4 x z1 z3
#> 1 0.676638 1.674612 1.517654 0.6072991 1.5197450 0.64224131 0.00213186
#> 2 -1.465844 1.360719 0.655016 -0.5508160 -0.3087406 -0.04470914 -0.63030033
#> 3 -1.160947 -3.672008 -3.237914 -2.6707705 -1.2532898 -1.73321841 -0.34096858
#> z5
#> 1 -1.156572
#> 2 1.803142
#> 3 -0.331132
fast.reshape(d,c("y","z"))
#> x y z id num
#> 1 1.5197450 0.6766380 0.64224131 1 1
#> 2 1.5197450 1.6746120 NA 1 2
#> 3 1.5197450 1.5176543 0.00213186 1 3
#> 4 1.5197450 0.6072991 NA 1 4
#> 5 1.5197450 NA -1.15657236 1 5
#> 6 -0.3087406 -1.4658437 -0.04470914 2 1
#> 7 -0.3087406 1.3607192 NA 2 2
#> 8 -0.3087406 0.6550160 -0.63030033 2 3
#> 9 -0.3087406 -0.5508160 NA 2 4
#> 10 -0.3087406 NA 1.80314191 2 5
#> 11 -1.2532898 -1.1609472 -1.73321841 3 1
#> 12 -1.2532898 -3.6720083 NA 3 2
#> 13 -1.2532898 -3.2379141 -0.34096858 3 3
#> 14 -1.2532898 -2.6707705 NA 3 4
#> 15 -1.2532898 NA -0.33113204 3 5
##### not-varying syntax:
fast.reshape(d,-c("x"))
#> x y z id num
#> 1 1.5197450 0.6766380 0.64224131 1 1
#> 2 1.5197450 1.6746120 NA 1 2
#> 3 1.5197450 1.5176543 0.00213186 1 3
#> 4 1.5197450 0.6072991 NA 1 4
#> 5 1.5197450 NA -1.15657236 1 5
#> 6 -0.3087406 -1.4658437 -0.04470914 2 1
#> 7 -0.3087406 1.3607192 NA 2 2
#> 8 -0.3087406 0.6550160 -0.63030033 2 3
#> 9 -0.3087406 -0.5508160 NA 2 4
#> 10 -0.3087406 NA 1.80314191 2 5
#> 11 -1.2532898 -1.1609472 -1.73321841 3 1
#> 12 -1.2532898 -3.6720083 NA 3 2
#> 13 -1.2532898 -3.2379141 -0.34096858 3 3
#> 14 -1.2532898 -2.6707705 NA 3 4
#> 15 -1.2532898 NA -0.33113204 3 5
##### Automatically define varying variables from trailing digits
fast.reshape(d)
#> x y z id num
#> 1 1.5197450 0.6766380 0.64224131 1 1
#> 2 1.5197450 1.6746120 NA 1 2
#> 3 1.5197450 1.5176543 0.00213186 1 3
#> 4 1.5197450 0.6072991 NA 1 4
#> 5 1.5197450 NA -1.15657236 1 5
#> 6 -0.3087406 -1.4658437 -0.04470914 2 1
#> 7 -0.3087406 1.3607192 NA 2 2
#> 8 -0.3087406 0.6550160 -0.63030033 2 3
#> 9 -0.3087406 -0.5508160 NA 2 4
#> 10 -0.3087406 NA 1.80314191 2 5
#> 11 -1.2532898 -1.1609472 -1.73321841 3 1
#> 12 -1.2532898 -3.6720083 NA 3 2
#> 13 -1.2532898 -3.2379141 -0.34096858 3 3
#> 14 -1.2532898 -2.6707705 NA 3 4
#> 15 -1.2532898 NA -0.33113204 3 5
##### Prostate cancer example
data(prt)
head(prtw <- fast.reshape(prt,"cancer",id="id"))
#> country time status zyg id cancer1 cancer2
#> 31 Denmark 96.98833 1 DZ 1 0 0
#> 39 Denmark 68.04498 1 DZ 3 0 0
#> 51 Denmark 78.78068 1 DZ 5 0 0
#> 70 Denmark 100.95488 1 MZ 9 0 0
#> 83 Denmark 104.55035 1 DZ 12 0 1
#> 95 Denmark 95.65324 1 DZ 15 0 0
ftable(cancer1~cancer2,data=prtw)
#> cancer1 0 1
#> cancer2
#> 0 13405 349
#> 1 362 106
rm(prtw)