Fast reshape/transpose of data

fast.reshape(
  data,
  varying,
  id,
  num,
  sep = "",
  keep,
  idname = "id",
  numname = "num",
  factor = FALSE,
  idcombine = TRUE,
  labelnum = FALSE,
  labels,
  regex = mets.options()$regex,
  dropid = FALSE,
  ...
)

Arguments

data

data.frame or matrix

varying

Vector of prefix-names of the time varying variables. Optional for Long->Wide reshaping.

id

id-variable. If omitted then reshape Wide->Long.

num

Optional number/time variable

sep

String separating the prefix-name from the number/time

keep

Vector of column names to keep

idname

Name of id-variable (Wide->Long)

numname

Name of number-variable (Wide->Long)

factor

If true all factors are kept (otherwise treated as character)

idcombine

If TRUE and id is a vector of several variables, the unique id is combined from all the variables. Otherwise only the first variable is used as identifier.

labelnum

If TRUE varying variables in wide format (going from long->wide) are labeled 1,2,3,... otherwise use 'num' variable. In long-format (going from wide->long) varying variables matching 'varying' prefix are only selected if their postfix is a number.

labels

Optional labels for the number variable

regex

Use regular expressions

dropid

Drop id in long format (default FALSE)

...

Optional additional arguments

Author

Thomas Scheike, Klaus K. Holst

Examples

library("lava")
m <- lvm(c(y1,y2,y3,y4)~x)
d <- sim(m,5)
d
#>           y1         y2         y3           y4           x
#> 1  0.2831227 -2.1182123 -1.4250463  0.071372698 -0.02381443
#> 2  1.5331086 -1.8251924 -0.1559777  0.989453671  0.68849861
#> 3 -0.8615187  1.6376094 -0.1752069 -0.553247876  0.97964789
#> 4  2.3227427  0.2381912  0.5568031  2.301990157  1.25501206
#> 5 -2.2193572 -0.8322391 -0.2814669  0.007107947 -0.75306422
fast.reshape(d,"y")
#>              x            y id num
#> 1  -0.02381443  0.283122682  1   1
#> 2  -0.02381443 -2.118212338  1   2
#> 3  -0.02381443 -1.425046293  1   3
#> 4  -0.02381443  0.071372698  1   4
#> 5   0.68849861  1.533108608  2   1
#> 6   0.68849861 -1.825192368  2   2
#> 7   0.68849861 -0.155977699  2   3
#> 8   0.68849861  0.989453671  2   4
#> 9   0.97964789 -0.861518733  3   1
#> 10  0.97964789  1.637609422  3   2
#> 11  0.97964789 -0.175206928  3   3
#> 12  0.97964789 -0.553247876  3   4
#> 13  1.25501206  2.322742718  4   1
#> 14  1.25501206  0.238191204  4   2
#> 15  1.25501206  0.556803097  4   3
#> 16  1.25501206  2.301990157  4   4
#> 17 -0.75306422 -2.219357160  5   1
#> 18 -0.75306422 -0.832239130  5   2
#> 19 -0.75306422 -0.281466913  5   3
#> 20 -0.75306422  0.007107947  5   4
fast.reshape(fast.reshape(d,"y"),id="id")
#>            x1         y1 id num1          x2         y2 num2          x3
#> 1 -0.02381443  0.2831227  1    1 -0.02381443 -2.1182123    2 -0.02381443
#> 2  0.68849861  1.5331086  2    1  0.68849861 -1.8251924    2  0.68849861
#> 3  0.97964789 -0.8615187  3    1  0.97964789  1.6376094    2  0.97964789
#> 4  1.25501206  2.3227427  4    1  1.25501206  0.2381912    2  1.25501206
#> 5 -0.75306422 -2.2193572  5    1 -0.75306422 -0.8322391    2 -0.75306422
#>           y3 num3          x4           y4 num4
#> 1 -1.4250463    3 -0.02381443  0.071372698    4
#> 2 -0.1559777    3  0.68849861  0.989453671    4
#> 3 -0.1752069    3  0.97964789 -0.553247876    4
#> 4  0.5568031    3  1.25501206  2.301990157    4
#> 5 -0.2814669    3 -0.75306422  0.007107947    4

##### From wide-format
(dd <- fast.reshape(d,"y"))
#>              x            y id num
#> 1  -0.02381443  0.283122682  1   1
#> 2  -0.02381443 -2.118212338  1   2
#> 3  -0.02381443 -1.425046293  1   3
#> 4  -0.02381443  0.071372698  1   4
#> 5   0.68849861  1.533108608  2   1
#> 6   0.68849861 -1.825192368  2   2
#> 7   0.68849861 -0.155977699  2   3
#> 8   0.68849861  0.989453671  2   4
#> 9   0.97964789 -0.861518733  3   1
#> 10  0.97964789  1.637609422  3   2
#> 11  0.97964789 -0.175206928  3   3
#> 12  0.97964789 -0.553247876  3   4
#> 13  1.25501206  2.322742718  4   1
#> 14  1.25501206  0.238191204  4   2
#> 15  1.25501206  0.556803097  4   3
#> 16  1.25501206  2.301990157  4   4
#> 17 -0.75306422 -2.219357160  5   1
#> 18 -0.75306422 -0.832239130  5   2
#> 19 -0.75306422 -0.281466913  5   3
#> 20 -0.75306422  0.007107947  5   4
## Same with explicit setting new id and number variable/column names
## and separator "" (default) and dropping x
fast.reshape(d,"y",idname="a",timevar="b",sep="",keep=c())
#>               y a num
#> 1   0.283122682 1   1
#> 2  -2.118212338 1   2
#> 3  -1.425046293 1   3
#> 4   0.071372698 1   4
#> 5   1.533108608 2   1
#> 6  -1.825192368 2   2
#> 7  -0.155977699 2   3
#> 8   0.989453671 2   4
#> 9  -0.861518733 3   1
#> 10  1.637609422 3   2
#> 11 -0.175206928 3   3
#> 12 -0.553247876 3   4
#> 13  2.322742718 4   1
#> 14  0.238191204 4   2
#> 15  0.556803097 4   3
#> 16  2.301990157 4   4
#> 17 -2.219357160 5   1
#> 18 -0.832239130 5   2
#> 19 -0.281466913 5   3
#> 20  0.007107947 5   4
## Same with 'reshape' list-syntax
fast.reshape(d,list(c("y1","y2","y3","y4")),labelnum=TRUE)
#>              x           y1 id num
#> 1  -0.02381443  0.283122682  1   1
#> 2  -0.02381443 -2.118212338  1   2
#> 3  -0.02381443 -1.425046293  1   3
#> 4  -0.02381443  0.071372698  1   4
#> 5   0.68849861  1.533108608  2   1
#> 6   0.68849861 -1.825192368  2   2
#> 7   0.68849861 -0.155977699  2   3
#> 8   0.68849861  0.989453671  2   4
#> 9   0.97964789 -0.861518733  3   1
#> 10  0.97964789  1.637609422  3   2
#> 11  0.97964789 -0.175206928  3   3
#> 12  0.97964789 -0.553247876  3   4
#> 13  1.25501206  2.322742718  4   1
#> 14  1.25501206  0.238191204  4   2
#> 15  1.25501206  0.556803097  4   3
#> 16  1.25501206  2.301990157  4   4
#> 17 -0.75306422 -2.219357160  5   1
#> 18 -0.75306422 -0.832239130  5   2
#> 19 -0.75306422 -0.281466913  5   3
#> 20 -0.75306422  0.007107947  5   4

##### From long-format
fast.reshape(dd,id="id")
#>            x1         y1 id num1          x2         y2 num2          x3
#> 1 -0.02381443  0.2831227  1    1 -0.02381443 -2.1182123    2 -0.02381443
#> 2  0.68849861  1.5331086  2    1  0.68849861 -1.8251924    2  0.68849861
#> 3  0.97964789 -0.8615187  3    1  0.97964789  1.6376094    2  0.97964789
#> 4  1.25501206  2.3227427  4    1  1.25501206  0.2381912    2  1.25501206
#> 5 -0.75306422 -2.2193572  5    1 -0.75306422 -0.8322391    2 -0.75306422
#>           y3 num3          x4           y4 num4
#> 1 -1.4250463    3 -0.02381443  0.071372698    4
#> 2 -0.1559777    3  0.68849861  0.989453671    4
#> 3 -0.1752069    3  0.97964789 -0.553247876    4
#> 4  0.5568031    3  1.25501206  2.301990157    4
#> 5 -0.2814669    3 -0.75306422  0.007107947    4
## Restrict the set of within-cluster varying variables
fast.reshape(dd,"y",id="id")
#>             x         y1 id num         y2         y3           y4
#> 1 -0.02381443  0.2831227  1   1 -2.1182123 -1.4250463  0.071372698
#> 2  0.68849861  1.5331086  2   1 -1.8251924 -0.1559777  0.989453671
#> 3  0.97964789 -0.8615187  3   1  1.6376094 -0.1752069 -0.553247876
#> 4  1.25501206  2.3227427  4   1  0.2381912  0.5568031  2.301990157
#> 5 -0.75306422 -2.2193572  5   1 -0.8322391 -0.2814669  0.007107947
fast.reshape(dd,"y",id="id",keep="x",sep=".")
#>             x        y.1 id        y.2        y.3          y.4
#> 1 -0.02381443  0.2831227  1 -2.1182123 -1.4250463  0.071372698
#> 2  0.68849861  1.5331086  2 -1.8251924 -0.1559777  0.989453671
#> 3  0.97964789 -0.8615187  3  1.6376094 -0.1752069 -0.553247876
#> 4  1.25501206  2.3227427  4  0.2381912  0.5568031  2.301990157
#> 5 -0.75306422 -2.2193572  5 -0.8322391 -0.2814669  0.007107947

#####
x <- data.frame(id=c(5,5,6,6,7),y=1:5,x=1:5,tv=c(1,2,2,1,2))
x
#>   id y x tv
#> 1  5 1 1  1
#> 2  5 2 2  2
#> 3  6 3 3  2
#> 4  6 4 4  1
#> 5  7 5 5  2
(xw <- fast.reshape(x,id="id"))
#>   id y1 x1 tv1 y2 x2 tv2
#> 1  5  1  1   1  2  2   2
#> 2  6  3  3   2  4  4   1
#> 3  7  5  5   2 NA NA  NA
(xl <- fast.reshape(xw,c("y","x"),idname="id2",keep=c()))
#>    y  x id2 num
#> 1  1  1   1   1
#> 2  2  2   1   2
#> 3  3  3   2   1
#> 4  4  4   2   2
#> 5  5  5   3   1
#> 6 NA NA   3   2
(xl <- fast.reshape(xw,c("y","x","tv")))
#>   id  y  x tv num
#> 1  5  1  1  1   1
#> 2  5  2  2  2   2
#> 3  6  3  3  2   1
#> 4  6  4  4  1   2
#> 5  7  5  5  2   1
#> 6  7 NA NA NA   2
(xw2 <- fast.reshape(xl,id="id",num="num"))
#>   id y1 x1 tv1 y2 x2 tv2
#> 1  5  1  1   1  2  2   2
#> 2  6  3  3   2  4  4   1
#> 3  7  5  5   2 NA NA  NA
fast.reshape(xw2,c("y","x"),idname="id")
#>   id tv1 tv2  y  x num
#> 1  5   1   2  1  1   1
#> 2  5   1   2  2  2   2
#> 3  6   2   1  3  3   1
#> 4  6   2   1  4  4   2
#> 5  7   2  NA  5  5   1
#> 6  7   2  NA NA NA   2

### more generally:
### varying=list(c("ym","yf","yb1","yb2"), c("zm","zf","zb1","zb2"))
### varying=list(c("ym","yf","yb1","yb2")))

##### Family cluster example
d <- mets:::simBinFam(3)
d
#>       agem     agef     ageb1     ageb2 xm xf xb1 xb2 ym yf yb1 yb2 id
#> 1 28.21417 31.47441  6.105765  7.227351  1  1   0   0  1  1   1   0  1
#> 2 22.45870 29.00487 12.336780 14.339498  1  1   1   1  1  1   1   1  2
#> 3 26.02417 27.86702  5.663357  8.545406  1  1   0   1  1  1   1   1  3
fast.reshape(d,var="y")
#>        agem     agef     ageb1     ageb2 xm xf xb1 xb2 id y num
#> 1  28.21417 31.47441  6.105765  7.227351  1  1   0   0  1 1   m
#> 2  28.21417 31.47441  6.105765  7.227351  1  1   0   0  1 1   f
#> 3  28.21417 31.47441  6.105765  7.227351  1  1   0   0  1 1  b1
#> 4  28.21417 31.47441  6.105765  7.227351  1  1   0   0  1 0  b2
#> 5  22.45870 29.00487 12.336780 14.339498  1  1   1   1  2 1   m
#> 6  22.45870 29.00487 12.336780 14.339498  1  1   1   1  2 1   f
#> 7  22.45870 29.00487 12.336780 14.339498  1  1   1   1  2 1  b1
#> 8  22.45870 29.00487 12.336780 14.339498  1  1   1   1  2 1  b2
#> 9  26.02417 27.86702  5.663357  8.545406  1  1   0   1  3 1   m
#> 10 26.02417 27.86702  5.663357  8.545406  1  1   0   1  3 1   f
#> 11 26.02417 27.86702  5.663357  8.545406  1  1   0   1  3 1  b1
#> 12 26.02417 27.86702  5.663357  8.545406  1  1   0   1  3 1  b2
fast.reshape(d,varying=list(c("ym","yf","yb1","yb2")))
#>        agem     agef     ageb1     ageb2 xm xf xb1 xb2 id ym num
#> 1  28.21417 31.47441  6.105765  7.227351  1  1   0   0  1  1  ym
#> 2  28.21417 31.47441  6.105765  7.227351  1  1   0   0  1  1  yf
#> 3  28.21417 31.47441  6.105765  7.227351  1  1   0   0  1  1 yb1
#> 4  28.21417 31.47441  6.105765  7.227351  1  1   0   0  1  0 yb2
#> 5  22.45870 29.00487 12.336780 14.339498  1  1   1   1  2  1  ym
#> 6  22.45870 29.00487 12.336780 14.339498  1  1   1   1  2  1  yf
#> 7  22.45870 29.00487 12.336780 14.339498  1  1   1   1  2  1 yb1
#> 8  22.45870 29.00487 12.336780 14.339498  1  1   1   1  2  1 yb2
#> 9  26.02417 27.86702  5.663357  8.545406  1  1   0   1  3  1  ym
#> 10 26.02417 27.86702  5.663357  8.545406  1  1   0   1  3  1  yf
#> 11 26.02417 27.86702  5.663357  8.545406  1  1   0   1  3  1 yb1
#> 12 26.02417 27.86702  5.663357  8.545406  1  1   0   1  3  1 yb2

d <- sim(lvm(~y1+y2+ya),10)
d
#>             y1          y2          ya
#> 1  -0.79072920  0.49222745 -1.85163417
#> 2  -0.07033981  0.53315014  0.54690525
#> 3   0.20755278  0.19958345  2.04303427
#> 4   0.67745555 -0.65015253  0.09673758
#> 5  -1.31952201 -0.68257848  1.38630576
#> 6   0.62516266  0.05819279 -0.01838253
#> 7  -1.22042483 -0.17291440  0.69390363
#> 8  -0.28613030  0.40750732  0.94344927
#> 9   0.87842877 -0.09988101  0.94327568
#> 10 -1.10653994  0.51268991  0.01490019
(dd <- fast.reshape(d,"y"))
#>              y id num
#> 1  -0.79072920  1   1
#> 2   0.49222745  1   2
#> 3  -1.85163417  1   a
#> 4  -0.07033981  2   1
#> 5   0.53315014  2   2
#> 6   0.54690525  2   a
#> 7   0.20755278  3   1
#> 8   0.19958345  3   2
#> 9   2.04303427  3   a
#> 10  0.67745555  4   1
#> 11 -0.65015253  4   2
#> 12  0.09673758  4   a
#> 13 -1.31952201  5   1
#> 14 -0.68257848  5   2
#> 15  1.38630576  5   a
#> 16  0.62516266  6   1
#> 17  0.05819279  6   2
#> 18 -0.01838253  6   a
#> 19 -1.22042483  7   1
#> 20 -0.17291440  7   2
#> 21  0.69390363  7   a
#> 22 -0.28613030  8   1
#> 23  0.40750732  8   2
#> 24  0.94344927  8   a
#> 25  0.87842877  9   1
#> 26 -0.09988101  9   2
#> 27  0.94327568  9   a
#> 28 -1.10653994 10   1
#> 29  0.51268991 10   2
#> 30  0.01490019 10   a
fast.reshape(d,"y",labelnum=TRUE)
#>             ya           y id num
#> 1  -1.85163417 -0.79072920  1   1
#> 2  -1.85163417  0.49222745  1   2
#> 3   0.54690525 -0.07033981  2   1
#> 4   0.54690525  0.53315014  2   2
#> 5   2.04303427  0.20755278  3   1
#> 6   2.04303427  0.19958345  3   2
#> 7   0.09673758  0.67745555  4   1
#> 8   0.09673758 -0.65015253  4   2
#> 9   1.38630576 -1.31952201  5   1
#> 10  1.38630576 -0.68257848  5   2
#> 11 -0.01838253  0.62516266  6   1
#> 12 -0.01838253  0.05819279  6   2
#> 13  0.69390363 -1.22042483  7   1
#> 14  0.69390363 -0.17291440  7   2
#> 15  0.94344927 -0.28613030  8   1
#> 16  0.94344927  0.40750732  8   2
#> 17  0.94327568  0.87842877  9   1
#> 18  0.94327568 -0.09988101  9   2
#> 19  0.01490019 -1.10653994 10   1
#> 20  0.01490019  0.51268991 10   2
fast.reshape(dd,id="id",num="num")
#>             y1 id          y2          ya
#> 1  -0.79072920  1  0.49222745 -1.85163417
#> 2  -0.07033981  2  0.53315014  0.54690525
#> 3   0.20755278  3  0.19958345  2.04303427
#> 4   0.67745555  4 -0.65015253  0.09673758
#> 5  -1.31952201  5 -0.68257848  1.38630576
#> 6   0.62516266  6  0.05819279 -0.01838253
#> 7  -1.22042483  7 -0.17291440  0.69390363
#> 8  -0.28613030  8  0.40750732  0.94344927
#> 9   0.87842877  9 -0.09988101  0.94327568
#> 10 -1.10653994 10  0.51268991  0.01490019
fast.reshape(dd,id="id",num="num",labelnum=TRUE)
#>             y1 id          y2          y3
#> 1  -0.79072920  1  0.49222745 -1.85163417
#> 2  -0.07033981  2  0.53315014  0.54690525
#> 3   0.20755278  3  0.19958345  2.04303427
#> 4   0.67745555  4 -0.65015253  0.09673758
#> 5  -1.31952201  5 -0.68257848  1.38630576
#> 6   0.62516266  6  0.05819279 -0.01838253
#> 7  -1.22042483  7 -0.17291440  0.69390363
#> 8  -0.28613030  8  0.40750732  0.94344927
#> 9   0.87842877  9 -0.09988101  0.94327568
#> 10 -1.10653994 10  0.51268991  0.01490019
fast.reshape(d,c(a="y"),labelnum=TRUE) ## New column name
#>             ya           a id num
#> 1  -1.85163417 -0.79072920  1   1
#> 2  -1.85163417  0.49222745  1   2
#> 3   0.54690525 -0.07033981  2   1
#> 4   0.54690525  0.53315014  2   2
#> 5   2.04303427  0.20755278  3   1
#> 6   2.04303427  0.19958345  3   2
#> 7   0.09673758  0.67745555  4   1
#> 8   0.09673758 -0.65015253  4   2
#> 9   1.38630576 -1.31952201  5   1
#> 10  1.38630576 -0.68257848  5   2
#> 11 -0.01838253  0.62516266  6   1
#> 12 -0.01838253  0.05819279  6   2
#> 13  0.69390363 -1.22042483  7   1
#> 14  0.69390363 -0.17291440  7   2
#> 15  0.94344927 -0.28613030  8   1
#> 16  0.94344927  0.40750732  8   2
#> 17  0.94327568  0.87842877  9   1
#> 18  0.94327568 -0.09988101  9   2
#> 19  0.01490019 -1.10653994 10   1
#> 20  0.01490019  0.51268991 10   2


##### Unbalanced data
m <- lvm(c(y1,y2,y3,y4)~ x+z1+z3+z5)
d <- sim(m,3)
d
#>          y1        y2        y3        y4          x         z1         z3
#> 1  3.913257 2.1852597  3.569181  2.865170  1.6641621 -1.5399405  1.3126754
#> 2 -3.185682 0.3776233 -2.776879 -4.252584 -1.4104728  0.1248257 -0.6162465
#> 3  1.328163 2.8094582  1.039085  2.429321  0.6552037  1.3275028 -0.1304471
#>             z5
#> 1  1.548377100
#> 2 -0.397956131
#> 3 -0.001175883
fast.reshape(d,c("y","z"))
#>             x          y            z id num
#> 1   1.6641621  3.9132570 -1.539940528  1   1
#> 2   1.6641621  2.1852597           NA  1   2
#> 3   1.6641621  3.5691812  1.312675427  1   3
#> 4   1.6641621  2.8651701           NA  1   4
#> 5   1.6641621         NA  1.548377100  1   5
#> 6  -1.4104728 -3.1856821  0.124825699  2   1
#> 7  -1.4104728  0.3776233           NA  2   2
#> 8  -1.4104728 -2.7768785 -0.616246543  2   3
#> 9  -1.4104728 -4.2525842           NA  2   4
#> 10 -1.4104728         NA -0.397956131  2   5
#> 11  0.6552037  1.3281625  1.327502837  3   1
#> 12  0.6552037  2.8094582           NA  3   2
#> 13  0.6552037  1.0390851 -0.130447133  3   3
#> 14  0.6552037  2.4293205           NA  3   4
#> 15  0.6552037         NA -0.001175883  3   5

##### not-varying syntax:
fast.reshape(d,-c("x"))
#>             x          y            z id num
#> 1   1.6641621  3.9132570 -1.539940528  1   1
#> 2   1.6641621  2.1852597           NA  1   2
#> 3   1.6641621  3.5691812  1.312675427  1   3
#> 4   1.6641621  2.8651701           NA  1   4
#> 5   1.6641621         NA  1.548377100  1   5
#> 6  -1.4104728 -3.1856821  0.124825699  2   1
#> 7  -1.4104728  0.3776233           NA  2   2
#> 8  -1.4104728 -2.7768785 -0.616246543  2   3
#> 9  -1.4104728 -4.2525842           NA  2   4
#> 10 -1.4104728         NA -0.397956131  2   5
#> 11  0.6552037  1.3281625  1.327502837  3   1
#> 12  0.6552037  2.8094582           NA  3   2
#> 13  0.6552037  1.0390851 -0.130447133  3   3
#> 14  0.6552037  2.4293205           NA  3   4
#> 15  0.6552037         NA -0.001175883  3   5

##### Automatically define varying variables from trailing digits
fast.reshape(d)
#>             x          y            z id num
#> 1   1.6641621  3.9132570 -1.539940528  1   1
#> 2   1.6641621  2.1852597           NA  1   2
#> 3   1.6641621  3.5691812  1.312675427  1   3
#> 4   1.6641621  2.8651701           NA  1   4
#> 5   1.6641621         NA  1.548377100  1   5
#> 6  -1.4104728 -3.1856821  0.124825699  2   1
#> 7  -1.4104728  0.3776233           NA  2   2
#> 8  -1.4104728 -2.7768785 -0.616246543  2   3
#> 9  -1.4104728 -4.2525842           NA  2   4
#> 10 -1.4104728         NA -0.397956131  2   5
#> 11  0.6552037  1.3281625  1.327502837  3   1
#> 12  0.6552037  2.8094582           NA  3   2
#> 13  0.6552037  1.0390851 -0.130447133  3   3
#> 14  0.6552037  2.4293205           NA  3   4
#> 15  0.6552037         NA -0.001175883  3   5

##### Prostate cancer example
data(prt)
head(prtw <- fast.reshape(prt,"cancer",id="id"))
#>    country      time status zyg id cancer1 cancer2
#> 31 Denmark  96.98833      1  DZ  1       0       0
#> 39 Denmark  68.04498      1  DZ  3       0       0
#> 51 Denmark  78.78068      1  DZ  5       0       0
#> 70 Denmark 100.95488      1  MZ  9       0       0
#> 83 Denmark 104.55035      1  DZ 12       0       1
#> 95 Denmark  95.65324      1  DZ 15       0       0
ftable(cancer1~cancer2,data=prtw)
#>         cancer1     0     1
#> cancer2                    
#> 0               13405   349
#> 1                 362   106
rm(prtw)