import numpy as np
import pandas as pd
np.random.seed(123)
df = pd.DataFrame({"M2018" : {0 : "p", 1 : "q", 2 : "r"},
"M2018" : {0 : "s", 1 : "t", 2 : "u"},
"N2019" : {0 : 2.5, 1 : 1.2, 2 : .7},
"N2019" : {0 : 3.2, 1 : 1.3, 2 : .1},
"X" : dict(zip(range(3), np.random.randn(3)))
})
df["id"] = df.index
df
pd.wide_to_long(df, ["M", "N"], i="id", j="year")
... # doctest: +NORMALIZE_WHITESPACE
With multiple id columns
df = pd.DataFrame({
'fID': [1, 1, 2, 2, 3, 3],
'birth': [1, 2, 3, 1, 2, 3],
'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9],
'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4]
})
df
y = pd.wide_to_long(df, stubnames='ht', i=['fID', 'birth'], j='age')
y
# doctest: +NORMALIZE_WHITESPACE
Going from long back to wide just takes some creative use of unstack
w = y.unstack()
w.columns = w.columns.map('{0[0]}{0[1]}'.format)
w.reset_index()
Less wieldy column names are also handled:
np.random.seed(0)
df = pd.DataFrame({'P(weekly)-2015': np.random.rand(3),
'P(weekly)-2016': np.random.rand(3),
'Q(weekly)-2015': np.random.rand(3),
'Q(weekly)-2016': np.random.rand(3),
'X' : np.random.randint(3, size=3)})
df['id'] = df.index
df # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
pd.wide_to_long(df, ['P(weekly)', 'Q(weekly)'], i='id',
j='year', sep='-')
# doctest: +NORMALIZE_WHITESPACE
If there are many columns you can use a regex to find our stubnames and pass that list on to
wide_to_long
stubnames = sorted(
set([match[0] for match in df.columns.str.findall(
r'[P-Q]\(.*\)').values if match != [] ])
)
list(stubnames)
All of the above examples have integers as suffixes. It is possible to have non-integers as suffixes.
df = pd.DataFrame({
'fID': [1, 1, 2, 2, 3, 3],
'birth': [1, 2, 3, 1, 2, 3],
'ht_one': [2.8, 2.9, 2.2, 2, 1.8, 1.9],
'ht_two': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4]
})
df
y = pd.wide_to_long(df, stubnames='ht', i=['fID', 'birth'], j='age',
sep='_', suffix='\w+')
y
# doctest: +NORMALIZE_WHITESPACE