You could run one apply()
which will use for
-loop to convert list to Series
with headers – it can use enumerate
to put correct number in headers.
Because some rows have less participants so it puts NaN
which you can later fill with empty strings.
And next you can use join()
to add all as new columns. Because headers are create in apply()
so you don’t have to create them in join()
import pandas as pd
data = {'participants':
[
[{'roles': [{'type': 'director'}, {'type': 'founder'}, {'type': 'owner'}, {'type': 'real_owner'}], 'life': {'name': 'Lichun Du'}}],
[{'roles': [{'type': 'board'}], 'life': {'name': 'Erik Mølgaard'}}, {'roles': [{'type': 'director'}, {'type': 'board'}, {'type': 'real_owner'}], 'life': {'name': 'Mikael Bodholdt Linde'}}, {'roles': [{'type': 'board'}, {'type': 'real_owner'}], 'life': {'name': 'Dorte Bøcker Linde'}}],
[{'roles': [{'type': 'director'}, {'type': 'real_owner'}], 'life': {'name': 'Kristian Løth Hougaard'}}, {'roles': [{'type': 'owner'}], 'life': {'name': 'WORLD JET HOLDING ApS'}}],
]
}
df = pd.DataFrame(data)
def get_names(cell):
all_names = pd.Series(dtype=object)
for number, item in enumerate(cell, 1):
name = item['life']['name']
all_names[f'Participant #{number} Name'] = name
return all_names
def get_roles(cell):
all_roles = pd.Series(dtype=object)
for number, item in enumerate(cell, 1):
roles = [role['type'] for role in item['roles']]
all_roles[f'Participant #{number} Role'] = ",".join(roles)
return all_roles
roles = df['participants'].apply(get_roles)
roles = roles.fillna('') # put empty string in place of NaN
names = df['participants'].apply(get_names)
names = names.fillna('') # put empty string in place of NaN
df = df.join(roles)
df = df.join(names)
df = df.drop(columns=['participants']) # remove old column
pd.options.display.max_colwidth = 100
print(df.to_string())
Result:
Participant #1 Role Participant #2 Role Participant #3 Role Participant #1 Name Participant #2 Name Participant #3 Name
0 director,founder,owner,real_owner Lichun Du
1 board director,board,real_owner board,real_owner Erik Mølgaard Mikael Bodholdt Linde Dorte Bøcker Linde
2 director,real_owner owner Kristian Løth Hougaard WORLD JET HOLDING ApS
I used two function to get first only columns with roles and next columns only with names – but if you would need role1, name1, role2, name2, role3, name3
then it could be done with one function.
import pandas as pd
data = {'participants':
[
[{'roles': [{'type': 'director'}, {'type': 'founder'}, {'type': 'owner'}, {'type': 'real_owner'}], 'life': {'name': 'Lichun Du'}}],
[{'roles': [{'type': 'board'}], 'life': {'name': 'Erik Mølgaard'}}, {'roles': [{'type': 'director'}, {'type': 'board'}, {'type': 'real_owner'}], 'life': {'name': 'Mikael Bodholdt Linde'}}, {'roles': [{'type': 'board'}, {'type': 'real_owner'}], 'life': {'name': 'Dorte Bøcker Linde'}}],
[{'roles': [{'type': 'director'}, {'type': 'real_owner'}], 'life': {'name': 'Kristian Løth Hougaard'}}, {'roles': [{'type': 'owner'}], 'life': {'name': 'WORLD JET HOLDING ApS'}}],
]
}
df = pd.DataFrame(data)
def get_columns(cell):
results = pd.Series(dtype=object)
for number, item in enumerate(cell, 1):
name = item['life']['name']
results[f'Participant #{number} Name'] = name
roles = [role['type'] for role in item['roles']]
results[f'Participant #{number} Role'] = ",".join(roles)
return results
columns = df['participants'].apply(get_columns)
names = columns.fillna('') # put empty string in place of NaN
df = df.join(columns)
#print(df.columns)
df = df.drop(columns=['participants'])
pd.options.display.max_colwidth = 100
print(df.to_string())
Result:
Participant #1 Name Participant #1 Role Participant #2 Name Participant #2 Role Participant #3 Name Participant #3 Role
0 Lichun Du director,founder,owner,real_owner NaN NaN NaN NaN
1 Erik Mølgaard board Mikael Bodholdt Linde director,board,real_owner Dorte Bøcker Linde board,real_owner
2 Kristian Løth Hougaard director,real_owner WORLD JET HOLDING ApS owner NaN NaN
11
solved Pandas – Create dynamic column(s) from a single column’s values