[Solved] Pandas – Create dynamic column(s) from a single column’s values

Question

You could run one apply() which will use for-loop to convert list to Series with headers – it can use enumerate to put correct number in headers.

Because some rows have less participants so it puts NaN which you can later fill with empty strings.

And next you can use join() to add all as new columns. Because headers are create in apply() so you don’t have to create them in join()

import pandas as pd

data = {'participants': 
[
    [{'roles': [{'type': 'director'}, {'type': 'founder'}, {'type': 'owner'}, {'type': 'real_owner'}], 'life': {'name': 'Lichun Du'}}],
    [{'roles': [{'type': 'board'}], 'life': {'name': 'Erik Mølgaard'}}, {'roles': [{'type': 'director'}, {'type': 'board'}, {'type': 'real_owner'}], 'life': {'name': 'Mikael Bodholdt Linde'}}, {'roles': [{'type': 'board'}, {'type': 'real_owner'}], 'life': {'name': 'Dorte Bøcker Linde'}}],
    [{'roles': [{'type': 'director'}, {'type': 'real_owner'}], 'life': {'name': 'Kristian Løth Hougaard'}}, {'roles': [{'type': 'owner'}], 'life': {'name': 'WORLD JET HOLDING ApS'}}],
]
}

df = pd.DataFrame(data)

def get_names(cell):
    
    all_names = pd.Series(dtype=object)
    
    for number, item in enumerate(cell, 1):
        name = item['life']['name']
        all_names[f'Participant #{number} Name'] = name

    return all_names

def get_roles(cell):
    
    all_roles = pd.Series(dtype=object)
    
    for number, item in enumerate(cell, 1):
        roles = [role['type'] for role in item['roles']]
        all_roles[f'Participant #{number} Role'] = ",".join(roles)

    return all_roles

roles = df['participants'].apply(get_roles)
roles = roles.fillna('')  # put empty string in place of NaN

names = df['participants'].apply(get_names)
names = names.fillna('')  # put empty string in place of NaN

df = df.join(roles)
df = df.join(names)

df = df.drop(columns=['participants'])  # remove old column

pd.options.display.max_colwidth = 100
print(df.to_string())

Result:

                 Participant #1 Role        Participant #2 Role Participant #3 Role     Participant #1 Name    Participant #2 Name Participant #3 Name
0  director,founder,owner,real_owner                                                              Lichun Du                                           
1                              board  director,board,real_owner    board,real_owner           Erik Mølgaard  Mikael Bodholdt Linde  Dorte Bøcker Linde
2                director,real_owner                      owner                      Kristian Løth Hougaard  WORLD JET HOLDING ApS

I used two function to get first only columns with roles and next columns only with names – but if you would need role1, name1, role2, name2, role3, name3 then it could be done with one function.

import pandas as pd

data = {'participants': 
[
    [{'roles': [{'type': 'director'}, {'type': 'founder'}, {'type': 'owner'}, {'type': 'real_owner'}], 'life': {'name': 'Lichun Du'}}],
    [{'roles': [{'type': 'board'}], 'life': {'name': 'Erik Mølgaard'}}, {'roles': [{'type': 'director'}, {'type': 'board'}, {'type': 'real_owner'}], 'life': {'name': 'Mikael Bodholdt Linde'}}, {'roles': [{'type': 'board'}, {'type': 'real_owner'}], 'life': {'name': 'Dorte Bøcker Linde'}}],
    [{'roles': [{'type': 'director'}, {'type': 'real_owner'}], 'life': {'name': 'Kristian Løth Hougaard'}}, {'roles': [{'type': 'owner'}], 'life': {'name': 'WORLD JET HOLDING ApS'}}],
]
}

df = pd.DataFrame(data)

def get_columns(cell):
    
    results = pd.Series(dtype=object)
    
    for number, item in enumerate(cell, 1):
        name = item['life']['name']
        results[f'Participant #{number} Name'] = name

        roles = [role['type'] for role in item['roles']]
        results[f'Participant #{number} Role'] = ",".join(roles)

    return results

columns = df['participants'].apply(get_columns)
names = columns.fillna('')  # put empty string in place of NaN

df = df.join(columns)
#print(df.columns)

df = df.drop(columns=['participants'])

pd.options.display.max_colwidth = 100
print(df.to_string())

Result:

      Participant #1 Name                Participant #1 Role    Participant #2 Name        Participant #2 Role Participant #3 Name Participant #3 Role
0               Lichun Du  director,founder,owner,real_owner                    NaN                        NaN                 NaN                 NaN
1           Erik Mølgaard                              board  Mikael Bodholdt Linde  director,board,real_owner  Dorte Bøcker Linde    board,real_owner
2  Kristian Løth Hougaard                director,real_owner  WORLD JET HOLDING ApS                      owner                 NaN                 NaN

Accepted Answer

You could run one apply() which will use for-loop to convert list to Series with headers – it can use enumerate to put correct number in headers.

Because some rows have less participants so it puts NaN which you can later fill with empty strings.

And next you can use join() to add all as new columns. Because headers are create in apply() so you don’t have to create them in join()

import pandas as pd

data = {'participants': 
[
    [{'roles': [{'type': 'director'}, {'type': 'founder'}, {'type': 'owner'}, {'type': 'real_owner'}], 'life': {'name': 'Lichun Du'}}],
    [{'roles': [{'type': 'board'}], 'life': {'name': 'Erik Mølgaard'}}, {'roles': [{'type': 'director'}, {'type': 'board'}, {'type': 'real_owner'}], 'life': {'name': 'Mikael Bodholdt Linde'}}, {'roles': [{'type': 'board'}, {'type': 'real_owner'}], 'life': {'name': 'Dorte Bøcker Linde'}}],
    [{'roles': [{'type': 'director'}, {'type': 'real_owner'}], 'life': {'name': 'Kristian Løth Hougaard'}}, {'roles': [{'type': 'owner'}], 'life': {'name': 'WORLD JET HOLDING ApS'}}],
]
}

df = pd.DataFrame(data)

def get_names(cell):
    
    all_names = pd.Series(dtype=object)
    
    for number, item in enumerate(cell, 1):
        name = item['life']['name']
        all_names[f'Participant #{number} Name'] = name

    return all_names

def get_roles(cell):
    
    all_roles = pd.Series(dtype=object)
    
    for number, item in enumerate(cell, 1):
        roles = [role['type'] for role in item['roles']]
        all_roles[f'Participant #{number} Role'] = ",".join(roles)

    return all_roles

roles = df['participants'].apply(get_roles)
roles = roles.fillna('')  # put empty string in place of NaN

names = df['participants'].apply(get_names)
names = names.fillna('')  # put empty string in place of NaN

df = df.join(roles)
df = df.join(names)

df = df.drop(columns=['participants'])  # remove old column

pd.options.display.max_colwidth = 100
print(df.to_string())

Result:

                 Participant #1 Role        Participant #2 Role Participant #3 Role     Participant #1 Name    Participant #2 Name Participant #3 Name
0  director,founder,owner,real_owner                                                              Lichun Du                                           
1                              board  director,board,real_owner    board,real_owner           Erik Mølgaard  Mikael Bodholdt Linde  Dorte Bøcker Linde
2                director,real_owner                      owner                      Kristian Løth Hougaard  WORLD JET HOLDING ApS

I used two function to get first only columns with roles and next columns only with names – but if you would need role1, name1, role2, name2, role3, name3 then it could be done with one function.

import pandas as pd

data = {'participants': 
[
    [{'roles': [{'type': 'director'}, {'type': 'founder'}, {'type': 'owner'}, {'type': 'real_owner'}], 'life': {'name': 'Lichun Du'}}],
    [{'roles': [{'type': 'board'}], 'life': {'name': 'Erik Mølgaard'}}, {'roles': [{'type': 'director'}, {'type': 'board'}, {'type': 'real_owner'}], 'life': {'name': 'Mikael Bodholdt Linde'}}, {'roles': [{'type': 'board'}, {'type': 'real_owner'}], 'life': {'name': 'Dorte Bøcker Linde'}}],
    [{'roles': [{'type': 'director'}, {'type': 'real_owner'}], 'life': {'name': 'Kristian Løth Hougaard'}}, {'roles': [{'type': 'owner'}], 'life': {'name': 'WORLD JET HOLDING ApS'}}],
]
}

df = pd.DataFrame(data)

def get_columns(cell):
    
    results = pd.Series(dtype=object)
    
    for number, item in enumerate(cell, 1):
        name = item['life']['name']
        results[f'Participant #{number} Name'] = name

        roles = [role['type'] for role in item['roles']]
        results[f'Participant #{number} Role'] = ",".join(roles)

    return results

columns = df['participants'].apply(get_columns)
names = columns.fillna('')  # put empty string in place of NaN

df = df.join(columns)
#print(df.columns)

df = df.drop(columns=['participants'])

pd.options.display.max_colwidth = 100
print(df.to_string())

Result:

      Participant #1 Name                Participant #1 Role    Participant #2 Name        Participant #2 Role Participant #3 Name Participant #3 Role
0               Lichun Du  director,founder,owner,real_owner                    NaN                        NaN                 NaN                 NaN
1           Erik Mølgaard                              board  Mikael Bodholdt Linde  director,board,real_owner  Dorte Bøcker Linde    board,real_owner
2  Kristian Løth Hougaard                director,real_owner  WORLD JET HOLDING ApS                      owner                 NaN                 NaN