import pandas as pd
import numpy as np
# Create the DataFrame
d = {
"col1": ["A", "A", "A", "B", "B", "B"],
"col2": ["start_amount", "mid_amount", "end_amount", "start_amount", "mid_amount", "end_amount"],
"amount": [0, 2, 8, 1, 2, 3]
}
df = pd.DataFrame(d)
# Calculate the end_amount for each group
end_amounts = df[df['col2'] == 'end_amount'].set_index('col1')['amount']
'''
col1
A 8
B 3
Name: amount, dtype: int64
'''
# Map end_amounts to the original DataFrame
df['end_amounts'] = df['col1'].map(end_amounts)
# Calculate shares
df['share'] = df['amount'] / df['end_amounts']
print(df)
'''
col1 col2 amount end_amounts share
0 A start_amount 0 8 0.000000
1 A mid_amount 2 8 0.250000
2 A end_amount 8 8 1.000000
3 B start_amount 1 3 0.333333
4 B mid_amount 2 3 0.666667
5 B end_amount 3 3 1.000000
'''
Method 2 : Numpy solution
import pandas as pd
import numpy as np
d = {
"col1": ["A", "A", "A", "B", "B", "B"],
"col2": ["start_amount", "mid_amount", "end_amount", "start_amount", "mid_amount", "end_amount"],
"amount": [0, 2, 8, 1, 2, 3]
}
df = pd.DataFrame(d)
# Convert to NumPy arrays for efficiency
col1 = df['col1'].values
col2 = df['col2'].values
amount = df['amount'].values
# Find unique groups and their end_amounts
unique_groups, end_amounts_idx = np.unique(col1, return_index=True)
end_amounts_dict = {
ug: amount[(col1 == ug) & (col2 == "end_amount")][0]
for ug in unique_groups
}
print(end_amounts_dict)#{'A': 8, 'B': 3}
# Convert the dictionary to a NumPy array for efficient indexing
end_amounts = np.array([end_amounts_dict[element] for element in col1])
print(end_amounts)#[8 8 8 3 3 3]
# Calculate shares using vectorized operations
shares = amount / end_amounts
# Add the computed shares back to the DataFrame
df['share'] = shares
print(df)
'''
col1 col2 amount share
0 A start_amount 0 0.000000
1 A mid_amount 2 0.250000
2 A end_amount 8 1.000000
3 B start_amount 1 0.333333
4 B mid_amount 2 0.666667
5 B end_amount 3 1.000000
'''