Six subsidiaries send a budget workbook every quarter. Each is slightly different — different column orders, different account naming, sometimes an extra sheet. Priya needs one consolidated rollup.
from pathlib import Path
import pandas as pd
files = list(Path("subs_q2").glob("*.xlsx"))
for p in files:
print(p.name)
df = pd.read_excel(p, sheet_name="Budget")
print(" cols:", list(df.columns))
print(" rows:", len(df))
COL_MAP = {
# canonical_name : list of possible source names
"subsidiary": ["Sub", "Subsidiary", "Entity", "Company"],
"account": ["Account", "GL Account", "Ledger Acct", "AcctName"],
"month": ["Month", "Period", "MonthEnd"],
"amount": ["Amount", "Budget", "USD", "BudgetUSD"],
}
def rename(df):
out = {}
for canon, options in COL_MAP.items():
for opt in options:
if opt in df.columns:
out[opt] = canon
break
return df.rename(columns=out)
def normalise(df, sub_default):
df = rename(df)
if "subsidiary" not in df.columns:
df["subsidiary"] = sub_default
df["month"] = pd.to_datetime(df["month"]).dt.to_period("M").astype(str)
df["amount"] = pd.to_numeric(df["amount"], errors="coerce")
return df[["subsidiary", "account", "month", "amount"]]
frames = []
for p in files:
raw = pd.read_excel(p, sheet_name="Budget")
frames.append(normalise(raw, sub_default=p.stem))
combined = pd.concat(frames, ignore_index=True)
combined = combined.dropna(subset=["account", "amount"])
by_sub_account = (combined.groupby(["subsidiary", "account"])["amount"]
.sum()
.reset_index())
monthly_by_sub = combined.pivot_table(
index="subsidiary", columns="month",
values="amount", aggfunc="sum", fill_value=0,
margins=True, margins_name="Total")
with pd.ExcelWriter("consolidated_q2.xlsx") as w:
combined.to_excel(w, sheet_name="Raw", index=False)
by_sub_account.to_excel(w, sheet_name="By Sub × Account", index=False)
monthly_by_sub.to_excel(w, sheet_name="By Sub × Month")
Add a seventh subsidiary's file with brand-new column names. Update COL_MAP. Re-run. Confirm nothing else needed to change.