from pathlib import Path
import pandas as pd
# Stack every CSV in monthly_files/ into one frame, tagging each row
# with the file it came from.
folder = Path("monthly_files")
frames = [
    pd.read_csv(csv_path).assign(source_file=csv_path.name)  # track which file
    for csv_path in sorted(folder.glob("*.csv"))
]
combined = pd.concat(frames, ignore_index=True)
print(f"Combined {len(frames)} files, {len(combined):,} total rows")
This pattern shows up everywhere — monthly reports, regional exports, daily dumps. The snippet sorts by filename; if you need chronological order by modification time instead, sort with `key=lambda p: p.stat().st_mtime`.
If files are called sales_2026-01.csv, you can pull the month out:
# Read each sales_YYYY-MM.csv, tagging rows with the month taken
# from the filename. Appends to the `frames` list defined above.
for csv_path in folder.glob("sales_*.csv"):
    month_label = csv_path.stem.split("_")[1]  # e.g. '2026-01'
    monthly = pd.read_csv(csv_path)
    monthly["month"] = month_label
    frames.append(monthly)
pd.concat() aligns columns by name; missing columns become NaN. If files have wildly different shapes, normalise each one before stacking:
def normalise(df):
    """Return *df* restricted to the canonical date/customer/amount columns.

    Column names are stripped of surrounding whitespace and lower-cased
    before matching; any missing canonical column comes back filled with
    NaN (via reindex). The input frame is NOT modified — the original
    version assigned ``df.columns`` in place, mutating the caller's frame.
    """
    renamed = df.rename(columns=lambda c: c.strip().lower())
    keep = ["date", "customer", "amount"]
    return renamed.reindex(columns=keep)
# Normalise every CSV's columns before stacking them.
frames = []
for csv_path in folder.glob("*.csv"):
    frames.append(normalise(pd.read_csv(csv_path)))
combined = pd.concat(frames, ignore_index=True)
from pathlib import Path
import pandas as pd
import re
# Combine monthly_xlsx/*.xlsx into one yearly workbook, tagging each
# row with the YYYY-MM token found in its filename.
folder = Path("monthly_xlsx")
frames = []
for xlsx_path in sorted(folder.glob("*.xlsx")):
    # Pull the YYYY-MM tag out of the filename; skip files without one.
    m = re.search(r"(\d{4}-\d{2})", xlsx_path.stem)
    if not m:
        continue
    df = pd.read_excel(xlsx_path)
    df["month"] = m.group(1)
    frames.append(df)
# pd.concat([]) raises an opaque ValueError — fail with a clear message
# when no matching file was found.
if not frames:
    raise SystemExit("No *.xlsx files with a YYYY-MM tag found in monthly_xlsx/")
year_df = pd.concat(frames, ignore_index=True)
year_df.to_excel("yearly_combined.xlsx", index=False)
print(f"Wrote {len(year_df):,} rows.")
folder.glob("*.csv") + pd.concat() = combine many files into one. Pretend you have ten CSVs in a folder. Write the combine-and-tag script. Bonus: print the row count per file before combining.