Suppliers send Excel invoices into a shared folder. Priya checks each, keys them into the GL. She wants this scripted.
Each invoice has a fixed structure: supplier in B2, invoice no in B3, date in B4, subtotal/tax/total in E10/E11/E12, line items in B6:D8.
import pandas as pd
from openpyxl import load_workbook
from pathlib import Path
valid_suppliers = set(pd.read_excel("master_data.xlsx", sheet_name="Suppliers")["name"])
TAX_RATE = 0.08
rows, errors = [], []
for path in Path("inbox").glob("*.xlsx"):
wb = load_workbook(path, data_only=True)
ws = wb.active
supplier = ws["B2"].value
inv_no = ws["B3"].value
inv_date = ws["B4"].value
subtotal = ws["E10"].value
tax = ws["E11"].value
total = ws["E12"].value
# Validate
if supplier not in valid_suppliers:
errors.append((path.name, f"Unknown supplier: {supplier}")); continue
if subtotal is None or total is None:
errors.append((path.name, "Missing totals")); continue
expected_tax = round(subtotal * TAX_RATE, 2)
if abs(tax - expected_tax) > 0.05:
errors.append((path.name, f"Tax mismatch: {tax} vs expected {expected_tax}"))
continue
if abs(total - (subtotal + tax)) > 0.05:
errors.append((path.name, "Total ≠ subtotal+tax")); continue
rows.append({
"invoice_no": inv_no,
"supplier": supplier,
"date": inv_date,
"subtotal": subtotal,
"tax": tax,
"total": total,
"source": path.name,
})
gl_feed = pd.DataFrame(rows)
gl_feed.to_csv("gl_feed_april.csv", index=False)
if errors:
pd.DataFrame(errors, columns=["file", "error"]).to_csv("invoice_errors.csv", index=False)
print(f"⚠️ {len(errors)} errors — see invoice_errors.csv")
print(f"✅ Posted {len(gl_feed)} clean invoices")
from pathlib import Path
done = Path("processed"); done.mkdir(exist_ok=True)
for r in rows:
Path("inbox") / r["source"]).rename(done / r["source"])
Extend the script: refuse to post an invoice number that's already in the GL feed from previous runs.