orders = pd.read_csv("orders.csv", parse_dates=["order_date"])
# customer_id, order_date, amount
asof = orders["order_date"].max()
rfm = orders.groupby("customer_id").agg(
recency = ("order_date", lambda d: (asof - d.max()).days),
frequency = ("order_date", "count"),
monetary = ("amount", "sum"),
)
# Recency: lower = better, so reverse
rfm["R"] = pd.qcut(rfm["recency"], 5, labels=[5,4,3,2,1]).astype(int)
rfm["F"] = pd.qcut(rfm["frequency"], 5, labels=[1,2,3,4,5], duplicates="drop").astype(int)
rfm["M"] = pd.qcut(rfm["monetary"], 5, labels=[1,2,3,4,5]).astype(int)
rfm["RFM"] = rfm["R"].astype(str) + rfm["F"].astype(str) + rfm["M"].astype(str)
def label(row):
if row["R"] >= 4 and row["F"] >= 4 and row["M"] >= 4: return "Champion"
if row["R"] >= 4 and row["F"] >= 3: return "Loyal"
if row["R"] >= 4 and row["F"] <= 2: return "New"
if row["R"] <= 2 and row["F"] >= 4: return "At Risk"
if row["R"] <= 2 and row["F"] <= 2: return "Lost"
return "Need Attention"
rfm["segment"] = rfm.apply(label, axis=1)
print(rfm["segment"].value_counts())
with pd.ExcelWriter("rfm_targets.xlsx") as w:
for seg in rfm["segment"].unique():
rfm[rfm["segment"]==seg].reset_index().to_excel(w, sheet_name=seg[:31], index=False)
Build a campaign email list of "At Risk" customers (R≤2, F≥4) with their last-order-amount as a personalisation token.