"How are our newer customers doing compared to older ones?" The classic cohort retention chart.
# Build the cohort retention table from `orders`, which must provide a
# datetime-like `order_date` column and a `customer_id` column.
orders["order_month"] = orders["order_date"].dt.to_period("M")

# A customer's cohort is the month of their first order.
first_order_month = (
    orders.groupby("customer_id")["order_month"].min().rename("cohort")
)
orders = orders.merge(first_order_month, on="customer_id")

# Whole calendar months between each order and the customer's cohort month,
# computed column-wise instead of with a per-row Python callback.
orders["months_since_join"] = (
    (orders["order_month"].dt.year - orders["cohort"].dt.year) * 12
    + (orders["order_month"].dt.month - orders["cohort"].dt.month)
)

# Rows: cohort; columns: months since joining; values: distinct customers
# who placed at least one order in that month.
cohort_counts = (
    orders.groupby(["cohort", "months_since_join"])["customer_id"]
    .nunique()
    .unstack(fill_value=0)
)

# Every customer orders in their own cohort month, so the first column
# (offset 0) holds the size of each cohort.
cohort_sizes = cohort_counts.iloc[:, 0]

# `fill_value=0` also zero-fills months that lie after the last month present
# in the data. Those cells are "not yet observed", not 0% retention, so they
# are blanked out (NaN) instead of being reported as zero.
_last_month = orders["order_month"].max()
_months_observable = (
    (_last_month.year - cohort_counts.index.year) * 12
    + (_last_month.month - cohort_counts.index.month)
)
_is_observed = (
    _months_observable.to_numpy()[:, None]
    >= cohort_counts.columns.to_numpy()[None, :]
)

# Scale to percent before rounding: multiplying after rounding can
# reintroduce floating-point noise into the rounded values.
retention = (
    cohort_counts.divide(cohort_sizes, axis=0)
    .mul(100)
    .round(1)
    .where(_is_observed)
)
import seaborn as sns
import matplotlib.pyplot as plt
# Draw the retention table as an annotated heatmap and write it to a PNG.
fig, ax = plt.subplots(figsize=(12, 6))
sns.heatmap(
    retention.iloc[:, :12],  # first 12 columns of the table
    annot=True,
    fmt=".0f",
    cmap="YlGn",
    cbar_kws={"label": "Retention %"},
    ax=ax,
)
ax.set(
    title="Cohort Retention — % of cohort active in each month after signup",
    xlabel="Months after signup",
    ylabel="Cohort (signup month)",
)
# `fig` is the figure just created, so these act on the same figure the
# pyplot-level calls would.
fig.tight_layout()
fig.savefig("cohort_retention.png", dpi=200)
`.divide(..., axis=0)` divides each cohort's row by that cohort's size, turning counts into fractions; multiplying by 100 then gives percentages. Change the analysis: instead of retention %, show average revenue per active customer by cohort × months-since-signup.