# Load the raw event log (expected columns: user_id, event_name, timestamp).
# NOTE(review): assumes pandas is already imported as `pd` before this point.
# `timestamp` is parsed to datetime at load time so that later chronological
# sorts compare real instants instead of comparing raw text lexicographically.
events = pd.read_csv("events.csv", parse_dates=["timestamp"])

# Funnel steps, listed from the top of the funnel to the bottom.
steps = ["session_start", "product_view", "add_to_cart", "checkout", "purchase"]

# Distinct users who fired each step at least once. Order of events is NOT
# considered here: a user counts for a step even if earlier steps are missing.
step_users = {
    step: events.loc[events["event_name"] == step, "user_id"].nunique()
    for step in steps
}

funnel = pd.DataFrame({"step": steps, "users": [step_users[step] for step in steps]})

users = funnel["users"]
top_users = users.iloc[0]
# The first step has no predecessor; it is compared with itself (100%).
prev_users = users.shift().fillna(top_users)

# A zero denominator makes the percentage undefined. Mask it to NaN so the
# table shows NaN instead of a misleading `inf` when a later step has users
# but the reference step has none.
top_denominator = top_users if top_users != 0 else float("nan")
prev_denominator = prev_users.where(prev_users != 0)

funnel["pct_of_top"] = (users / top_denominator * 100).round(1)
funnel["pct_of_prev"] = (users / prev_denominator * 100).round(1)
print(funnel)
# One list of event names per user, ordered by (user_id, timestamp).
# NOTE(review): the order is chronological only if `timestamp` sorts
# correctly (datetime dtype or zero-padded ISO-8601 text) -- confirm.
seq = (
    events.sort_values(by=["user_id", "timestamp"])
    .groupby("user_id")["event_name"]
    .agg(list)
)
def reached(events_list, step_idx, funnel_steps=None):
    """Return True if the funnel was completed in order up to ``step_idx``.

    The event list is scanned once, left to right. A step only counts when
    every earlier step has already been seen, so the first ``step_idx + 1``
    funnel steps must appear as an in-order subsequence of ``events_list``.
    Unrelated or repeated events in between are ignored.

    Args:
        events_list: Event names for one user, in the order they occurred.
        step_idx: Zero-based index of the funnel step to test.
        funnel_steps: Ordered funnel step names. Defaults to the module-level
            ``steps`` list, which keeps existing two-argument calls unchanged.

    Returns:
        True if the user got through step ``step_idx`` (and all earlier
        steps) in order, otherwise False.
    """
    ordered_steps = steps if funnel_steps is None else funnel_steps

    # A step beyond the end of the funnel can never be reached; skip the scan.
    if step_idx >= len(ordered_steps):
        return False

    pos = 0  # index of the next funnel step we are waiting for
    for ev in events_list:
        if pos < len(ordered_steps) and ev == ordered_steps[pos]:
            pos += 1
        # Stop as soon as the requested depth is reached.
        if pos > step_idx:
            return True
    return False
# For each funnel step, count the users whose ordered event list passes
# `reached` for that step's index.
reached_step = {
    step_name: int(seq.apply(reached, args=(step_idx,)).sum())
    for step_idx, step_name in enumerate(steps)
}
print(reached_step)
import matplotlib.pyplot as plt
# Horizontal bar chart of the funnel. The rows are reversed once so that the
# first funnel step is drawn as the top bar.
bottom_up = funnel.iloc[::-1]

fig, ax = plt.subplots(figsize=(6, 4))
ax.barh(bottom_up["step"], bottom_up["users"], color="#2b6cb0")

# Label every bar at its tip with the user count and its share of the top step.
for row, (count, share) in enumerate(zip(bottom_up["users"], bottom_up["pct_of_top"])):
    ax.text(count, row, f" {count:,} ({share}%)", va="center")

ax.set_title("Funnel — last 30 days")
plt.tight_layout()
plt.savefig("funnel.png", dpi=200)
Compute the funnel separately for desktop vs mobile users. Where does mobile drop off more?