# Load the raw touch log. `ts` is parsed as a datetime column so that the
# later sort on ("user_id", "ts") orders each user's touches by time.
# Expected columns: user_id, ts, channel, conversion (bool), revenue
touches = pd.read_csv(
    filepath_or_buffer="touches.csv",
    parse_dates=["ts"],
)
# Users with at least one conversion row (unique ids, order of appearance).
converters = touches.loc[touches["conversion"], "user_id"].unique()

# Timestamp of each converter's final conversion, looked up for every touch
# row. Users with no conversion get NaT.
last_conversion_ts = (
    touches.loc[touches["conversion"]].groupby("user_id")["ts"].max()
)
cutoff = touches["user_id"].map(last_conversion_ts)

# Keep converters' touches, but drop rows that are strictly later than the
# user's final conversion: a touch that happens after the last conversion
# cannot have contributed to it, and leaving it in the path would hand it
# last-touch (and part of the linear) credit.
# Comparisons against NaT evaluate to False, so rows with a missing timestamp
# are never dropped by this filter.
in_journey = touches["user_id"].isin(converters) & ~(touches["ts"] > cutoff)

# One row per converting user, indexed by user_id:
#   path    - the user's channels in chronological order
#   revenue - the largest revenue value among the rows kept for that user
# NOTE(review): this assumes revenue is recorded on conversion rows; a revenue
# value that only appears on a post-conversion row would no longer be seen.
j = (
    touches[in_journey]
    .sort_values(["user_id", "ts"])
    .groupby("user_id")
    .agg(path=("channel", list), revenue=("revenue", "max"))
)
# Tag every journey with its entry and exit channel. Each path is built from
# a user's existing rows, so it always holds at least one channel.
j["first"] = [channels[0] for channels in j["path"]]
j["last"] = [channels[-1] for channels in j["path"]]

# First-touch model: the whole journey revenue goes to the opening channel.
first_touch = j["revenue"].groupby(j["first"]).sum()

# Last-touch model: the whole journey revenue goes to the closing channel.
last_touch = j["revenue"].groupby(j["last"]).sum()
# Linear model: every touch in a journey receives an equal share of that
# journey's revenue. A channel that appears several times in one path
# therefore collects several shares.
rows = []
for path, rev in zip(j["path"], j["revenue"]):
    share = rev / len(path)
    for c in path:
        rows.append({"channel": c, "credit": share})

# Name the columns explicitly: with no converting journeys `rows` is empty,
# and a column-less DataFrame would make groupby("channel") raise KeyError.
linear = (
    pd.DataFrame(rows, columns=["channel", "credit"])
    .groupby("channel")["credit"]
    .sum()
)
# Put the three attribution models side by side, one row per channel and one
# column per model.
model_totals = [
    first_touch.rename("first"),
    last_touch.rename("last"),
    linear.rename("linear"),
]
compare = pd.concat(model_totals, axis=1)

# A channel missing from a model received no credit there; show it as 0 and
# round the totals to whole units.
compare = compare.fillna(0).round(0)

# NOTE(review): the sorted frame is not assigned, so `compare` itself keeps
# its original row order. Presumably this is the final expression of an
# interactive cell and is meant for display only - confirm.
compare.sort_values(by="linear", ascending=False)
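Implement a position-based (U-shaped) model: assign 40% of each journey's revenue to the first touch, 40% to the last touch, and split the remaining 20% evenly among the middle touches. Journeys with only one or two touches have no middle touches, so they need an explicit rule (for example, 100% to the single touch, or 50/50 between the two).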