survey = pd.read_excel("engagement_2026.xlsx")
# cols: respondent_id, team, q1_likert (1-5), q2_likert ..., q10_open (free text)
likert_cols = [c for c in survey.columns if c.endswith("_likert")]
by_team = (survey.groupby("team")[likert_cols].mean().round(2))
by_team["overall"] = by_team.mean(axis=1).round(2)
by_team.to_excel("survey_scores_by_team.xlsx")
# Distribution per question
dist = survey.melt(id_vars="team", value_vars=likert_cols,
var_name="question", value_name="score")
dist_pct = (dist.groupby(["question","score"]).size()
.unstack(fill_value=0)
.pipe(lambda d: d.div(d.sum(axis=1), axis=0) * 100)
.round(1))
import re
from collections import Counter
text = " ".join(survey["q10_open"].dropna().astype(str)).lower()
words = re.findall(r"[a-z']+", text)
stop = {"the","and","a","to","of","in","is","i","it","for","that","but","my","we","with","this","on","be","are"}
freq = Counter(w for w in words if w not in stop and len(w) > 3)
top20 = pd.DataFrame(freq.most_common(20), columns=["word","count"])
with pd.ExcelWriter("survey_summary.xlsx") as w:
by_team.to_excel(w, sheet_name="By Team")
dist_pct.to_excel(w, sheet_name="Distributions")
top20.to_excel(w, sheet_name="Top Words", index=False)
Add an NPS computation: % promoters (9-10) minus % detractors (0-6).