From d2cc39a5653dd6e68c9fa9d6cbdbc9a653e066aa Mon Sep 17 00:00:00 2001
From: Jun-te Kim
Date: Wed, 23 Apr 2025 08:13:36 +0000
Subject: [PATCH] age band calculator

---
Review notes (text in this section is ignored by `git am` and is not part of
the commit message):

* What the hunk does: prints every per-installer frame, concatenates them,
  writes the raw rows to age_band.xlsx, parses "week_commencing" into a
  datetime column (day-first, after dropping the literal "W.C. " prefix),
  then counts rows per (week, age_band), pivots age bands into columns and
  writes the table to grouped_age_band_by_week.xlsx.
* Fix applied in review: Series.str.replace() is now called with
  regex=False. Without it, pandas releases before 2.0 treat "W.C. " as a
  regular expression in which each "." matches any character, so the result
  depended on the installed pandas version.
* NOTE(review): this patch was restored from a copy whose line breaks and
  indentation had been collapsed. The indentation of the two context lines
  and the position of the two blank context lines were reconstructed to
  satisfy the hunk header (-53,7); confirm them against the target file
  before applying. The post-image hash on the "index" line still refers to
  the content as originally committed.

 etl/age_band_calculator.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/etl/age_band_calculator.py b/etl/age_band_calculator.py
index 1c09476..61eb207 100644
--- a/etl/age_band_calculator.py
+++ b/etl/age_band_calculator.py
@@ -53,7 +53,22 @@ for installer in installers:
     df["week_commencing"] = date
     all_dfs.append(df)
 
 
-giant_df = pd.concat(all_dfs, ignore_index=True)
-giant_df
-giant_df.to_csv("age_band.csv")
+for df in all_dfs:
+    print(df)
+
+giant_df = pd.concat(all_dfs, ignore_index=True)
+giant_df.to_excel("age_band.xlsx", index=False)
+
+giant_df['week_commencing_cleaned'] = pd.to_datetime(
+    giant_df['week_commencing'].str.replace("W.C. ", "", regex=False),
+    dayfirst=True
+)
+pd.set_option('display.max_rows', None)
+
+grouped = giant_df.groupby(['week_commencing_cleaned', 'age_band']).size().unstack(fill_value=0)
+grouped = grouped.sort_index()
+print(grouped)
+
+output_file = "grouped_age_band_by_week.xlsx"
+grouped.to_excel(output_file)
\ No newline at end of file