diff --git a/etl/age_band_calculator.py b/etl/age_band_calculator.py
index 1c09476..61eb207 100644
--- a/etl/age_band_calculator.py
+++ b/etl/age_band_calculator.py
@@ -53,7 +53,30 @@ for installer in installers:
     df["week_commencing"] = date
     all_dfs.append(df)
 
 
-giant_df = pd.concat(all_dfs, ignore_index=True)
-giant_df
-giant_df.to_csv("age_band.csv")
+# Print each collected frame before they are combined.
+for df in all_dfs:
+    print(df)
+
+# Combine every collected frame into one table and export the raw rows.
+giant_df = pd.concat(all_dfs, ignore_index=True)
+giant_df.to_excel("age_band.xlsx", index=False)
+
+# Strip the literal "W.C. " prefix and parse the remainder as a day-first date.
+# regex=False keeps the dots literal on pandas versions where str.replace
+# treats the pattern as a regular expression by default.
+giant_df['week_commencing_cleaned'] = pd.to_datetime(
+    giant_df['week_commencing'].str.replace("W.C. ", "", regex=False),
+    dayfirst=True
+)
+# Lift the row limit so the summary printed below is not truncated.
+pd.set_option('display.max_rows', None)
+
+# Count rows per (week, age_band) pair and pivot the age bands into columns;
+# pairs with no rows are filled with 0.
+grouped = giant_df.groupby(['week_commencing_cleaned', 'age_band']).size().unstack(fill_value=0)
+grouped = grouped.sort_index()
+print(grouped)
+
+output_file = "grouped_age_band_by_week.xlsx"
+grouped.to_excel(output_file)