From 7885467fa40240a2a2632b4b6e120cce5a047c61 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 4 Feb 2025 14:35:24 +0000 Subject: [PATCH] formatting output --- .../stonewater/Wave 3 Preparation.py | 42 +++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index 1748f624..fcde164e 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -3777,6 +3777,21 @@ def revised_model(): no_match = [] matches = [] for _, home in tqdm(new_priority_postcodes.iterrows(), total=len(new_priority_postcodes)): + + # We check if the property was surveyed + survey_result = coordinated_packages[ + coordinated_packages["Organisation Reference"] == home["Organisation Reference"] + ] + if not survey_result.empty: + to_extend = [ + { + "Organisation Reference": home["Organisation Reference"], + "Best Match Organisation Reference": m, + "Was Surveyed": True + } for m in survey_result["Organisation Reference"].values + ] + matches.extend(to_extend) + closest_match = find_nearest_matching_property(coordinated_packages, home) if closest_match is None: no_match.append(home["Organisation Reference"]) @@ -3785,7 +3800,8 @@ def revised_model(): to_extend = [ { "Organisation Reference": home["Organisation Reference"], - "Best Match Organisation Reference": m + "Best Match Organisation Reference": m, + "Was Surveyed": False } for m in closest_match["Organisation Reference"].values ] matches.extend(to_extend) @@ -4010,7 +4026,8 @@ def revised_model(): [ 'Organisation Reference', 'Address', 'Postcode', 'Address ID', 'uprn', 'Archetype ID', 'SAP', 'SAP Band', "Property Type", "Walls", "Roofs", 'Glazing', - 'Heating', 'Main Fuel', 'Hot Water', 'Estimated SAP Rating', 'Estimated EPC Rating' + 'Heating', 'Main Fuel', 'Hot Water', 'Number of matches', 'Proportion', + 'Estimated SAP Rating', 'Estimated EPC Rating' ] ].rename( columns={ @@ -4023,6 +4040,7 @@ def revised_model(): 'Heating': 'Parity - Heating', 'Main Fuel': 'Parity - Main Fuel', 'Hot Water': 'Parity - Hot Water', + 'Proportion': 'Proportion of matched properties with same EPC rating', } ).merge( epc_df[ @@ -4061,7 +4079,25 @@ def revised_model(): on='Organisation Reference' ) - worksheet["Years Since Last EPC"] + worksheet["Years Since Last EPC"] = pd.Timestamp.now().year - pd.to_datetime( + worksheet["Last EPC - Date Lodged"]).dt.year + + worksheet["Last EPC - uprn"] = worksheet["Last EPC - uprn"].astype("Int64").astype(str) + + worksheet["uprn"] = np.where( + pd.isnull(worksheet["uprn"]) & pd.notnull(worksheet["Last EPC - uprn"]), + worksheet["Last EPC - uprn"], + worksheet["uprn"] + ) + + worksheet["uprn"] = worksheet["uprn"].replace("", "") + + # Save to Excel with multiple sheets + excel_path = os.path.join(CUSTOMER_FOLDER_PATH, "Jan 2025 Project", "04022025 Stonewater Priority List.xlsx") + with pd.ExcelWriter(excel_path, engine="xlsxwriter") as writer: + worksheet.to_excel(writer, sheet_name="Worksheet", index=False, header=True) + mapped_lookup.to_excel(writer, sheet_name="Lookup Table", index=False, header=True) + output_coordination_sheet.to_excel(writer, sheet_name="Coordination", index=False, header=True) # if __name__ == "__main__": # main()