From 7d209d5d8e07b4112bffcdcfc748d04cc299abe6 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sun, 17 Nov 2024 16:28:43 +0000 Subject: [PATCH] creating loss and gain columns --- .../stonewater/Wave 3 Preparation.py | 48 +++++++++++++++---- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index d2110de8..b36ae756 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -1703,7 +1703,7 @@ def propsed_wave_3_sample(): region_assets["Confidence Tier"] = np.where( region_assets["Current EPC Band"].isin(["C", "B", "A"]), - "6 - property was surveyed", region_assets["Confidence Tier"] + "5 - property was surveyed", region_assets["Confidence Tier"] ) archetypes = region_assets[ @@ -1721,6 +1721,7 @@ def propsed_wave_3_sample(): (survey_results["Postal Region"] == region) ].groupby("Archetype ID")[["Current SAP Rating"]].mean().reset_index() region_surveyed["Current EPC Band"] = region_surveyed["Current SAP Rating"].apply(sap_to_epc) + region_surveyed = region_surveyed.drop(columns=["Current SAP Rating"]) region_assets = region_assets.merge( region_surveyed, @@ -1743,7 +1744,7 @@ def propsed_wave_3_sample(): # Handle EPC C region_assets["Confidence Tier"] = np.where( region_assets["Current EPC Band"].isin(["C", "B", "A"]) & pd.isnull(region_assets["Confidence Tier"]), - "6 - EPC C or above", region_assets["Confidence Tier"] + "5 - EPC C or above", region_assets["Confidence Tier"] ) region_assets = region_assets.drop(columns=["Current EPC Band_method1"]) @@ -1773,7 +1774,8 @@ def propsed_wave_3_sample(): ) region_assets["Confidence Tier"] = np.where( - region_assets["Current EPC Band_method2"].isin(["D", "E", "F", "G"]), + region_assets["Current EPC Band_method2"].isin(["D", "E", "F", "G"]) & pd.isnull( + region_assets["Confidence Tier"]), "2 - same archetype", region_assets["Confidence Tier"] ) @@ -1786,8 +1788,8 @@ def propsed_wave_3_sample(): # We label EPC C properties region_assets["Confidence Tier"] = np.where( - region_assets["Current EPC Band"].isin(["C", "B", "A"]), - "6 - EPC C or above", region_assets["Confidence Tier"] + region_assets["Current EPC Band"].isin(["C", "B", "A"]) & pd.isnull(region_assets["Confidence Tier"]), + "5 - EPC C or above", region_assets["Confidence Tier"] ) missed_addressids = region_assets[pd.isnull(region_assets["Confidence Tier"])]["Address ID"].unique().tolist() @@ -1823,7 +1825,7 @@ def propsed_wave_3_sample(): final_missed_matches.append( { "Address ID": a_id, - "Confidence Tier": "5 - no similar property, needs survey to confirm", + "Confidence Tier": "4 - no similar property, needs survey to confirm", "Current EPC Band": "Unknown" } ) @@ -1832,7 +1834,7 @@ def propsed_wave_3_sample(): expected_sap = surveyed_similar["Current SAP Rating"].mean() expected_epc = sap_to_epc(expected_sap) if expected_epc in ["C", "B", "A"]: - tier = "6 - EPC C or above" + tier = "5 - EPC C or above" else: tier = "3 - similar property" @@ -1861,12 +1863,42 @@ def propsed_wave_3_sample(): region_assets["Current EPC Band_method3"], region_assets["Current EPC Band"] ) - region_assets = region_assets.drop(columns=["Confidence Tier_method3"]) + region_assets = region_assets.drop(columns=["Confidence Tier_method3", "Current EPC Band_method3"]) if pd.isnull(region_assets["Current EPC Band"]).sum(): raise Exception("Something went wrong") results.append(region_assets) + results = pd.concat(results) + + # Create a pivot table for counts of Confidence Tier by Postal Region + geographic_summary = results.pivot_table( + index='Postal Region', + columns='Confidence Tier', + aggfunc='size', + fill_value=0 + ).reset_index() + + # We create the gain and loss columns + # Gain is the sum of these columns: + # '1 - Archetype surveyed', '1 - property was surveyed', + # '2 - same archetype', '3 - similar property', + # Loss is the sum of these columns: + # '4 - no similar property, needs survey to confirm', + # '5 - EPC C or above', '5 - property was surveyed' + geographic_summary["Gain"] = geographic_summary[ + ['1 - Archetype surveyed', '1 - property was surveyed', '2 - same archetype', '3 - similar property'] + ].sum(axis=1) + + geographic_summary["Loss"] = geographic_summary[ + ['4 - no similar property, needs survey to confirm', '5 - EPC C or above', '5 - property was surveyed'] + ].sum(axis=1) + + geographic_summary.sum() + + geographic_summary = geographic_summary.sort_values("Loss", ascending=True) + geographic_summary["Loss Cumulative Sum"] = geographic_summary["Loss"].cumsum() + # if __name__ == "__main__": # main()