From 8f9b8f08862cbadcbd0daaa29219cd0980606b3f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 16 Nov 2024 16:30:23 +0000 Subject: [PATCH] working on algorithm --- etl/customers/stonewater/Wave 3 Preparation.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index 7c104f97..008fd3bc 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -1633,6 +1633,9 @@ def propsed_wave_3_sample(): "- Archetyped V3.1.xlsx", header=4 ) + + # TODO: As written this keeps ONLY rows whose Archetype ID is "NOT PRIORITY POSTCODE" rather than dropping them (302 properties expected to be dropped) - check the comparison and confirm w/ Stonewater + asset_list = asset_list[asset_list["Archetype ID"] == "NOT PRIORITY POSTCODE"] # Clean address ids asset_list = asset_list[~pd.isnull(asset_list["Address ID"])] asset_list = asset_list[asset_list["Address ID"] != "Address ID"] @@ -1674,6 +1677,7 @@ def propsed_wave_3_sample(): # Tier 2: We have a property in the same archetype that was surveyed and is below EPC D # + results = [] for region in unique_postal_regions: # Take all of the properties in that region region_assets = asset_list[asset_list["Postal Region"] == region].copy() @@ -1722,10 +1726,17 @@ def propsed_wave_3_sample(): ) region_assets["Current EPC Band"] = region_assets["Current EPC Band"].fillna( - region_assets["Current EPC Band_method2"]) + region_assets["Current EPC Band_method2"].astype(str), + ) region_assets = region_assets.drop(columns=["Current EPC Band_method2"]) + # Label properties whose current EPC band is C or above (A, B or C) + region_assets["Confidence Tier"] = np.where( + region_assets["Current EPC Band"].isin(["C", "B", "A"]), + "6 - EPC C or above", region_assets["Confidence Tier"] + ) + missed_addressids = region_assets[pd.isnull(region_assets["Confidence Tier"])]["Address ID"].unique().tolist() # This means that this archetype was never surveyed and so we need to find a sufficiently similar property @@ -1785,5 
+1796,7 @@ def propsed_wave_3_sample(): if pd.isnull(region_assets["Current EPC Band"]).sum(): raise Exception("Something went wrong") + results.append(region_assets) + # if __name__ == "__main__": # main()