From 2158ab2cd50df7edcfc7e119b56237145f4f1dd1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 16 Nov 2024 16:33:43 +0000 Subject: [PATCH] debugging Stonewater algorithm --- etl/customers/stonewater/Wave 3 Preparation.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index 008fd3bc..ef7dd414 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -1635,7 +1635,7 @@ def propsed_wave_3_sample(): ) # TODO: We drop 302 properties that are not priority postcodes - confirm w/ Stonewater - asset_list = asset_list[asset_list["Archetype ID"] == "NOT PRIORITY POSTCODE"] + asset_list = asset_list[asset_list["Archetype ID"] != "NOT PRIORITY POSTCODE"] # Clean address ids asset_list = asset_list[~pd.isnull(asset_list["Address ID"])] asset_list = asset_list[asset_list["Address ID"] != "Address ID"] @@ -1678,7 +1678,7 @@ def propsed_wave_3_sample(): # results = [] - for region in unique_postal_regions: + for region in tqdm(unique_postal_regions): # Take all of the properties in that region region_assets = asset_list[asset_list["Postal Region"] == region].copy() archetypes = region_assets["Archetype ID"].unique() @@ -1739,7 +1739,11 @@ def propsed_wave_3_sample(): missed_addressids = region_assets[pd.isnull(region_assets["Confidence Tier"])]["Address ID"].unique().tolist() - # This means that this archetype was never surveyed and so we need to find a sufficiently similar property + if not missed_addressids: + results.append(region_assets) + continue + + # This means that this archetype was never surveyed and so we need to find a sufficiently similar property final_missed_matches = [] for a_id in missed_addressids: property = asset_list[asset_list["Address ID"] == a_id].squeeze()