From bb2164ccf859a585caca74aecd1e66bda0d2cf0d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 8 Mar 2025 18:39:25 +0000 Subject: [PATCH] debugging non-intrusive colnames --- asset_list/AssetList.py | 27 +++++++++++++++++++++------ asset_list/app.py | 30 ++++++++++++++++++++++++++---- 2 files changed, 47 insertions(+), 10 deletions(-) diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 05f6b10e..fe4be9f5 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -584,10 +584,12 @@ class AssetList: if self.old_format_non_intrusives_present: non_intrusive_columns = self.OLD_FORMAT_NON_INTRUSIVE_COLNAMES + self.keep_variables += non_intrusive_columns + self.rename_map = { **self.rename_map, **dict( - zip(self.NON_INTRUSIVES_COLNAMES, ["non-intrusives: " + c for c in non_intrusive_columns]) + zip(non_intrusive_columns, ["non-intrusives: " + c for c in non_intrusive_columns]) ) } @@ -987,7 +989,7 @@ class AssetList: def identify_worktypes(self, cleaned): - if not self.non_intrusives_present: + if not self.non_intrusives_present and not self.old_format_non_intrusives_present: raise NotImplementedError("Need to implement the case for non-intrusives") # If we have non-intrusives completed, we can use this to identify work types @@ -1855,7 +1857,9 @@ class AssetList: def flag_outcomes( self, outcomes_filepath, - outcomes_sheetname + outcomes_sheetname, + outcomes_postcode, + outcomes_houseno ): if outcomes_filepath is None: pass @@ -1901,7 +1905,7 @@ class AssetList: continue matched = self.standardised_asset_list[ - (self.standardised_asset_list[self.STANDARD_POSTCODE] == x["Post Code"]) + (self.standardised_asset_list[self.STANDARD_POSTCODE] == x[outcomes_postcode]) ].copy() if not matched.empty: matched["houseno"] = matched.apply( @@ -1909,7 +1913,7 @@ class AssetList: axis=1 ) matched = matched[ - matched["houseno"].astype(str) == str(x["Numb."]) + matched["houseno"].astype(str) == str(x[outcomes_houseno]) ] if matched.shape[0] 
== 1: lookup.append( @@ -1920,7 +1924,18 @@ class AssetList: ) continue elif not matched.empty: - raise NotImplementedError("Implement me - multiple matches on house number") + # Use Levenshtein distance to match + matched["address"] = matched[self.STANDARD_ADDRESS_1] + " " + matched[self.STANDARD_POSTCODE] + + best_match = process.extractOne(x["Address"], matched[self.STANDARD_FULL_ADDRESS].values)[0] + matched = matched[matched[self.STANDARD_FULL_ADDRESS] == best_match] + lookup.append( + { + "row_id": x["row_id"], + self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0] + } + ) + continue nomatch.append(x["row_id"]) diff --git a/asset_list/app.py b/asset_list/app.py index bea9cdde..63ca40d8 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -301,8 +301,14 @@ def app(): # landlord_heating_system = "Heating" # landlord_existing_pv = None # outcomes_filename = "ACIS Group - 25.11.2024 - outcomes.xlsx" - # master_filename_eco3 = "ECO 3 -Table 1.csv" - # master_filename_eco4 = "ECO 4 -Table 1.csv" + # outcomes_sheetname = "Feedback" + # outcomes_postcode = "Postcode" + # outcomes_houseno = "No" + # master_filepaths = [ + # os.path.join(data_folder, "ECO 3 -Table 1.csv"), + # os.path.join(data_folder, "ECO 4 -Table 1.csv"), + # ] + # master_to_asset_list_filepath = None # For plus dane data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane" @@ -323,6 +329,8 @@ def app(): landlord_existing_pv = None outcomes_filename = "plus dane outcomes.xlsx" outcomes_sheetname = "EVERYTHING" + outcomes_postcode = "Post Code" + outcomes_houseno = "Numb." 
master_filepaths = [ os.path.join(data_folder, "JJC Rolling Master.csv"), os.path.join(data_folder, "SCIS Rolling Master.csv"), @@ -386,7 +394,9 @@ def app(): # We now flag properties that have been treated under existing programmes asset_list.flag_outcomes( outcomes_filepath=os.path.join(data_folder, outcomes_filename), - outcomes_sheetname=outcomes_sheetname + outcomes_sheetname=outcomes_sheetname, + outcomes_postcode=outcomes_postcode, + outcomes_houseno=outcomes_houseno ) asset_list.flag_survey_master( @@ -457,7 +467,9 @@ def app(): csv_data = pd.read_csv(os.path.join(download_folder, file)) # We need to convert the recommendations back to a list csv_data["recommendations"] = csv_data["recommendations"].apply(eval) - csv_data["find_my_epc_data"] = csv_data["find_my_epc_data"].apply(eval) + # We don't have this if we didn't run the pulling from find my epc + if "find_my_epc_data" in csv_data.columns: + csv_data["find_my_epc_data"] = csv_data["find_my_epc_data"].apply(eval) epc_data.append(csv_data) epc_df = pd.concat(epc_data) @@ -499,6 +511,9 @@ def app(): ) # Get the find my epc data + if "find_my_epc_data" not in epc_df.columns: + epc_df["find_my_epc_data"] = None + find_my_epc_data = epc_df[[asset_list.DOMNA_PROPERTY_ID, "find_my_epc_data"]].drop( columns=["find_my_epc_data"]).join( pd.json_normalize(epc_df["find_my_epc_data"]) @@ -519,6 +534,13 @@ def app(): columns=asset_list.EPC_API_DATA_NAMES ) + # Look for columns not in the find my EPC data, which will have happened if we didn't + # retrieve it in the first place + missed_find_epc_cols = [c for c in list(asset_list.FIND_EPC_DATA_NAMES.keys()) if c not in find_my_epc_data.columns] + if missed_find_epc_cols: + for c in missed_find_epc_cols: + find_my_epc_data[c] = None + epc_df = epc_df.merge( find_my_epc_data[ [asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"] + list(asset_list.FIND_EPC_DATA_NAMES.keys())