debugging non-intrusive colnames

This commit is contained in:
Khalim Conn-Kowlessar 2025-03-08 18:39:25 +00:00
parent 9eba778eb1
commit bb2164ccf8
2 changed files with 47 additions and 10 deletions

View file

@ -584,10 +584,12 @@ class AssetList:
if self.old_format_non_intrusives_present:
non_intrusive_columns = self.OLD_FORMAT_NON_INTRUSIVE_COLNAMES
self.keep_variables += non_intrusive_columns
self.rename_map = {
**self.rename_map,
**dict(
zip(self.NON_INTRUSIVES_COLNAMES, ["non-intrusives: " + c for c in non_intrusive_columns])
zip(non_intrusive_columns, ["non-intrusives: " + c for c in non_intrusive_columns])
)
}
@ -987,7 +989,7 @@ class AssetList:
def identify_worktypes(self, cleaned):
if not self.non_intrusives_present:
if not self.non_intrusives_present and not self.old_format_non_intrusives_present:
raise NotImplementedError("Need to implement the case for non-intrusives")
# If we have non-intrusives completed, we can use this to identify work types
@ -1855,7 +1857,9 @@ class AssetList:
def flag_outcomes(
self,
outcomes_filepath,
outcomes_sheetname
outcomes_sheetname,
outcomes_postcode,
outcomes_houseno
):
if outcomes_filepath is None:
pass
@ -1901,7 +1905,7 @@ class AssetList:
continue
matched = self.standardised_asset_list[
(self.standardised_asset_list[self.STANDARD_POSTCODE] == x["Post Code"])
(self.standardised_asset_list[self.STANDARD_POSTCODE] == x[outcomes_postcode])
].copy()
if not matched.empty:
matched["houseno"] = matched.apply(
@ -1909,7 +1913,7 @@ class AssetList:
axis=1
)
matched = matched[
matched["houseno"].astype(str) == str(x["Numb."])
matched["houseno"].astype(str) == str(x[outcomes_houseno])
]
if matched.shape[0] == 1:
lookup.append(
@ -1920,7 +1924,18 @@ class AssetList:
)
continue
elif not matched.empty:
raise NotImplementedError("Implement me - multiple matches on house number")
# Use Levenshtein distance to pick the best address match among the candidates
matched["address"] = matched[self.STANDARD_ADDRESS_1] + " " + matched[self.STANDARD_POSTCODE]
best_match = process.extractOne(x["Address"], matched[self.STANDARD_FULL_ADDRESS].values)[0]
matched = matched[matched[self.STANDARD_FULL_ADDRESS] == best_match]
lookup.append(
{
"row_id": x["row_id"],
self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0]
}
)
continue
nomatch.append(x["row_id"])

View file

@ -301,8 +301,14 @@ def app():
# landlord_heating_system = "Heating"
# landlord_existing_pv = None
# outcomes_filename = "ACIS Group - 25.11.2024 - outcomes.xlsx"
# master_filename_eco3 = "ECO 3 -Table 1.csv"
# master_filename_eco4 = "ECO 4 -Table 1.csv"
# outcomes_sheetname = "Feedback"
# outcomes_postcode = "Postcode"
# outcomes_houseno = "No"
# master_filepaths = [
# os.path.join(data_folder, "ECO 3 -Table 1.csv"),
# os.path.join(data_folder, "ECO 4 -Table 1.csv"),
# ]
# master_to_asset_list_filepath = None
# For plus dane
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane"
@ -323,6 +329,8 @@ def app():
landlord_existing_pv = None
outcomes_filename = "plus dane outcomes.xlsx"
outcomes_sheetname = "EVERYTHING"
outcomes_postcode = "Post Code"
outcomes_houseno = "Numb."
master_filepaths = [
os.path.join(data_folder, "JJC Rolling Master.csv"),
os.path.join(data_folder, "SCIS Rolling Master.csv"),
@ -386,7 +394,9 @@ def app():
# We now flag properties that have been treated under existing programmes
asset_list.flag_outcomes(
outcomes_filepath=os.path.join(data_folder, outcomes_filename),
outcomes_sheetname=outcomes_sheetname
outcomes_sheetname=outcomes_sheetname,
outcomes_postcode=outcomes_postcode,
outcomes_houseno=outcomes_houseno
)
asset_list.flag_survey_master(
@ -457,7 +467,9 @@ def app():
csv_data = pd.read_csv(os.path.join(download_folder, file))
# We need to convert the recommendations back to a list
csv_data["recommendations"] = csv_data["recommendations"].apply(eval)
csv_data["find_my_epc_data"] = csv_data["find_my_epc_data"].apply(eval)
# This column is absent if the find-my-EPC data was never pulled
if "find_my_epc_data" in csv_data.columns:
csv_data["find_my_epc_data"] = csv_data["find_my_epc_data"].apply(eval)
epc_data.append(csv_data)
epc_df = pd.concat(epc_data)
@ -499,6 +511,9 @@ def app():
)
# Get the find my epc data
if "find_my_epc_data" not in epc_df.columns:
epc_df["find_my_epc_data"] = None
find_my_epc_data = epc_df[[asset_list.DOMNA_PROPERTY_ID, "find_my_epc_data"]].drop(
columns=["find_my_epc_data"]).join(
pd.json_normalize(epc_df["find_my_epc_data"])
@ -519,6 +534,13 @@ def app():
columns=asset_list.EPC_API_DATA_NAMES
)
# Add any expected find-my-EPC columns that are missing, which happens if we never
# retrieved that data in the first place
missed_find_epc_cols = [c for c in list(asset_list.FIND_EPC_DATA_NAMES.keys()) if c not in find_my_epc_data.columns]
if missed_find_epc_cols:
for c in missed_find_epc_cols:
find_my_epc_data[c] = None
epc_df = epc_df.merge(
find_my_epc_data[
[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"] + list(asset_list.FIND_EPC_DATA_NAMES.keys())