mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
debugging non-intrusive colnames
This commit is contained in:
parent
9eba778eb1
commit
bb2164ccf8
2 changed files with 47 additions and 10 deletions
|
|
@ -584,10 +584,12 @@ class AssetList:
|
|||
if self.old_format_non_intrusives_present:
|
||||
non_intrusive_columns = self.OLD_FORMAT_NON_INTRUSIVE_COLNAMES
|
||||
|
||||
self.keep_variables += non_intrusive_columns
|
||||
|
||||
self.rename_map = {
|
||||
**self.rename_map,
|
||||
**dict(
|
||||
zip(self.NON_INTRUSIVES_COLNAMES, ["non-intrusives: " + c for c in non_intrusive_columns])
|
||||
zip(non_intrusive_columns, ["non-intrusives: " + c for c in non_intrusive_columns])
|
||||
)
|
||||
}
|
||||
|
||||
|
|
@ -987,7 +989,7 @@ class AssetList:
|
|||
|
||||
def identify_worktypes(self, cleaned):
|
||||
|
||||
if not self.non_intrusives_present:
|
||||
if not self.non_intrusives_present and not self.old_format_non_intrusives_present:
|
||||
raise NotImplementedError("Need to implement the case for non-intrusives")
|
||||
|
||||
# If we have non-intrusives completed, we can use this to identify work types
|
||||
|
|
@ -1855,7 +1857,9 @@ class AssetList:
|
|||
def flag_outcomes(
|
||||
self,
|
||||
outcomes_filepath,
|
||||
outcomes_sheetname
|
||||
outcomes_sheetname,
|
||||
outcomes_postcode,
|
||||
outcomes_houseno
|
||||
):
|
||||
if outcomes_filepath is None:
|
||||
pass
|
||||
|
|
@ -1901,7 +1905,7 @@ class AssetList:
|
|||
continue
|
||||
|
||||
matched = self.standardised_asset_list[
|
||||
(self.standardised_asset_list[self.STANDARD_POSTCODE] == x["Post Code"])
|
||||
(self.standardised_asset_list[self.STANDARD_POSTCODE] == x[outcomes_postcode])
|
||||
].copy()
|
||||
if not matched.empty:
|
||||
matched["houseno"] = matched.apply(
|
||||
|
|
@ -1909,7 +1913,7 @@ class AssetList:
|
|||
axis=1
|
||||
)
|
||||
matched = matched[
|
||||
matched["houseno"].astype(str) == str(x["Numb."])
|
||||
matched["houseno"].astype(str) == str(x[outcomes_houseno])
|
||||
]
|
||||
if matched.shape[0] == 1:
|
||||
lookup.append(
|
||||
|
|
@ -1920,7 +1924,18 @@ class AssetList:
|
|||
)
|
||||
continue
|
||||
elif not matched.empty:
|
||||
raise NotImplementedError("Implement me - multiple matches on house number")
|
||||
# Use Levenshtein distance to match
|
||||
matched["address"] = matched[self.STANDARD_ADDRESS_1] + " " + matched[self.STANDARD_POSTCODE]
|
||||
|
||||
best_match = process.extractOne(x["Address"], matched[self.STANDARD_FULL_ADDRESS].values)[0]
|
||||
matched = matched[matched[self.STANDARD_FULL_ADDRESS] == best_match]
|
||||
lookup.append(
|
||||
{
|
||||
"row_id": x["row_id"],
|
||||
self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0]
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
nomatch.append(x["row_id"])
|
||||
|
||||
|
|
|
|||
|
|
@ -301,8 +301,14 @@ def app():
|
|||
# landlord_heating_system = "Heating"
|
||||
# landlord_existing_pv = None
|
||||
# outcomes_filename = "ACIS Group - 25.11.2024 - outcomes.xlsx"
|
||||
# master_filename_eco3 = "ECO 3 -Table 1.csv"
|
||||
# master_filename_eco4 = "ECO 4 -Table 1.csv"
|
||||
# outcomes_sheetname = "Feedback"
|
||||
# outcomes_postcode = "Postcode"
|
||||
# outcomes_houseno = "No"
|
||||
# master_filepaths = [
|
||||
# os.path.join(data_folder, "ECO 3 -Table 1.csv"),
|
||||
# os.path.join(data_folder, "ECO 4 -Table 1.csv"),
|
||||
# ]
|
||||
# master_to_asset_list_filepath = None
|
||||
|
||||
# For plus dane
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane"
|
||||
|
|
@ -323,6 +329,8 @@ def app():
|
|||
landlord_existing_pv = None
|
||||
outcomes_filename = "plus dane outcomes.xlsx"
|
||||
outcomes_sheetname = "EVERYTHING"
|
||||
outcomes_postcode = "Post Code"
|
||||
outcomes_houseno = "Numb."
|
||||
master_filepaths = [
|
||||
os.path.join(data_folder, "JJC Rolling Master.csv"),
|
||||
os.path.join(data_folder, "SCIS Rolling Master.csv"),
|
||||
|
|
@ -386,7 +394,9 @@ def app():
|
|||
# We now flag properties that have been treated under existing programmes
|
||||
asset_list.flag_outcomes(
|
||||
outcomes_filepath=os.path.join(data_folder, outcomes_filename),
|
||||
outcomes_sheetname=outcomes_sheetname
|
||||
outcomes_sheetname=outcomes_sheetname,
|
||||
outcomes_postcode=outcomes_postcode,
|
||||
outcomes_houseno=outcomes_houseno
|
||||
)
|
||||
|
||||
asset_list.flag_survey_master(
|
||||
|
|
@ -457,7 +467,9 @@ def app():
|
|||
csv_data = pd.read_csv(os.path.join(download_folder, file))
|
||||
# We need to convert the recommendations back to a list
|
||||
csv_data["recommendations"] = csv_data["recommendations"].apply(eval)
|
||||
csv_data["find_my_epc_data"] = csv_data["find_my_epc_data"].apply(eval)
|
||||
# This column is absent if we didn't pull the data from find my epc
|
||||
if "find_my_epc_data" in csv_data.columns:
|
||||
csv_data["find_my_epc_data"] = csv_data["find_my_epc_data"].apply(eval)
|
||||
epc_data.append(csv_data)
|
||||
|
||||
epc_df = pd.concat(epc_data)
|
||||
|
|
@ -499,6 +511,9 @@ def app():
|
|||
)
|
||||
|
||||
# Get the find my epc data
|
||||
if "find_my_epc_data" not in epc_df.columns:
|
||||
epc_df["find_my_epc_data"] = None
|
||||
|
||||
find_my_epc_data = epc_df[[asset_list.DOMNA_PROPERTY_ID, "find_my_epc_data"]].drop(
|
||||
columns=["find_my_epc_data"]).join(
|
||||
pd.json_normalize(epc_df["find_my_epc_data"])
|
||||
|
|
@ -519,6 +534,13 @@ def app():
|
|||
columns=asset_list.EPC_API_DATA_NAMES
|
||||
)
|
||||
|
||||
# Look for columns not in the find my EPC data, which will have happened if we didn't
|
||||
# retrieve it in the first place
|
||||
missed_find_epc_cols = [c for c in list(asset_list.FIND_EPC_DATA_NAMES.keys()) if c not in find_my_epc_data.columns]
|
||||
if missed_find_epc_cols:
|
||||
for c in missed_find_epc_cols:
|
||||
find_my_epc_data[c] = None
|
||||
|
||||
epc_df = epc_df.merge(
|
||||
find_my_epc_data[
|
||||
[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"] + list(asset_list.FIND_EPC_DATA_NAMES.keys())
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue