mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
expanded eco3 matching
This commit is contained in:
parent
7f88f0e0f5
commit
9a0c6c3e8f
1 changed file with 12 additions and 6 deletions
|
|
@ -172,7 +172,7 @@ class DataLoader:
|
|||
}
|
||||
|
||||
UNMATCHED_ECO3 = {
|
||||
"HA25": 119
|
||||
"HA25": 154
|
||||
}
|
||||
|
||||
def __init__(self, directories, december_figures_filepath, use_cache, rebuild):
|
||||
|
|
@ -1508,12 +1508,16 @@ class DataLoader:
|
|||
eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list")
|
||||
eco3_list = eco3_list_correction_function(eco3_list)
|
||||
|
||||
asset_list["matching_postcode_nospace"] = asset_list["matching_postcode"].str.replace(" ", "").str.lower()
|
||||
eco3_list["postcode_no_space"] = eco3_list["Post Code"].str.lower().str.replace(" ", "")
|
||||
|
||||
if ha_name == "HA25":
|
||||
# 317 -> 259
|
||||
missed_postcodes = {
|
||||
postcode.lower() for postcode in eco3_list["Post Code"] if
|
||||
postcode.lower() not in asset_list["matching_postcode"].values
|
||||
postcode for postcode in eco3_list["postcode_no_space"] if
|
||||
postcode not in asset_list["matching_postcode_nospace"].values
|
||||
}
|
||||
eco3_list = eco3_list[~eco3_list["Post Code"].str.lower().isin(missed_postcodes)]
|
||||
eco3_list = eco3_list[~eco3_list["postcode_no_space"].isin(missed_postcodes)]
|
||||
|
||||
# For the asset list, we create a matching address without any punctuation
|
||||
# TODO: We should generally just remove punctuation from addresses when matching
|
||||
|
|
@ -1530,11 +1534,11 @@ class DataLoader:
|
|||
for _, row in tqdm(eco3_list.iterrows(), total=len(eco3_list)):
|
||||
# if row["eco3_list_row_id"] == "HA25_Eco3_5422":
|
||||
# raise Exception()
|
||||
postcode = row["Post Code"].lower().strip()
|
||||
postcode = row["postcode_no_space"]
|
||||
|
||||
# df will never be empty, since we've already done a check for common postcodes
|
||||
df = asset_list[
|
||||
asset_list["matching_postcode"].str.contains(postcode)
|
||||
asset_list["matching_postcode_nospace"].str.contains(postcode)
|
||||
]
|
||||
|
||||
house_number = row["NO "]
|
||||
|
|
@ -1588,6 +1592,8 @@ class DataLoader:
|
|||
f"Unmatched addresses for {ha_name} is not as expected, got {len(missed)} unmatched"
|
||||
)
|
||||
|
||||
# 154 missed, 2827 matched for HA 25
|
||||
|
||||
matching_lookup = pd.DataFrame(matching_lookup)
|
||||
# Check dupes as this will cause problems later on
|
||||
if matching_lookup["asset_list_row_id"].duplicated().any():
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue