HA51 eco3 matching

This commit is contained in:
Khalim Conn-Kowlessar 2024-03-18 12:19:15 +00:00
parent 897d58eec2
commit c58acadb73

View file

@ -491,6 +491,10 @@ class DataLoader:
"address": "A_Address",
"postcode": "matching_postcode"
},
"HA45": {
"address": "Full postal address",
"postcode": "Postcode"
},
"HA48": {
"address": "Full Address",
"postcode": "Postcode"
@ -518,7 +522,8 @@ class DataLoader:
"HA50": 4,
"HA63": 15,
"HA107": 51,
"HA48": 0
"HA48": 0,
"HA45": 0
}
UNMATCHED_ECO3 = {
@ -527,7 +532,8 @@ class DataLoader:
"HA50": 5,
"HA56": 320,
"HA63": 0,
"HA117": 4
"HA117": 4,
"HA51": 24
}
def __init__(self, directories, december_figures_filepath, use_cache, rebuild):
@ -542,7 +548,7 @@ class DataLoader:
def create_asset_list_matching_address(self, ha_name, asset_list):
if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA48", "HA49", "HA54"]:
if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA45", "HA48", "HA49", "HA54"]:
asset_list["matching_address"] = asset_list[
self.COLUMN_CONFIG[ha_name]["address"]
].astype(str).str.lower().str.strip()
@ -717,6 +723,18 @@ class DataLoader:
asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
asset_list["Post Code"].astype(str).str.lower().str.strip()
asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip()
elif ha_name == "HA51":
asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + \
asset_list["Address Line 3"].astype(str).str.lower().str.strip() + ", " + \
asset_list["Postcode"].astype(str).str.lower().str.strip()
asset_list["matching_address"] = np.where(
asset_list["Block"].str.strip().str.len() > 0,
asset_list["Block"].astype(str).str.lower().str.strip() + ", " + \
asset_list["matching_address"],
asset_list["matching_address"]
)
asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
elif ha_name == "HA56":
asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
asset_list["Address 2"].astype(str).str.lower().str.strip() + ", " + \
@ -2485,6 +2503,13 @@ class DataLoader:
)
return survey_list
@staticmethod
def correct_ha45_survey_list(survey_list):
    """Apply the manual street-name correction to the HA45 survey list.

    Every occurrence of the substring "Norwich Road" in the
    "Street / Block Name" column is rewritten to "Norwich Avenue".
    NOTE(review): presumably this aligns the survey data with the asset
    list spelling - confirm against the source data.

    Args:
        survey_list: DataFrame with a string "Street / Block Name" column.
            The column is overwritten on the frame that is passed in.

    Returns:
        The same DataFrame object, with the corrected column.
    """
    # regex=False: the pattern is a plain literal. Being explicit keeps the
    # result independent of the pandas version (the default for ``regex``
    # changed from True to False in pandas 2.0).
    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
        "Norwich Road", "Norwich Avenue", regex=False
    )
    return survey_list
@staticmethod
def levenstein_match(matching_string, df):
match_to = df["matching_address"].tolist()
@ -2744,6 +2769,38 @@ class DataLoader:
return eco3_list
@staticmethod
def correct_ha51_eco3_list(eco3_list):
    """Apply the manual data corrections to the HA51 ECO3 list.

    Steps, in order:
      1. Fix three misspelt street names in "Street / Block Name".
      2. Change the postcode of SYDENHAM ROAD rows from "CR0 2DW" to
         "CR0 2ED".
      3. Drop the WOODLEY LANE / "SM5 2RJ" / "FLAT 3, 11" row, which is
         not in the asset list.
      4. Normalise the house number "47 B" to "47B".

    Args:
        eco3_list: DataFrame with string columns "Street / Block Name",
            "Post Code" and "NO " (the trailing space is part of the
            column name). Steps 1 and 2 are written onto this frame.

    Returns:
        A new DataFrame: the corrected input with the step-3 row removed
        and the step-4 fix applied.
    """
    street_col = "Street / Block Name"
    postcode_col = "Post Code"
    number_col = "NO "

    # Misspelt street name -> corrected spelling.
    street_fixes = {
        "HASELEMERE AVENUE": "HASLEMERE AVENUE",
        "THORVILLE GROVE": "THORNVILLE GROVE",
        "MONTBRETA CLOSE": "MONTBRETIA CLOSE",
    }
    for misspelt, corrected in street_fixes.items():
        # regex=False: these are literal substrings; being explicit keeps
        # behaviour identical across pandas versions.
        eco3_list[street_col] = eco3_list[street_col].str.replace(
            misspelt, corrected, regex=False
        )

    wrong_postcode = (
        (eco3_list[street_col] == "SYDENHAM ROAD")
        & (eco3_list[postcode_col] == "CR0 2DW")
    )
    eco3_list.loc[wrong_postcode, postcode_col] = "CR0 2ED"

    # Not in asset list
    not_in_assets = (
        (eco3_list[street_col] == "WOODLEY LANE")
        & (eco3_list[postcode_col] == "SM5 2RJ")
        & (eco3_list[number_col] == "FLAT 3, 11")
    )
    # .copy() makes the filtered frame independent, so the column write
    # below is not a chained assignment on a slice of the input
    # (which would raise SettingWithCopyWarning).
    eco3_list = eco3_list[~not_in_assets].copy()

    eco3_list.loc[eco3_list[number_col] == "47 B", number_col] = "47B"

    return eco3_list
def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list")
@ -2752,7 +2809,7 @@ class DataLoader:
asset_list["matching_postcode_nospace"] = asset_list["matching_postcode"].str.replace(" ", "").str.lower()
eco3_list["postcode_no_space"] = eco3_list["Post Code"].str.lower().str.replace(" ", "")
if ha_name in ["HA25", "HA56"]:
if ha_name in ["HA25", "HA56", "HA51"]:
# HA25: 317 -> 259
missed_postcodes = {
postcode for postcode in eco3_list["postcode_no_space"] if
@ -2774,7 +2831,7 @@ class DataLoader:
matching_lookup = []
missed = []
for _, row in tqdm(eco3_list.iterrows(), total=len(eco3_list)):
# if row["eco3_list_row_id"] == "HA25_Eco3_5422":
# if row["eco3_list_row_id"] == "HA51_Eco3_22":
# raise Exception()
postcode = row["postcode_no_space"]
@ -2813,6 +2870,12 @@ class DataLoader:
missed.append(row["eco3_list_row_id"])
continue
if df.shape[0] > 1:
if "flat" in str(row["NO "]).lower():
df = df[df["matching_address"].str.contains("flat")]
else:
df = df[~df["matching_address"].str.contains("flat")]
if df.shape[0] != 1:
print(row["Street / Block Name"])
print(house_number)
@ -6200,10 +6263,9 @@ def app():
priority_has = [
"HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", "HA25",
"HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56",
"HA63", "HA107", "HA117",
# Added as of March 17th
"HA8", "HA11", "HA21", "HA37", "HA42",
"HA44",
"HA63", "HA107", "HA117", "HA8", "HA11", "HA21", "HA37", "HA42",
# Added as of March 18th
"HA44", "HA45", "HA51",
# New HAS
"HAXX", "HAXXX",
]