fixing up ha63 eco3 list

This commit is contained in:
Khalim Conn-Kowlessar 2024-03-07 21:34:46 +00:00
parent c3fd2ae902
commit 19850f9244

View file

@ -289,6 +289,10 @@ class DataLoader:
asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
asset_list["Post Code"].astype(str).str.lower().str.strip()
asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip()
elif ha_name == "HA63":
asset_list["matching_address"] = asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + \
asset_list["POSTCODE"].astype(str).str.lower().str.strip()
asset_list["matching_postcode"] = asset_list["POSTCODE"].astype(str).str.lower().str.strip()
elif ha_name == "HA107":
# Create matching_address by concatenating House No, Street, Town, District, Postcode
asset_list["matching_address"] = asset_list["House No"].astype(str).str.lower().str.strip() + ", " + \
@ -1551,6 +1555,16 @@ class DataLoader:
def correct_ha41_survey_list(survey_list):
    """
    Correction hook for the HA41 survey list.

    No corrections are applied for HA41: the survey list is handed back exactly as it was received.

    :param survey_list: The HA41 survey list.
    :return: The same ``survey_list`` object, unchanged.
    """
    return survey_list
@staticmethod
def correct_ha63_survey_list(survey_list):
    """
    Remove the filler rows from the HA63 survey list.

    A row is treated as filler when the value in the first column is exactly one of the
    "NO JOBS SURVEYED ..." labels listed below (the comparison is exact, including the trailing
    space on the July label).

    :param survey_list: DataFrame holding the HA63 survey list.
    :return: ``survey_list`` without the filler rows; the original index labels are kept.
    """
    # Drop some filler rows
    filler_labels = ["NO JOBS SURVEYED JULY 2021 ", "NO JOBS SURVEYED SEPTEMBER 2021"]
    first_column = survey_list[survey_list.columns[0]]
    is_filler = first_column.isin(filler_labels)
    return survey_list[~is_filler]
@staticmethod
def levenstein_match(matching_string, df):
match_to = df["matching_address"].tolist()
@ -1714,6 +1728,26 @@ class DataLoader:
def correct_ha41_eco3_list(eco3_list):
    """
    Correction hook for the HA41 ECO3 list.

    No corrections are applied for HA41: the ECO3 list is handed back exactly as it was received.

    :param eco3_list: The HA41 ECO3 list.
    :return: The same ``eco3_list`` object, unchanged.
    """
    return eco3_list
@staticmethod
def correct_ha63_eco3_list(eco3_list):
    """
    Clean the HA63 ECO3 list.

    Steps:
      1. Drop rows whose "Post Code" is null.
      2. Drop rows for two postcodes that aren't in the asset list.
      3. Rewrite two "Street / Block Name" values (literal substring replacement) - presumably so
         they line up with the spelling used in the asset list; confirm against the asset data.

    The filtered frame is copied before the street names are rewritten, so the caller's DataFrame
    is never modified and pandas does not emit a SettingWithCopyWarning.

    :param eco3_list: DataFrame with "Post Code" and "Street / Block Name" columns.
    :return: A new DataFrame containing the remaining rows with corrected street names.
    """
    postcodes = eco3_list["Post Code"]
    # Some postcodes that aren't in the asset list
    postcodes_not_in_assets = ["NR32 15X", "NR30 2BT"]
    keep = postcodes.notna() & ~postcodes.isin(postcodes_not_in_assets)
    # Explicit copy: the column assignment below must write to an independent frame, not to a
    # slice of the caller's data
    eco3_list = eco3_list[keep].copy()

    street_corrections = {
        "POUND COTTAGES - BLOOMSBERRY CLOSE": "POUND COTTAGES",
        "FREDRICK ROAD": "Frederick Road",
    }
    streets = eco3_list["Street / Block Name"]
    for wrong, right in street_corrections.items():
        # regex=False: these are literal strings, and the default for `regex` differs between
        # pandas versions
        streets = streets.str.replace(wrong, right, regex=False)
    eco3_list["Street / Block Name"] = streets
    return eco3_list
def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list")
@ -1799,12 +1833,15 @@ class DataLoader:
# We verify the missed
# HA25 contains 119 missed entries. These are actually 24 unique postcodes, and the majority belong to 2
# where many surveys were conducted on house numbers, not in the asset list
# 154 missed, 2827 matched for HA 25
if len(missed) != self.UNMATCHED_ECO3[ha_name]:
raise ValueError(
f"Unmatched addresses for {ha_name} is not as expected, got {len(missed)} unmatched"
)
# 154 missed, 2827 matched for HA 25
# 41
missed_df = eco3_list[eco3_list["eco3_list_row_id"].isin(missed)]
missed_df.head(1)["Street / Block Name"]
matching_lookup = pd.DataFrame(matching_lookup)
# Check dupes as this will cause problems later on
@ -4418,11 +4455,12 @@ def app():
# Add in:
priority_has = [
"HA1", "HA2", "HA6", "HA7", "HA14", "HA15", "HA16", "HA24", "HA25", "HA28", "HA32", "HA39", "HA41", "HA48",
"HA50", "HA107",
"HA50", "HA63", "HA107",
]
# Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
# back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE],
# Consider for ECO4: 2, 63, 12, 13, 136, 117
# back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE]
# 63 [WIP]
# Consider for ECO4: 12, 13, 136, 117
# Consider for GBIS: 56, 35, 34
# Ignore for now:
# 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in