ha56 survey list matching

This commit is contained in:
Khalim Conn-Kowlessar 2024-03-10 14:25:47 +00:00
parent db7b6de87b
commit 8b3f4d3a52

View file

@ -694,6 +694,20 @@ class DataLoader:
asset_list["ECO Eligibility"]
)
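# Already surveyed under ECO4: these properties are marked "Not eligible".
# NOTE(review): the second mask below is negated with `~`, so it sets every row
# EXCEPT the WA5 0EN "Block 17-26 Tavlin Avenue" rows to "Not eligible". That looks
# inverted relative to this comment - confirm whether the `~` is intended.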
asset_list["ECO Eligibility"] = np.where(
asset_list["Post Code"] == "SK17 6NR",
"Not eligible",
asset_list["ECO Eligibility"]
)
asset_list["ECO Eligibility"] = np.where(
~((asset_list["Post Code"] == "WA5 0EN") &
(asset_list["Address 1"] == "Block 17-26 Tavlin Avenue")),
"Not eligible",
asset_list["ECO Eligibility"]
)
return asset_list
@staticmethod
@ -1811,6 +1825,29 @@ class DataLoader:
return survey_list
@staticmethod
def correct_ha56_survey_list(survey_list):
    """Apply the HA56-specific manual corrections to the survey list.

    Two corrections are made:

    * Rows for numbers 22 and 24 on "Samual Street" with post code "WA5 1BB"
      are dropped, because they are not in the asset list.
    * The abbreviated upper-case road names "STOURTON RD", "BIRKIN RD" and
      "PORTLAND RD" in "Street / Block Name" are rewritten to "Stourton Road",
      "Birkin Road" and "Portland Road" (presumably the spelling the asset
      list uses - verify against the matching step).

    Args:
        survey_list: DataFrame with at least the columns
            "Street / Block Name", "NO." and "Post Code".

    Returns:
        A new DataFrame with the corrections applied. The frame passed in is
        not modified.
    """
    # Not in asset list
    not_in_asset_list = (
        (survey_list["Street / Block Name"] == "Samual Street")
        & survey_list["NO."].isin([22, 24])
        & (survey_list["Post Code"] == "WA5 1BB")
    )
    # Take an explicit copy of the filtered frame: assigning a column on the
    # result of boolean indexing otherwise raises SettingWithCopyWarning.
    survey_list = survey_list[~not_in_asset_list].copy()

    street_names = survey_list["Street / Block Name"]
    for abbreviated, expanded in (
        ("STOURTON RD", "Stourton Road"),
        ("BIRKIN RD", "Birkin Road"),
        ("PORTLAND RD", "Portland Road"),
    ):
        # regex=False: these are literal substrings, so do not depend on the
        # pandas-version-specific default for `regex`.
        street_names = street_names.str.replace(abbreviated, expanded, regex=False)
    survey_list["Street / Block Name"] = street_names

    return survey_list
@staticmethod
def levenstein_match(matching_string, df):
match_to = df["matching_address"].tolist()
@ -1843,6 +1880,10 @@ class DataLoader:
if ha_name == "HA13":
missed_postcodes = ["hp17 8le"]
if ha_name == "HA56":
# Multiple properties are listed as blocks, which is a problem for matching
missed_postcodes = ["sk17 6nr", "wa5 0en"]
matching_lookup = []
for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):
@ -1890,6 +1931,19 @@ class DataLoader:
df = df[df["HouseNo"].astype(str).str.lower() == str(house_number)]
if df.shape[0] != 1:
df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower().strip())]
if df.empty:
postcode_lower = row["Post Code"].lower()
if postcode_lower in missed_postcodes:
matching_lookup.append(
{
"survey_list_row_id": row["survey_list_row_id"],
"asset_list_row_id": None,
}
)
continue
if df.shape[0] != 1:
if "Town/Area" not in row.keys():
full_key = (str(row["NO."]).lower().strip() + row["Street / Block Name"].lower().strip() +