60% through matching

This commit is contained in:
Khalim Conn-Kowlessar 2023-12-28 11:45:14 +00:00
parent dcc7757190
commit c88afeb330

View file

@@ -184,11 +184,24 @@ def load_data():
eco4_prospects_survey_list["ADDRESS 1"] = np.where(
(eco4_prospects_survey_list["ADDRESS 1"] == "old farm road") & (
eco4_prospects_survey_list["POSTCODE"] == "PL5 1EP"),
eco4_prospects_survey_list["POSTCODE"] == "PL5 1EP"),
"old school road",
eco4_prospects_survey_list["ADDRESS 1"]
)
eco4_prospects_survey_list["ADDRESS 1"] = np.where(
(eco4_prospects_survey_list["ADDRESS 1"] == "croft orchard") & (
eco4_prospects_survey_list["POSTCODE"] == "TQ12 6RP") & (
eco4_prospects_survey_list["NO"] == 52),
"drum way",
eco4_prospects_survey_list["ADDRESS 1"]
)
# String replace
eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace(
"the gulls, collaton road", "the gulls collaton road"
)
matched = []
for _, row in tqdm(eco4_prospects_survey_list.iterrows(), total=len(eco4_prospects_survey_list)):
@@ -209,9 +222,12 @@ def load_data():
# Filter on the first line of the address
df = asset_list[asset_list["T1_Address"].str.lower().str.contains(row["ADDRESS 1"].lower())].copy()
df = df[df["T1_Address"].str.lower().str.contains(str(house_number))]
if house_number is not None:
if df.shape[0] != 1:
df = df[df["T1_Address"].str.lower().str.contains(str(house_number))]
if df.shape[0] != 1:
df = df[df["HouseNo"] == str(house_number)]
if house_number is not None:
df = df[df["HouseNo"] == str(house_number)]
if df.shape[0] != 1:
df = df[df["postcode"].str.lower().str.contains(row["POSTCODE"].lower())]
if df.shape[0] != 1: