started wates matching

This commit is contained in:
Khalim Conn-Kowlessar 2025-01-29 21:11:29 +00:00
parent 678a4b52d2
commit 7291f7128e

View file

@ -3331,7 +3331,7 @@ def revised_model():
)
if to_filter.sum() == 0:
blah
raise Exception("Error")
filtered = filtered[to_filter]
if filtered.empty:
@ -3347,34 +3347,97 @@ def revised_model():
)
continue
blah2
raise Exception("No match")
# home["Name"] should be contained in the survey_folder
# filtered = filtered[filtered["survey_folder"].str.contains(home["Name"], case=False)]
# # We have an edge case wher some properties have two outputs in Sharepoint
# if home["Name"] == "197 Granby Court" and home["Postcode"] == "MK1 1NQ":
# raise Exception("Fix me1")
# # filtered = filtered[filtered["survey_folder"] == "113-1-197 Granby Court-MK1 1NQ"]
#
# if home["Name"] == '1 Cluny Way' and home["Postcode"] == 'SG15 6ZB':
# raise Exception("Fix me2")
# # filtered = filtered[filtered["survey_folder"] == "12-1-1 Cluny Way-SG15 6ZB"]
#
# if home["Name"] == '2 Bromyard Road' and home["Postcode"] == 'WR15 8BZ':
# filtered = filtered[filtered["survey_folder"] == "StonewaterSurveys_4/192-9-2 Bromyard Road-WR15 8BZ"]
#
# if filtered.empty:
# continue
# if filtered.shape[0] != 1:
# raise Exception("something went wrong")
#
# matching_lookup.append(
# {
# "survey_folder": filtered["survey_folder"].values[0],
# "Address ID": home["Address ID"],
# "Name": home["Name"]
# }
# )
ccs_matching_lookup = pd.DataFrame(ccs_matching_lookup)
# We get a match for all records
assert ccs_matching_lookup.shape[0] == ccs_coordination.shape[0]
assert not pd.isnull(ccs_matching_lookup["Asset ID.1"]).sum()
# We do the same for Wates
wates_coordination = wates_coordination.rename(
columns={"Post Code": "Postcode"}
)
wates_coordination = wates_coordination[
wates_coordination["Retrofit Assessment"].isin(["Completed"])
]
wates_manual_filters = {}
wates_matching_lookup = []
for _, home in tqdm(wates_coordination.iterrows(), total=len(wates_coordination)):
# Handle the case that has the wrong postcode in the asset data
if home["Name"] in wates_manual_filters:
filtered = retrofit_assessment_data[
retrofit_assessment_data["survey_folder"] == wates_manual_filters[home["Name"]]
].copy()
else:
filtered = retrofit_assessment_data[
retrofit_assessment_data["Postcode"].str.lower() == home["Postcode"].lower()
].copy()
# We check that home["Name"] is contained in the survey_folder, after removing punctuation and spaces
to_filter = filtered["survey_folder"].str.replace(r"[^\w\s]", "").str.contains(
home["Name"].replace(r"[^\w\s]", "").replace("Flat", "").lstrip(), case=False
)
if to_filter.sum() == 0:
to_filter = (
filtered["survey_folder"].
str.replace(r"[^\w\s]", "").
str.replace(",", "").
str.replace(".", "").
str.contains(
home["Name"].replace(r"[^\w\s]", "").replace(",", ""), case=False
)
)
if to_filter.sum() == 0:
to_filter = (
filtered["Address"].str.replace(" ,", "").str.split(",").str[0:2].str.join("").str.lower() ==
home["Name"].lower()
)
if to_filter.sum() == 0:
to_filter = (
filtered["Address"].str.replace(" ,", "").str.split(",").str[0:1].str.join("").str.lower() ==
home["Name"].lower()
)
if to_filter.sum() == 0:
# Do a fuzzy match on the name
# Find the best filter
to_filter = filtered["Address"].str.replace(" ,", "").str.split(",").str[0:2].str.join("").apply(
lambda x: fuzz.partial_ratio(home["Name"], x) > 93
)
if to_filter.sum() == 0:
# We also some cases where the name of the survey folder is like "Colville Road 7" and the
# property name is actually 7 Colville Road, so we try taking the final part of the address,
# splitting on space, and adding it to the front
def reformat_survey_folder(x: str) -> str:
    """Move the last whitespace-separated token of a survey folder name to the front.

    Only the final path component (the text after the last "/") is used, so
    "SomeDir/Colville Road 7" becomes "7 Colville Road".

    Args:
        x: Survey folder path or name.

    Returns:
        The final path component with its last token moved to the front and
        the tokens joined by single spaces. A name with zero or one token is
        returned with surrounding whitespace removed.
    """
    filename = x.rsplit("/", 1)[-1]
    # Split on runs of whitespace rather than on a single " " so that trailing
    # or doubled spaces do not produce empty tokens, which would leave a
    # leading/doubled space in the result and make an equality match fail.
    parts = filename.split()
    return " ".join(parts[-1:] + parts[:-1])
to_filter = (
filtered["survey_folder"].apply(lambda x: reformat_survey_folder(x)).str.lower() ==
home["Name"].lower()
)
if to_filter.sum() == 0:
raise Exception("Error")
filtered = filtered[to_filter]
if filtered.empty:
continue
if filtered.shape[0] == 1:
wates_matching_lookup.append(
{
"survey_folder": filtered["survey_folder"].values[0],
"Asset ID": home["Asset ID"],
"Name": home["Name"]
}
)
continue
raise Exception("No match")
# if __name__ == "__main__":
# main()