mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
started wates matching
This commit is contained in:
parent
678a4b52d2
commit
7291f7128e
1 changed files with 91 additions and 28 deletions
|
|
@ -3331,7 +3331,7 @@ def revised_model():
|
|||
)
|
||||
|
||||
if to_filter.sum() == 0:
|
||||
blah
|
||||
raise Exception("Error")
|
||||
filtered = filtered[to_filter]
|
||||
|
||||
if filtered.empty:
|
||||
|
|
@ -3347,34 +3347,97 @@ def revised_model():
|
|||
)
|
||||
continue
|
||||
|
||||
blah2
|
||||
raise Exception("No match")
|
||||
|
||||
# home["Name"] should be contained in the survey_folder
|
||||
# filtered = filtered[filtered["survey_folder"].str.contains(home["Name"], case=False)]
|
||||
# # We have an edge case where some properties have two outputs in Sharepoint
|
||||
# if home["Name"] == "197 Granby Court" and home["Postcode"] == "MK1 1NQ":
|
||||
# raise Exception("Fix me1")
|
||||
# # filtered = filtered[filtered["survey_folder"] == "113-1-197 Granby Court-MK1 1NQ"]
|
||||
#
|
||||
# if home["Name"] == '1 Cluny Way' and home["Postcode"] == 'SG15 6ZB':
|
||||
# raise Exception("Fix me2")
|
||||
# # filtered = filtered[filtered["survey_folder"] == "12-1-1 Cluny Way-SG15 6ZB"]
|
||||
#
|
||||
# if home["Name"] == '2 Bromyard Road' and home["Postcode"] == 'WR15 8BZ':
|
||||
# filtered = filtered[filtered["survey_folder"] == "StonewaterSurveys_4/192-9-2 Bromyard Road-WR15 8BZ"]
|
||||
#
|
||||
# if filtered.empty:
|
||||
# continue
|
||||
# if filtered.shape[0] != 1:
|
||||
# raise Exception("something went wrong")
|
||||
#
|
||||
# matching_lookup.append(
|
||||
# {
|
||||
# "survey_folder": filtered["survey_folder"].values[0],
|
||||
# "Address ID": home["Address ID"],
|
||||
# "Name": home["Name"]
|
||||
# }
|
||||
# )
|
||||
ccs_matching_lookup = pd.DataFrame(ccs_matching_lookup)
|
||||
# We get a match for all records
|
||||
assert ccs_matching_lookup.shape[0] == ccs_coordination.shape[0]
|
||||
assert not pd.isnull(ccs_matching_lookup["Asset ID.1"]).sum()
|
||||
|
||||
# We do the same for Wates
|
||||
wates_coordination = wates_coordination.rename(
|
||||
columns={"Post Code": "Postcode"}
|
||||
)
|
||||
wates_coordination = wates_coordination[
|
||||
wates_coordination["Retrofit Assessment"].isin(["Completed"])
|
||||
]
|
||||
|
||||
wates_manual_filters = {}
|
||||
wates_matching_lookup = []
|
||||
for _, home in tqdm(wates_coordination.iterrows(), total=len(wates_coordination)):
|
||||
|
||||
# Handle the case that has the wrong postcode in the asset data
|
||||
if home["Name"] in wates_manual_filters:
|
||||
filtered = retrofit_assessment_data[
|
||||
retrofit_assessment_data["survey_folder"] == wates_manual_filters[home["Name"]]
|
||||
].copy()
|
||||
else:
|
||||
filtered = retrofit_assessment_data[
|
||||
retrofit_assessment_data["Postcode"].str.lower() == home["Postcode"].lower()
|
||||
].copy()
|
||||
|
||||
# We check that home["Name"] is contained in the survey_folder, after removing punctuation (whitespace is kept)
|
||||
to_filter = filtered["survey_folder"].str.replace(r"[^\w\s]", "").str.contains(
|
||||
home["Name"].replace(r"[^\w\s]", "").replace("Flat", "").lstrip(), case=False
|
||||
)
|
||||
if to_filter.sum() == 0:
|
||||
to_filter = (
|
||||
filtered["survey_folder"].
|
||||
str.replace(r"[^\w\s]", "").
|
||||
str.replace(",", "").
|
||||
str.replace(".", "").
|
||||
str.contains(
|
||||
home["Name"].replace(r"[^\w\s]", "").replace(",", ""), case=False
|
||||
)
|
||||
)
|
||||
if to_filter.sum() == 0:
|
||||
to_filter = (
|
||||
filtered["Address"].str.replace(" ,", "").str.split(",").str[0:2].str.join("").str.lower() ==
|
||||
home["Name"].lower()
|
||||
)
|
||||
if to_filter.sum() == 0:
|
||||
to_filter = (
|
||||
filtered["Address"].str.replace(" ,", "").str.split(",").str[0:1].str.join("").str.lower() ==
|
||||
home["Name"].lower()
|
||||
)
|
||||
if to_filter.sum() == 0:
|
||||
# Do a fuzzy match on the name
|
||||
# Find the best filter
|
||||
to_filter = filtered["Address"].str.replace(" ,", "").str.split(",").str[0:2].str.join("").apply(
|
||||
lambda x: fuzz.partial_ratio(home["Name"], x) > 93
|
||||
)
|
||||
if to_filter.sum() == 0:
|
||||
# We also have some cases where the name of the survey folder is like "Colville Road 7" and the
|
||||
# property name is actually 7 Colville Road, so we try taking the final part of the address,
|
||||
# splitting on space, and adding it to the front
|
||||
def reformat_survey_folder(x):
|
||||
filename = x.split("/")[-1]
|
||||
parts = filename.split(" ")
|
||||
return " ".join(parts[-1:] + parts[:-1])
|
||||
|
||||
to_filter = (
|
||||
filtered["survey_folder"].apply(lambda x: reformat_survey_folder(x)).str.lower() ==
|
||||
home["Name"].lower()
|
||||
)
|
||||
|
||||
if to_filter.sum() == 0:
|
||||
raise Exception("Error")
|
||||
filtered = filtered[to_filter]
|
||||
|
||||
if filtered.empty:
|
||||
continue
|
||||
|
||||
if filtered.shape[0] == 1:
|
||||
wates_matching_lookup.append(
|
||||
{
|
||||
"survey_folder": filtered["survey_folder"].values[0],
|
||||
"Asset ID": home["Asset ID"],
|
||||
"Name": home["Name"]
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
raise Exception("No match")
|
||||
|
||||
# if __name__ == "__main__":
|
||||
# main()
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue