mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
adding to land registry matching logic
This commit is contained in:
parent
a2a5094b01
commit
2174a85a8b
1 changed files with 94 additions and 17 deletions
|
|
@ -357,6 +357,8 @@ def app():
|
|||
properties = properties.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN")
|
||||
|
||||
# TODO: Do we want to filter properties based on lodgement dates?
|
||||
# E.g. we might want to filter properties that have had a sale EPC lodged in the last x months, because
|
||||
# this could be indicative of a sale happening, and the land registry data may not have caught up yet
|
||||
|
||||
# Remove entries where the address begins with the term "land adjoining", or other records that don't reference the
|
||||
# property itself
|
||||
|
|
@ -456,13 +458,9 @@ def app():
|
|||
|
||||
freehold_matching_lookup = pd.DataFrame(freehold_matching_lookup)
|
||||
leasehold_matching_lookup = pd.DataFrame(leasehold_matching_lookup)
|
||||
# shared_leasehold_match = pd.concat(shared_leasehold_match)
|
||||
# shared_freehold_match = pd.concat(shared_freehold_match)
|
||||
|
||||
# freehold_matching_lookup.to_excel("freehold_matching_lookup_new.xlsx")
|
||||
# leasehold_matching_lookup.to_excel("leasehold_matching_lookup_new.xlsx")
|
||||
# shared_leasehold_match.to_excel("shared_leasehold_match_new.xlsx")
|
||||
# shared_freehold_match.to_excel("shared_freehold_match_new.xlsx")
|
||||
# freehold_matching_lookup.to_excel("freehold_matching_lookup V2.xlsx")
|
||||
# leasehold_matching_lookup.to_excel("leasehold_matching_lookup V2.xlsx")
|
||||
|
||||
# The approximate matches aren't very good
|
||||
freehold_matching_lookup = freehold_matching_lookup[freehold_matching_lookup["match_type"] == "exact"]
|
||||
|
|
@ -477,10 +475,6 @@ def app():
|
|||
# We also have duplicates at a UPRN level
|
||||
combined_matching_lookup = remove_duplicate_uprn_matches(combined_matching_lookup, properties, company_ownership)
|
||||
|
||||
# There are some cases where we have duplicates
|
||||
# freehold_matching_lookup = remove_duplicate_matches(freehold_matching_lookup, properties, company_ownership)
|
||||
# leasehold_matching_lookup = remove_duplicate_matches(leasehold_matching_lookup, properties, company_ownership)
|
||||
|
||||
matched_addresses = combined_matching_lookup.merge(
|
||||
properties[
|
||||
[
|
||||
|
|
@ -534,6 +528,7 @@ def app():
|
|||
land_registry["postcode"] = land_registry["postcode"].str.lower().str.strip()
|
||||
land_registry["street"] = land_registry["street"].str.lower().str.strip()
|
||||
land_registry["paon"] = land_registry["paon"].str.lower().str.strip()
|
||||
land_registry["saon"] = land_registry["saon"].str.lower().str.strip()
|
||||
land_registry["date_of_transfer"] = pd.to_datetime(land_registry["date_of_transfer"])
|
||||
|
||||
def is_substring(x, match_string):
|
||||
|
|
@ -576,8 +571,9 @@ def app():
|
|||
# Filter further, when the street is in the address
|
||||
# street should be contained in epc_address
|
||||
lr_filtered = lr_filtered[
|
||||
lr_filtered["street"].apply(lambda x: is_substring(x, match["epc_address"].lower()))
|
||||
]
|
||||
lr_filtered["street"].apply(lambda x: is_substring(x, match["epc_address"].lower())) |
|
||||
lr_filtered["street"].apply(lambda x: is_substring(x, match["Property Address"].lower()))
|
||||
]
|
||||
|
||||
if lr_filtered.empty:
|
||||
continue
|
||||
|
|
@ -585,10 +581,11 @@ def app():
|
|||
# We now check if paon is in address 1
|
||||
lr_filtered["paon_match"] = lr_filtered["paon"].apply(lambda x: house_number_match(x, match["house_number"]))
|
||||
# We also try the secondary match
|
||||
lr_filtered["saon_match"] = lr_filtered["saon"].apply(
|
||||
lambda x: False if pd.isnull(x) else is_substring(x, match["epc_address1"])
|
||||
lr_filtered["saon_match"] = (
|
||||
lr_filtered["saon"].apply(
|
||||
lambda x: False if pd.isnull(x) else is_substring(x, match["epc_address1"])
|
||||
)
|
||||
)
|
||||
|
||||
# We filter where we have a primary or secondary match
|
||||
lr_filtered = lr_filtered[
|
||||
lr_filtered["paon_match"] | lr_filtered["saon_match"]
|
||||
|
|
@ -599,6 +596,7 @@ def app():
|
|||
elif lr_filtered.shape[0] == 1:
|
||||
land_registry_matches.append(
|
||||
{
|
||||
"uprn": match["UPRN"],
|
||||
"transaction_id": lr_filtered['transaction_id'].values[0],
|
||||
"price": lr_filtered["price"].values[0],
|
||||
"date_of_transfer": lr_filtered["date_of_transfer"].values[0],
|
||||
|
|
@ -616,11 +614,13 @@ def app():
|
|||
lr_filtered = lr_filtered.head(1)
|
||||
land_registry_matches.append(
|
||||
{
|
||||
"uprn": match["UPRN"],
|
||||
"transaction_id": lr_filtered['transaction_id'].values[0],
|
||||
"price": lr_filtered["price"].values[0],
|
||||
"date_of_transfer": lr_filtered["date_of_transfer"].values[0],
|
||||
}
|
||||
)
|
||||
continue
|
||||
elif has_paon_match and all_street_equal:
|
||||
# Perform filter on paon
|
||||
lr_filtered = lr_filtered[lr_filtered["paon_match"]]
|
||||
|
|
@ -631,15 +631,92 @@ def app():
|
|||
lr_filtered = lr_filtered.head(1)
|
||||
land_registry_matches.append(
|
||||
{
|
||||
"uprn": match["UPRN"],
|
||||
"transaction_id": lr_filtered['transaction_id'].values[0],
|
||||
"price": lr_filtered["price"].values[0],
|
||||
"date_of_transfer": lr_filtered["date_of_transfer"].values[0],
|
||||
}
|
||||
)
|
||||
else:
|
||||
raise NotImplementedError("wtf")
|
||||
# We do a match on saon
|
||||
lr_filtered["saon_match2"] = lr_filtered["saon"].apply(
|
||||
lambda x: False if pd.isnull(x) else is_substring(x, match["epc_address"])
|
||||
)
|
||||
|
||||
lr_filtered = lr_filtered[lr_filtered["saon_match2"]]
|
||||
|
||||
if lr_filtered.empty:
|
||||
continue
|
||||
elif lr_filtered.shape[0] == 1:
|
||||
land_registry_matches.append(
|
||||
{
|
||||
"uprn": match["UPRN"],
|
||||
"transaction_id": lr_filtered['transaction_id'].values[0],
|
||||
"price": lr_filtered["price"].values[0],
|
||||
"date_of_transfer": lr_filtered["date_of_transfer"].values[0],
|
||||
}
|
||||
)
|
||||
continue
|
||||
else:
|
||||
raise NotImplementedError("wtf")
|
||||
else:
|
||||
raise NotImplementedError("wtf")
|
||||
# We have a final check, based on an observed case
|
||||
lr_address_1 = " ".join([x.lower().strip() for x in match["Property Address"].split(",")[0:2]])
|
||||
|
||||
lr_filtered["paon_match2"] = lr_filtered["paon"].apply(
|
||||
lambda x: False if pd.isnull(x) else is_substring(x, lr_address_1)
|
||||
)
|
||||
|
||||
lr_filtered = lr_filtered[lr_filtered["paon_match2"]]
|
||||
|
||||
if lr_filtered.empty:
|
||||
continue
|
||||
elif lr_filtered.shape[0] == 1:
|
||||
land_registry_matches.append(
|
||||
{
|
||||
"uprn": match["UPRN"],
|
||||
"transaction_id": lr_filtered['transaction_id'].values[0],
|
||||
"price": lr_filtered["price"].values[0],
|
||||
"date_of_transfer": lr_filtered["date_of_transfer"].values[0],
|
||||
}
|
||||
)
|
||||
continue
|
||||
else:
|
||||
# Check all the same
|
||||
all_paon_equal, all_saon_equal, all_street_equal = check_equalities(lr_filtered)
|
||||
|
||||
# Check saon is house number with exact match
|
||||
lr_filtered["saon_match2"] = lr_filtered["saon"].apply(
|
||||
lambda x: False if pd.isnull(x) else house_number_match(x, match["house_number"])
|
||||
)
|
||||
|
||||
if all_paon_equal and all_saon_equal and all_street_equal:
|
||||
# Take the newest record
|
||||
lr_filtered = lr_filtered.sort_values("date_of_transfer", ascending=False)
|
||||
lr_filtered = lr_filtered.head(1)
|
||||
land_registry_matches.append(
|
||||
{
|
||||
"uprn": match["UPRN"],
|
||||
"transaction_id": lr_filtered['transaction_id'].values[0],
|
||||
"price": lr_filtered["price"].values[0],
|
||||
"date_of_transfer": lr_filtered["date_of_transfer"].values[0],
|
||||
}
|
||||
)
|
||||
continue
|
||||
elif any(lr_filtered["saon_match2"]):
|
||||
lr_filtered = lr_filtered[lr_filtered["saon_match2"]]
|
||||
if lr_filtered.shape[0] == 1:
|
||||
land_registry_matches.append(
|
||||
{
|
||||
"uprn": match["UPRN"],
|
||||
"transaction_id": lr_filtered['transaction_id'].values[0],
|
||||
"price": lr_filtered["price"].values[0],
|
||||
"date_of_transfer": lr_filtered["date_of_transfer"].values[0],
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
raise NotImplementedError("wtf")
|
||||
else:
|
||||
raise NotImplementedError("What happened here?")
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue