diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py index f1f0de38..71c53a74 100644 --- a/etl/customers/goldman/property_ownership.py +++ b/etl/customers/goldman/property_ownership.py @@ -689,6 +689,15 @@ def app(): lr_filtered["saon_match2"] = lr_filtered["saon"].apply( lambda x: False if pd.isnull(x) else house_number_match(x, match["house_number"]) ) + # We check if we have a flat + match_flat_number = re.match("flat (\d+)", match["epc_address1"].lower()) + lr_filtered["saon_match3"] = False + if match_flat_number is not None: + # Get out the match + match_flat_number = "flat " + match_flat_number.group(1) + lr_filtered["saon_match3"] = lr_filtered["saon"].apply( + lambda x: False if pd.isnull(x) else x == match_flat_number + ) if all_paon_equal and all_saon_equal and all_street_equal: # Take the newest record @@ -705,6 +714,23 @@ def app(): continue elif any(lr_filtered["saon_match2"]): lr_filtered = lr_filtered[lr_filtered["saon_match2"]] + all_saon_equal, all_paon_equal, all_street_equal = check_equalities(lr_filtered) + if all_paon_equal and all_saon_equal and all_street_equal: + # Filter on the newest record + lr_filtered = lr_filtered.sort_values("date_of_transfer", ascending=False) + lr_filtered = lr_filtered.head(1) + if lr_filtered.shape[0] == 1: + land_registry_matches.append( + { + "uprn": match["UPRN"], + "transaction_id": lr_filtered['transaction_id'].values[0], + "price": lr_filtered["price"].values[0], + "date_of_transfer": lr_filtered["date_of_transfer"].values[0], + } + ) + continue + elif any(lr_filtered["saon_match3"]): + lr_filtered = lr_filtered[lr_filtered["saon_match3"]] if lr_filtered.shape[0] == 1: land_registry_matches.append( {