diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py index 5d39f139..1c828566 100644 --- a/etl/customers/goldman/property_ownership.py +++ b/etl/customers/goldman/property_ownership.py @@ -754,6 +754,39 @@ def app(): else: raise NotImplementedError("What happened here?") + land_registry_matches = pd.DataFrame(land_registry_matches) + # land_registry_matches.to_excel("land_registry_matches.xlsx") + + # Check the matches against the addresses + # lr_to_addresses = matched_addresses[ + # ["UPRN", "epc_address", "epc_postcode", "Property Address", "Postcode"] + # ].merge( + # land_registry_matches, + # how="inner", + # left_on="UPRN", + # right_on="uprn" + # ).drop(columns=["uprn"]).merge( + # land_registry[["transaction_id", "paon", "saon", "street", "postcode"]], + # how="left", on="transaction_id" + # ) + + # Merge onto matched addresses + matched_addresses = matched_addresses.merge( + land_registry_matches, + how="left", + left_on="UPRN", + right_on="uprn" + ).drop(columns=["uprn"]) + + # Flag anything that sold in the last year + # TODO: Decide on what this logic should be! + matched_addresses["sold_recently"] = ( + matched_addresses["date_of_transfer"] >= pd.Timestamp.now() - pd.DateOffset(years=1) + ) + + # Drop anything that sold recently + matched_addresses = matched_addresses[~matched_addresses["sold_recently"]] + # shared_freehold_match = pd.DataFrame(shared_freehold_match) # Strore these files # freehold_matching_lookup.to_excel("freehold_matching_lookup.xlsx") @@ -785,11 +818,30 @@ def app(): matched_addresses["Company Registration No. (1)"].isin(investment_50m["Company Registration No. 
(1)"]) ] + # Merge on the owner + al_rayan = investment_50m_properties[ + investment_50m_properties["Proprietor Name (1)"].str.contains("AL RAYAN BANK PLC")] + portfolio_epc_data_50m = properties[properties["UPRN"].isin(investment_50m_properties["UPRN"])] portfolio_epc_data_20m = properties[properties["UPRN"].isin(investment_20m_properties["UPRN"])] - investment_20m_properties.to_excel("investment_20m_properties 28th May.xlsx", index=False) - investment_50m_properties.to_excel("investment_50m_properties 28th May.xlsx", index=False) + # investment_20m_properties.to_excel("investment_20m_properties 28th July.xlsx", index=False) + # investment_50m_properties.to_excel("investment_50m_properties 28th July.xlsx", index=False) + + z = pd.read_excel("investment_50m_properties 28th May.xlsx") + new = investment_50m_properties[~investment_50m_properties["UPRN"].isin(z["UPRN"])] + new_al_rayan = new[ + new["Proprietor Name (1)"].str.contains("AL RAYAN BANK PLC") + ] + new_al_rayan = new_al_rayan.merge( + properties[["UPRN", "LODGEMENT_DATE"]], + how="left", + on="UPRN" + ).merge( + company_ownership[["Title Number", "Date Proprietor Added"]], + how="left", + on="Title Number", + ) # Store the EPC data portfolio_epc_data_50m.to_excel("portfolio_epc_data_50m 28th May.xlsx", index=False)