# ---------------------------------------------------------------------------
# backend/Property.py -- method added to ``Property`` (moved from HeatingRecommender)
# ---------------------------------------------------------------------------
def is_ashp_valid(self, exclusions):
    """
    Decide whether an air source heat pump (ASHP) may be recommended for this property.

    The checks are applied in order:
      1. If "air_source_heat_pump" is in ``self.non_invasive_recommendations`` the ASHP is always
         valid; this check runs before the exclusions check, so it wins over an exclusion.
      2. If "air_source_heat_pump" is in ``exclusions`` the ASHP is not valid.
      3. Otherwise the ASHP is valid only for a "House" or "Bungalow" that does not already have one.

    :param exclusions: collection of measure names that must not be recommended. ``None`` is treated
        as "no exclusions".
    :return: True if an ASHP can be recommended, False otherwise
    """
    if "air_source_heat_pump" in self.non_invasive_recommendations:
        return True

    # Tolerate a missing exclusions list rather than failing on the membership test
    if "air_source_heat_pump" in (exclusions or ()):
        return False

    suitable_property_type = self.data["property-type"] in ["House", "Bungalow"]
    has_air_source_heat_pump = self.main_heating["has_air_source_heat_pump"]

    return suitable_property_type and not has_air_source_heat_pump


# ---------------------------------------------------------------------------
# backend/app/plan/router.py -- helper added next to the consumption estimate in ``trigger_plan``
# ---------------------------------------------------------------------------
def convert_to_electric_consumption(self, p, energy_consumption, assumed_ashp_efficiency, exclusions):
    """
    Convert a property's estimated energy consumption into the electricity it would need once
    heating and hot water are electrified with an air source heat pump (ASHP).

    Only a "mains gas" property for which ``p.is_ashp_valid(...)`` is True is adjusted: its adjusted
    heating and hot water consumption is divided by the ASHP efficiency, and the adjusted lighting
    and appliance consumption is added back unchanged. Every other property (already electric, gas
    without a valid ASHP, or any other fuel) gets ``energy_consumption`` back untouched, so the
    function never returns None.

    :param self: unused.
        NOTE(review): this helper appears to be defined inside ``trigger_plan`` rather than on a
        class, so ``self`` looks like a leftover - kept only so the signature is unchanged.
    :param p: the property; must expose ``main_fuel``, ``is_ashp_valid`` and
        ``energy_consumption_estimates``
    :param energy_consumption: the consumption estimate to return when no adjustment applies
    :param assumed_ashp_efficiency: ASHP efficiency as a percentage (e.g. 300 for 300%)
    :param exclusions: measure exclusions, forwarded to ``p.is_ashp_valid``
    :return: the electricity consumption to plan for
    :raises ValueError: if an adjustment is needed and ``assumed_ashp_efficiency`` is not positive
    """
    fuel_type = p.main_fuel["fuel_type"]

    # Nothing to convert unless we'd be swapping a gas system for an ASHP. The validity check is
    # evaluated once, and only for gas properties (as before, electric properties never trigger it).
    if fuel_type != "mains gas" or not p.is_ashp_valid(exclusions=exclusions):
        return energy_consumption

    if assumed_ashp_efficiency <= 0:
        raise ValueError(
            f"assumed_ashp_efficiency must be a positive percentage, got {assumed_ashp_efficiency!r}"
        )

    # We should adjust the energy consumption to reflect the 200-400% efficiency of an ASHP with
    # electrified heating, so that the solar panel can cover heating generation.
    adjusted_estimates = p.energy_consumption_estimates["adjusted"]
    systems_consumptions = adjusted_estimates["heating"] + adjusted_estimates["hot_water"]
    adjusted_consumption = systems_consumptions / (assumed_ashp_efficiency / 100)

    return adjusted_consumption + adjusted_estimates["lighting"] + adjusted_estimates["appliances"]


# ---------------------------------------------------------------------------
# etl/bill_savings/EnergyConsumptionModel.py -- methods of ``EnergyConsumptionModel``
# ---------------------------------------------------------------------------
@staticmethod
def calculate_percentage_decrease(start_efficiency, end_efficiency, consumption_averages):
    """
    Calculate the percentage drop in average consumption between two energy efficiency scores.

    :param start_efficiency: the current energy efficiency score (number or numeric string)
    :param end_efficiency: the target energy efficiency score (number or numeric string)
    :param consumption_averages: dataframe with "current-energy-efficiency" and "total_consumption"
        columns
    :return: the percentage decrease from start to end, floored at 0
    :raises IndexError: if either efficiency has no row in ``consumption_averages``
    """
    # Function-scope import: this file's own imports are not visible from here
    import pandas as pd

    # Compare numerically so that 69, 69.0 and "69" all refer to the same row, regardless of
    # whether the column was stored as int, float or string
    efficiencies = pd.to_numeric(consumption_averages["current-energy-efficiency"], errors="coerce")

    def _average_consumption(efficiency):
        matches = consumption_averages.loc[
            efficiencies == pd.to_numeric(efficiency, errors="coerce"), "total_consumption"
        ]
        if matches.empty:
            raise IndexError(f"No average consumption found for energy efficiency {efficiency!r}")
        return matches.values[0]

    start_consumption = _average_consumption(start_efficiency)
    end_consumption = _average_consumption(end_efficiency)

    # With no baseline consumption there is nothing to reduce (and nothing to divide by)
    if start_consumption == 0:
        return 0

    percentage_decrease = ((start_consumption - end_consumption) / start_consumption) * 100
    # percentage_decrease cannot be negative
    if percentage_decrease < 0:
        percentage_decrease = 0

    return percentage_decrease


def estimate_new_consumption(self, current_energy_efficiency, target_efficiency, current_consumption):
    """
    Given the consumption_averages dataset, which is produced as a result of the data_combining.py
    script for the energy kwh models, estimate the new consumption from the current consumption,
    based on the expected reduction in average consumption between the current and the target
    energy efficiency.

    :param current_energy_efficiency: the property's current energy efficiency score
    :param target_efficiency: the energy efficiency score being targeted
    :param current_consumption: the property's current consumption
    :return: ``current_consumption`` reduced by the expected percentage decrease
    """
    percentage_decrease = self.calculate_percentage_decrease(
        start_efficiency=current_energy_efficiency,
        end_efficiency=target_efficiency,
        consumption_averages=self.consumption_averages
    )
    new_consumption = current_consumption * (1 - percentage_decrease / 100)
    return new_consumption
# The mode EPC rating is D, so we associate the £238k valuation with an EPC D property
# Therefore value_of_F * 1.15 = value_of_D * 1.03
# Therefore value_of_F = value_of_D * 1.03/1.15 = 238k * (1.03/1.15) = 213165
# NOTE(review): the constant below is 200_000, which no longer matches the 213165 derived above -
# confirm which figure is intended and bring the comment and the value back in line.
PROPERTY_VALUE_ESTIMATE = 200_000

# UPRNs of properties we need
# NOTE(review): judging by the name these are presumably UPRNs to exclude manually - confirm and
# complete the comment above.
MANUAL_EXCLUSIONS = []


def is_substring(x, match_string):
    """
    Case-insensitive check of whether ``x`` occurs inside ``match_string``.

    :param x: the value to look for; a null value never matches
    :param match_string: the text to search in; a null value never matches
    :return: True if ``x`` is contained in ``match_string`` (ignoring case), False otherwise
    """
    if pd.isnull(x) or pd.isnull(match_string):
        return False

    # Lower-case both sides so the comparison does not depend on the caller normalising ``x``
    return str(x).lower() in str(match_string).lower()


def house_number_match(paon, house_number):
    """
    Compare two house numbers, numerically when both can be read as integers.

    :param paon: the first house number (e.g. 12, "12" or "12a")
    :param house_number: the house number to compare against
    :return: True if both convert to the same integer; if either cannot be converted, the result of
        a plain equality comparison of the two raw values
    """
    # Firstly try and convert to numeric
    try:
        return int(paon) == int(house_number)
    except (TypeError, ValueError, OverflowError):
        # If we can't convert both to numeric (e.g. "12a", None, NaN), we do an equality.
        # Only conversion failures are caught, so unrelated errors are no longer hidden.
        return paon == house_number


def check_equalities(lr_filtered):
    """
    Check whether every row of ``lr_filtered`` shares the same paon, saon and street.

    A null saon in the first row is handled explicitly: the saons then only count as equal if all of
    them are null. The paon and street columns are compared with plain equality against the first
    row's value.

    :param lr_filtered: dataframe with "paon", "saon" and "street" columns. The first row is read
        with ``.values[0]``, so the frame is expected to be non-empty.
    :return: tuple of booleans (all_paon_equal, all_saon_equal, all_street_equal)
    """
    all_paon_equal = all(lr_filtered["paon"] == lr_filtered["paon"].values[0])
    if pd.isnull(lr_filtered["saon"].values[0]):
        all_saon_equal = all(pd.isnull(lr_filtered["saon"]))
    else:
        all_saon_equal = all(lr_filtered["saon"] == lr_filtered["saon"].values[0])

    all_street_equal = all(lr_filtered["street"] == lr_filtered["street"].values[0])

    return all_paon_equal, all_saon_equal, all_street_equal
Need to handle this, probably by reading in all of the EPC data, concatenating together - # and performing a singular filter for most recent EPC by UPRN + # it's actually EPC E. Need to handle this, probably by reading in all of the EPC data, concatenating + # together and performing a singular filter for most recent EPC by UPRN # paths = [ # "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv", # "local_data/all-domestic-certificates/domestic-E08000031-Wolverhampton/certificates.csv", @@ -356,10 +389,6 @@ def app(): # Take the newest UPRN properties = properties.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN") - # TODO: Do we want to filter properties based on lodgement dates? - # E.g. we might want to filter properties that have had a sale EPC lodged in the last x months, because - # this could be indicative of a sale happening, and the land registry data may not have caught up yet - # Remove entries where the address begins with the term "land adjoining", or other records that don't reference the # the property itself starting_terms = [ @@ -461,6 +490,8 @@ def app(): # freehold_matching_lookup.to_excel("freehold_matching_lookup V2.xlsx") # leasehold_matching_lookup.to_excel("leasehold_matching_lookup V2.xlsx") + # freehold_matching_lookup = pd.read_excel("freehold_matching_lookup V2.xlsx") + # leasehold_matching_lookup = pd.read_excel("leasehold_matching_lookup V2.xlsx") # The approximate matches aren't very good freehold_matching_lookup = freehold_matching_lookup[freehold_matching_lookup["match_type"] == "exact"] @@ -483,7 +514,9 @@ def app(): "ADDRESS1", "CURRENT_ENERGY_EFFICIENCY", "CURRENT_ENERGY_RATING", - "POSTCODE" + "POSTCODE", + "LODGEMENT_DATE", + "TRANSACTION_TYPE" ] ].rename( columns={ @@ -501,7 +534,7 @@ def app(): "Postcode", "Company Registration No. 
(1)", "Proprietor Name (1)", - + "Date Proprietor Added", ] ], how="left", on="Title Number" @@ -531,35 +564,6 @@ def app(): land_registry["saon"] = land_registry["saon"].str.lower().str.strip() land_registry["date_of_transfer"] = pd.to_datetime(land_registry["date_of_transfer"]) - def is_substring(x, match_string): - - if pd.isnull(x): - return False - - return x in match_string.lower() - - def house_number_match(paon, house_number): - # Firstly try and convert to numberic - try: - paon_numeric = int(paon) - house_number_numeric = int(house_number) - return paon_numeric == house_number_numeric - except Exception as e: # noqa - # If we can't convert both to numeric, we do an equality - - return paon == house_number - - def check_equalities(lr_filtered): - all_paon_equal = all(lr_filtered["paon"] == lr_filtered["paon"].values[0]) - if pd.isnull(lr_filtered["saon"].values[0]): - all_saon_equal = all(pd.isnull(lr_filtered["saon"])) - else: - all_saon_equal = all(lr_filtered["saon"] == lr_filtered["saon"].values[0]) - - all_street_equal = all(lr_filtered["street"] == lr_filtered["street"].values[0]) - - return all_paon_equal, all_saon_equal, all_street_equal - land_registry_matches = [] for _, match in tqdm(matched_addresses.iterrows(), total=len(matched_addresses)): @@ -779,13 +783,25 @@ def app(): ).drop(columns=["uprn"]) # Flat anything that sold in the last year - # TODO: Decide on what this logic should be! 
matched_addresses["sold_recently"] = ( matched_addresses["date_of_transfer"] >= pd.Timestamp.now() - pd.DateOffset(years=1) ) - # Drop anything that sold recently - matched_addresses = matched_addresses[~matched_addresses["sold_recently"]] + matched_addresses["sale_lodged_recently"] = ( + (pd.to_datetime(matched_addresses["LODGEMENT_DATE"]) >= pd.Timestamp.now() - pd.DateOffset(months=12)) & + (matched_addresses["TRANSACTION_TYPE"].isin(["marketed sale", "non marketed sale"])) + ) + + # Drop rows on the booleans + matched_addresses = matched_addresses[ + ~matched_addresses["sold_recently"] & + ~matched_addresses["sale_lodged_recently"] + ] + + # Filter combined_matching_lookup accordingly + combined_matching_lookup = combined_matching_lookup[ + combined_matching_lookup["UPRN"].isin(matched_addresses["UPRN"]) + ] # shared_freehold_match = pd.DataFrame(shared_freehold_match) # Strore these files @@ -807,45 +823,19 @@ def app(): properties=properties ) - investment_20m = combined_aggregate[combined_aggregate["cumulative_value"] <= 20_500_000] investment_50m = combined_aggregate[combined_aggregate["cumulative_value"] <= 51_000_000] - investment_20m_properties = matched_addresses[ - matched_addresses["Company Registration No. (1)"].isin(investment_20m["Company Registration No. (1)"]) - ] - investment_50m_properties = matched_addresses[ matched_addresses["Company Registration No. (1)"].isin(investment_50m["Company Registration No. 
def extract_price_info(text):
    """
    Pull the estimated price and its low/high bounds out of the text of a valuation page.

    The text must contain a block of the form
    "Estimated price\\n\\nLow£<n>k\\n\\n£<n>k\\n\\nHigh£<n>k", where each <n> is a number of
    thousands of pounds that may contain comma separators.

    :param text: the page text; None or an empty string yields None
    :return: dict with the keys 'Zoopla Valuation', 'Zoopla Lower Bound' and 'Zoopla Upper Bound'
        (values in pounds), or None if the price block cannot be found
    """
    if not text:
        return None

    # Use regex to find the relevant price information
    match = re.search(r'Estimated price\n\nLow£([\d,]+)k\n\n£([\d,]+)k\n\nHigh£([\d,]+)k', text)
    if match is None:
        return None

    # The captured figures are in thousands, in the order low / estimate / high
    low_price, est_price, high_price = (
        int(figure.replace(',', '')) * 1000 for figure in match.groups()
    )

    return {
        'Zoopla Valuation': est_price,
        'Zoopla Lower Bound': low_price,
        'Zoopla Upper Bound': high_price
    }


def _build_valuation_record(uprn, page_text):
    # One output row per property: the UPRN plus whatever price fields could be extracted
    return {"UPRN": uprn, **(extract_price_info(page_text) or {})}


def get_valuations(portfolio_epc_data_50m, delay_seconds=2, timeout=30):
    """
    Fetch a valuation page for every property and collect the extracted price information.

    :param portfolio_epc_data_50m: dataframe with a "UPRN" column, one row per property
    :param delay_seconds: pause after each request, in seconds
    :param timeout: timeout passed to each HTTP request, in seconds
    :return: list of dicts, one per property, each holding "UPRN" plus the price fields from
        ``extract_price_info`` when a price was found. Properties whose page could not be fetched
        or parsed are kept with just their "UPRN".
    """
    # This gets blocked pretty quickly by Zoopla
    import requests
    import time
    from tqdm import tqdm

    valuation_data = []
    for _, property_data in tqdm(portfolio_epc_data_50m.iterrows(), total=len(portfolio_epc_data_50m)):
        uprn = property_data["UPRN"]
        try:
            response = requests.get(
                f"https://r.jina.ai/https://www.zoopla.co.uk/property/uprn/{uprn}/",
                timeout=timeout
            )
            page_text = response.text
        except requests.RequestException:
            # Best effort: a failed request must not lose the valuations collected so far
            page_text = ""

        valuation_data.append(_build_valuation_record(uprn, page_text))

        time.sleep(delay_seconds)

    return valuation_data
self.property.main_heating["has_air_source_heat_pump"] - - return suitable_property_type and not has_air_source_heat_pump - def recommend_air_source_heat_pump(self, phase, has_cavity_or_loft_recommendations, _return=False): """ This method will implement the recommendation for an air source heat pump