From e000c87cad98963e8c734a5cf8990a5a7b713217 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 18 Apr 2024 12:16:13 +0100 Subject: [PATCH 01/20] added patches for immo pilot 2 --- etl/customers/immo/pilot/asset_list_2.py | 126 +++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 etl/customers/immo/pilot/asset_list_2.py diff --git a/etl/customers/immo/pilot/asset_list_2.py b/etl/customers/immo/pilot/asset_list_2.py new file mode 100644 index 00000000..f722a490 --- /dev/null +++ b/etl/customers/immo/pilot/asset_list_2.py @@ -0,0 +1,126 @@ +import pandas as pd +from utils.s3 import read_excel_from_s3 +from utils.s3 import save_csv_to_s3 + +USER_ID = 8 +PORTFOLIO_ID = 72 + +# For +patches = [ + { + 'address': '116 Parkes Hall Road', + 'postcode': 'DY1 3RJ', + 'walls-description': 'Cavity wall, filled cavity', + 'walls-energy-eff': 'Average', + 'roof-description': 'Pitched, 270 mm loft insulation', + 'roof-energy-eff': 'Good', + 'windows-description': 'Fully double glazed', + 'windows-energy-eff': 'Good', + 'mainheat-description': 'Boiler and radiators, mains gas', + 'mainheat-energy-eff': 'Good', + 'mainheatcont-description': 'Programmer, room thermostat and TRVs', + 'mainheatc-energy-eff': 'Good', + 'lighting-description': 'Low energy lighting in 27% of fixed outlets', + 'lighting-energy-eff': 'Good', + 'floor-description': 'Solid, no insulation (assumed)', + 'secondheat-description': 'None', + 'current-energy-efficiency': '73', + 'current-energy-rating': 'C', + 'energy-consumption-current': '184', + 'co2-emissions-current': '2.4', + 'potential-energy-efficiency': '88', + 'total-floor-area': '73', + 'construction-age-band': 'England and Wales: 1930-1949', + 'property-type': 'House', + 'built-form': 'Mid-Terrace', + } +] + +# This is information that is found as a result of the non-invasives, that mean that certain measures +# have been installed already. 
To reflect this in the front end, it is included in the recommendation, however +# the cost is removed and instead, a message is presented saying that the measure is already installed. +already_installed = [] + +non_invasive_recommendations = [] + + +def app(): + raw_asset_list = read_excel_from_s3( + bucket_name="retrofit-datalake-dev", + file_key="customers/Immo/Dudley Asset List - Hestia - pilot2.xlsx", + header_row=0 + ) + + raw_asset_list = raw_asset_list[raw_asset_list["in_pilot"]].copy() + + # Extract address and postcode + raw_asset_list["address"] = raw_asset_list["Full Address"].str.split(",").str[0] + raw_asset_list["postcode"] = raw_asset_list["Full Address"].str.split(",").str[-1].str.strip() + + # We're provided with number of bathrooms and number of bedrooms. + asset_list = raw_asset_list.rename( + columns={ + "No. of Beds": "n_bedrooms", + "No. of WC's": "n_bathrooms" + } + ) + + # Store the asset list in s3 + filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv" + save_csv_to_s3( + dataframe=asset_list, + bucket_name="retrofit-plan-inputs-dev", + file_name=filename + ) + + # Store overrides in s3 + already_installed_filename = f"{USER_ID}/{PORTFOLIO_ID}/already_installed.json" + save_csv_to_s3( + dataframe=pd.DataFrame(already_installed), + bucket_name="retrofit-plan-inputs-dev", + file_name=already_installed_filename + ) + + # Store patches in s3 + patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json" + save_csv_to_s3( + dataframe=pd.DataFrame(patches), + bucket_name="retrofit-plan-inputs-dev", + file_name=patches_filename + ) + + # Store non-invasive recommendations in S3 + non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.json" + save_csv_to_s3( + dataframe=pd.DataFrame(non_invasive_recommendations), + bucket_name="retrofit-plan-inputs-dev", + file_name=non_invasive_recommendations_filename + ) + + # EPC C portoflio + body = { + "portfolio_id": str(PORTFOLIO_ID), + "housing_type": "Private", + 
"goal": "Increase EPC", + "goal_value": "C", + "trigger_file_path": filename, + "already_installed_file_path": already_installed_filename, + "patches_file_path": patches_filename, + "non_invasive_recommendations_file_path": non_invasive_recommendations_filename, + "budget": None, + } + print(body) + + # EPC B portoflio + body = { + "portfolio_id": str(PORTFOLIO_ID + 1), + "housing_type": "Private", + "goal": "Increase EPC", + "goal_value": "B", + "trigger_file_path": filename, + "already_installed_file_path": already_installed_filename, + "patches_file_path": patches_filename, + "non_invasive_recommendations_file_path": non_invasive_recommendations_filename, + "budget": None, + } + print(body) From acada27061d09f47ac76ecd2785c95eb39e741d3 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 18 Apr 2024 15:16:46 +0100 Subject: [PATCH 02/20] rounding up roof coverage % --- backend/SearchEpc.py | 9 +++++++-- backend/app/plan/router.py | 11 +++++++++-- backend/ml_models/Valuation.py | 8 ++++++++ etl/customers/immo/pilot/asset_list_2.py | 21 ++++++++++++++++++--- etl/epc/Record.py | 2 +- recommendations/SolarPvRecommendations.py | 10 +++++++--- 6 files changed, 50 insertions(+), 11 deletions(-) diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index cc2ee4a9..44178792 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -709,8 +709,13 @@ class SearchEpc: self.full_sap_epc = {} # Finally, set a standardised address 1 and postcode - self.address_clean = self.ordnance_survey_client.address_os - self.postcode_clean = self.ordnance_survey_client.postcode_os + self.address_clean = ( + self.ordnance_survey_client.address_os if self.ordnance_survey_client.address_os else self.address1 + ) + self.postcode_clean = ( + self.ordnance_survey_client.postcode_os if self.ordnance_survey_client.postcode_os else + self.postcode + ) return os_response = self.ordnance_survey_client.get_places_api() diff --git a/backend/app/plan/router.py 
b/backend/app/plan/router.py index 9854abe8..a8464ee6 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -52,6 +52,10 @@ def patch_epc(patch, epc_records): """ for patch_variable, patch_value in patch.items(): + + if patch_variable in ["address", "postcode"]: + continue + if patch_value == "": continue if patch_variable in epc_records["original_epc"]: @@ -268,9 +272,12 @@ async def trigger_plan(body: PlanTriggerRequest): postcode=config["postcode"], uprn=uprn, auth_token=get_settings().EPC_AUTH_TOKEN, - os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY + os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY, ) - epc_searcher.find_property() + epc_searcher.ordnance_survey_client.built_form = config.get("built_form", None) + epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None) + # For the moment, our OS API access is unavailable, so we skip and interpolate + epc_searcher.find_property(skip_os=True) # Create a record in db property_id, is_new = create_property( session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py index 251c016a..39ea5a98 100644 --- a/backend/ml_models/Valuation.py +++ b/backend/ml_models/Valuation.py @@ -63,6 +63,14 @@ class PropertyValuation: 90093693: 279_000, # Based on Zoopla 90055152: 149_000, # Based on Zoopla 90028499: 238_000, # Based on Zoopla + # IMMO Dudley Pilot 2- search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/ + 90039318: 177_000, # Based on Zoopla + 90038384: 170_000, # Based on Zoopla + 90105380: 185_000, # Based on Zoopla + 90124001: 165_000, # Based on Zoopla + 90013980: 148_000, # Based on Zoopla + 90087154: 184_000, # Based on Zoopla + 90046817: 167_000, # Based on Zoopla } # We base our valuation uplifts on a number of sources diff --git a/etl/customers/immo/pilot/asset_list_2.py b/etl/customers/immo/pilot/asset_list_2.py index 
f722a490..121e7a81 100644 --- a/etl/customers/immo/pilot/asset_list_2.py +++ b/etl/customers/immo/pilot/asset_list_2.py @@ -10,6 +10,7 @@ patches = [ { 'address': '116 Parkes Hall Road', 'postcode': 'DY1 3RJ', + 'uprn': '90046817', 'walls-description': 'Cavity wall, filled cavity', 'walls-energy-eff': 'Average', 'roof-description': 'Pitched, 270 mm loft insulation', @@ -21,7 +22,7 @@ patches = [ 'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'mainheatc-energy-eff': 'Good', 'lighting-description': 'Low energy lighting in 27% of fixed outlets', - 'lighting-energy-eff': 'Good', + 'lighting-energy-eff': 'Average', 'floor-description': 'Solid, no insulation (assumed)', 'secondheat-description': 'None', 'current-energy-efficiency': '73', @@ -39,7 +40,11 @@ patches = [ # This is information that is found as a result of the non-invasives, that mean that certain measures # have been installed already. To reflect this in the front end, it is included in the recommendation, however # the cost is removed and instead, a message is presented saying that the measure is already installed. -already_installed = [] +already_installed = [ + { + 'address': '28 Sangwin Road', 'postcode': 'WV14 9EQ', "already_installed": ["loft_insulation"] + } +] non_invasive_recommendations = [] @@ -58,13 +63,23 @@ def app(): raw_asset_list["postcode"] = raw_asset_list["Full Address"].str.split(",").str[-1].str.strip() # We're provided with number of bathrooms and number of bedrooms. + # THe UPRNs are not the official ones asset_list = raw_asset_list.rename( columns={ "No. of Beds": "n_bedrooms", - "No. of WC's": "n_bathrooms" + "No. 
of WC's": "n_bathrooms", + 'Property Type': 'property_type', + 'Architype': 'built_form' } ) + # Remap the values + asset_list["built_form"] = asset_list["built_form"].map({ + "SEMI DETACHED": "Semi-Detached", + "MID TERRACE": "Mid-Terrace", + "END TERRACE": "End-Terrace", + }) + # Store the asset list in s3 filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv" save_csv_to_s3( diff --git a/etl/epc/Record.py b/etl/epc/Record.py index e74330a2..9a965c6a 100644 --- a/etl/epc/Record.py +++ b/etl/epc/Record.py @@ -191,7 +191,7 @@ class EPCRecord: This method will clean the records using the data processor """ epc_data_processor = EPCDataProcessor( - data=self.epc_record_as_dataframe("prepared_epc"), + data=self.epc_record_as_dataframe("prepared_epc").copy(), run_mode="newdata", cleaning_averages=self.cleaning_data, ) diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py index 58cf9735..b44557ab 100644 --- a/recommendations/SolarPvRecommendations.py +++ b/recommendations/SolarPvRecommendations.py @@ -56,14 +56,18 @@ class SolarPvRecommendations: if not is_valid_property_type or not is_valid_roof_type or not has_no_existing_solar_pv: return + solar_pv_percentage = self.property.solar_pv_percentage + # We round up to the neaest 10% + solar_pv_percentage = np.ceil(solar_pv_percentage * 10) / 10 + # For the solar recommendations, we produce the following scenarios: # 1) Solar panels only, we present a high, medium and low coverage # 2) With and without battery roof_coverage_scenarios = [ - self.property.solar_pv_percentage - 0.1, self.property.solar_pv_percentage, + solar_pv_percentage - 0.1, solar_pv_percentage, ] - if self.property.solar_pv_percentage <= 0.4: - roof_coverage_scenarios.append(self.property.solar_pv_percentage + 0.1) + if solar_pv_percentage <= 0.4: + roof_coverage_scenarios.append(solar_pv_percentage + 0.1) # We make sure we haven't gone too low or high - we allow no more than 60% coverage roof_coverage_scenarios = [v 
for v in roof_coverage_scenarios if 0 <= v <= 0.6] # If we only have two scenarios, we add a coverage scenario 10% less than the smallest From db2586061598471f182fc338668618dfd4109a61 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 18 Apr 2024 16:01:41 +0100 Subject: [PATCH 03/20] Completed pilot 2 --- etl/customers/immo/pilot/asset_list_2.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/etl/customers/immo/pilot/asset_list_2.py b/etl/customers/immo/pilot/asset_list_2.py index 121e7a81..1b4fad9a 100644 --- a/etl/customers/immo/pilot/asset_list_2.py +++ b/etl/customers/immo/pilot/asset_list_2.py @@ -43,6 +43,15 @@ patches = [ already_installed = [ { 'address': '28 Sangwin Road', 'postcode': 'WV14 9EQ', "already_installed": ["loft_insulation"] + }, + { + 'address': '51 Hillwood Road', 'postcode': 'B62 8NQ', "already_installed": ["loft_insulation"] + }, + { + 'address': '47 Watsons Close', 'postcode': 'DY2 7HL', "already_installed": ["loft_insulation"] + }, + { + 'address': '44 Hatfield Road', 'postcode': 'DY9 7LW', "already_installed": ["loft_insulation"] } ] From 3593b7ae9ebd4245985a2dabc80446b23f00d84e Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 19 Apr 2024 13:54:04 +0100 Subject: [PATCH 04/20] Added boiler upgrade recommendation --- etl/customers/gla_croydon_demo/asset_list.py | 5 ++-- recommendations/Costs.py | 12 ++------ recommendations/HeatingRecommender.py | 31 +++++++++----------- 3 files changed, 20 insertions(+), 28 deletions(-) diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py index 7dde8926..1655979b 100644 --- a/etl/customers/gla_croydon_demo/asset_list.py +++ b/etl/customers/gla_croydon_demo/asset_list.py @@ -34,8 +34,9 @@ def app(): low_memory=False ) - z = epc_data.groupby(["WALLS_DESCRIPTION", "WALLS_ENERGY_EFF"]).size().reset_index(name="count") - z = z[z["MAINHEAT_DESCRIPTION"] == "Boiler and radiators, mains gas"] + z = 
epc_data[epc_data["MAINHEAT_DESCRIPTION"] == "Boiler and radiators, mains gas"] + z["HOTWATER_DESCRIPTION"].value_counts() + z["MAIN_FUEL"].value_counts() # Filter on entries where we have a UPRN epc_data = epc_data[~pd.isnull(epc_data["UPRN"])] diff --git a/recommendations/Costs.py b/recommendations/Costs.py index 852bb11f..d7a8ad2f 100644 --- a/recommendations/Costs.py +++ b/recommendations/Costs.py @@ -67,18 +67,12 @@ LOW_CARBON_COMBI_BOILER = 2200 # https://www.greenmatch.co.uk/boilers/35kw-boiler # https://www.greenmatch.co.uk/boilers/40kw-boiler # These are exclusive of installation costs -COMBI_BOILER_COSTS = { +CONDENSING_BOILER_COSTS = { "30kw": 1550, "35kw": 1610, "40kw": 1625 } -CONVENTIONAL_BOILER_COSTS = { - "30kw": 1117, - "35kw": 1546, - "40kw": 1776 -} - # Assumes 3 hours to remove each heater (including re-decorating) ROOM_HEATER_REMOVAL_COST = 120 ROOM_HEATER_REMOVAL_LABOUR_HOURS = 3 @@ -1179,7 +1173,7 @@ class Costs: estimated_radiators = max(total_radiators_based_on_power, base_radiators + additional_radiators) return round(estimated_radiators) - def boiler(self, is_combi, size, exising_room_heaters, system_change, n_heated_rooms, n_rooms): + def boiler(self, size, exising_room_heaters, system_change, n_heated_rooms, n_rooms): """ Based on a basic estimate of median value £2600 to install a low carbon combi boiler First time central heating vosts can als be found here: @@ -1187,7 +1181,7 @@ class Costs: :return: """ - unit_cost = COMBI_BOILER_COSTS[size] if is_combi else CONVENTIONAL_BOILER_COSTS[size] + unit_cost = CONDENSING_BOILER_COSTS[size] # The unit cost is the cost without VAT # We now need to estimate the cost of the works labour_days = 2 diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 432dc6a6..2423901a 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -312,7 +312,15 @@ class HeatingRecommender: simulation_config = {} boiler_costs = {} 
boiler_recommendation = {} - if self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"]: + + has_inefficient_space_heating = self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"] + + has_inefficient_mains_water = ( + self.property.hotwater["clean_description"] in ["From main system"] and + self.property.data["hot-water-energy-eff"] in ["Very Poor", "Poor", "Average"] + ) + + if has_inefficient_space_heating or has_inefficient_mains_water: boiler_size = self.estimate_boiler_size( property_type=self.property.data["property-type"], built_form=self.property.data["built-form"], @@ -321,22 +329,12 @@ class HeatingRecommender: num_heated_rooms=self.property.data["number-heated-rooms"], ) - # We recommend a combi boiler under the following conditions - # 1) If there are 4 or fewer rooms (we don't use heqted rooms because none of the rooms could be - # heated if there is no existing heating system). - # 2) There 1 or fewer bathrooms - # Otherwise, we recommend a gas condensing boiler, which will server a larger property, that has multiple - # bathrooms - is_combi = ( - (self.property.number_of_rooms <= 4) and - (self.property.n_bathrooms in [None, 0, 1]) - ) - if is_combi: - description = "Upgrade to a new combi boiler" - else: - description = "Upgrade to a new gas condensing boiler" + description = "Upgrade to a new condensing boiler" - simulation_config = {"mainheat_energy_eff_ending": "Good"} + simulation_config = { + "mainheat_energy_eff_ending": "Good", + "hot_water_energy_eff_ending": "Good" + } if system_change: # Installation of a boiler improves the hot water system so we need to reflect this in # the outcome of the recommendation @@ -363,7 +361,6 @@ class HeatingRecommender: } boiler_costs = self.costs.boiler( - is_combi=is_combi, size=f"{boiler_size}kw", exising_room_heaters=exising_room_heaters, system_change=system_change, From 391cb356ee12270aa9f5a4ffeff6a917f07ff05e Mon Sep 17 00:00:00 2001 From: Khalim 
Conn-Kowlessar Date: Fri, 19 Apr 2024 14:07:47 +0100 Subject: [PATCH 05/20] debugging recommendation when we have independent boiler upgrade and heating controls --- recommendations/HeatingRecommender.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 2423901a..aa5cabdb 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -394,9 +394,13 @@ class HeatingRecommender: controls_recommender.recommend(heating_description="Boiler and radiators, mains gas") # We may have 2 recommendations from the heating controls - if not controls_recommender.recommendation: + if not controls_recommender.recommendation and not boiler_recommendation: return + if not system_change and len(boiler_recommendation): + # If there is not a system change, we add the boiler recommendation at point. + self.recommendations.append(boiler_recommendation) + if system_change: # We combine the heating and controls recommendations, in the case of a system change combined_recommendations = [] From 8bd899bcba8739b3232ec254fa799ff8497efb0f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 19 Apr 2024 16:43:13 +0100 Subject: [PATCH 06/20] debugging structure of heating recommendations --- backend/app/plan/router.py | 1 + recommendations/HeatingRecommender.py | 8 ++++---- recommendations/Recommendations.py | 9 +++++++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index a8464ee6..06d1aadf 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -380,6 +380,7 @@ async def trigger_plan(body: PlanTriggerRequest): logger.info("Preparing data for scoring in sap change api") recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data) + recommendations_scoring_data = recommendations_scoring_data.drop( columns=["rdsap_change", "heat_demand_change", 
"carbon_change", "sap_ending", "heat_demand_ending", "carbon_ending"] diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index aa5cabdb..fe5cdd46 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -399,7 +399,7 @@ class HeatingRecommender: if not system_change and len(boiler_recommendation): # If there is not a system change, we add the boiler recommendation at point. - self.recommendations.append(boiler_recommendation) + self.recommendations.append([boiler_recommendation]) if system_change: # We combine the heating and controls recommendations, in the case of a system change @@ -417,12 +417,12 @@ class HeatingRecommender: combined_recommendations.extend(combined_recommendation) # Overwrite the existing boiler recommendation - self.recommendations.extend(combined_recommendations) + self.recommendations.append(combined_recommendations) else: # We increment the recommendation phase, since the heating controls are separate from the boiler upgrade # but we'll only upgrade if we have a heating recommendation has_heating_recommendation = any( - recommendation["type"] == "heating" for recommendation in self.recommendations + rec["type"] == "heating" for recommendation in self.recommendations for rec in recommendation ) if has_heating_recommendation: recommendation_phase += 1 @@ -431,6 +431,6 @@ class HeatingRecommender: for recommendation in controls_recommender.recommendation: recommendation["phase"] = recommendation_phase - self.recommendations.extend(controls_recommender.recommendation) + self.recommendations.append(controls_recommender.recommendation) return diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index 5960d7be..aba75ad9 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -111,11 +111,16 @@ class Recommendations: if "heating" not in self.exclusions: self.heating_recommender.recommend(phase=phase) 
if self.heating_recommender.recommendations: - property_recommendations.append(self.heating_recommender.recommendations) + if len(self.heating_recommender.recommendations) == 1: + property_recommendations.append(self.heating_recommender.recommendations) + else: + property_recommendations.extend(self.heating_recommender.recommendations) # We check if we have distinct heating and heating controls recommendations # If so, we increment by 2 (one of the heating system, one for the heating controls) # otherwise we incremenet by 1 - max_used_phase = max([rec["phase"] for rec in self.heating_recommender.recommendations]) + max_used_phase = max( + [rec["phase"] for recs in self.heating_recommender.recommendations for rec in recs] + ) amount_to_increment = max_used_phase - phase + 1 phase += amount_to_increment From 7bdf2147badefd9f43250ac0eedc933f6378b842 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 19 Apr 2024 18:38:16 +0100 Subject: [PATCH 07/20] restructured output of heating and heating control recommendations --- backend/app/plan/router.py | 20 ++++++++++---------- recommendations/HeatingRecommender.py | 16 +++++++++------- recommendations/Recommendations.py | 19 +++++++++++++------ 3 files changed, 32 insertions(+), 23 deletions(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 06d1aadf..ebaf482d 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -282,16 +282,16 @@ async def trigger_plan(body: PlanTriggerRequest): property_id, is_new = create_property( session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn ) - if not is_new: - continue - - create_property_targets( - session, - property_id=property_id, - portfolio_id=body.portfolio_id, - epc_target=body.goal_value, - heat_demand_target=None - ) + # if not is_new: + # continue + # + # create_property_targets( + # session, + # property_id=property_id, + # portfolio_id=body.portfolio_id, + # 
epc_target=body.goal_value, + # heat_demand_target=None + # ) epc_records = { 'original_epc': epc_searcher.newest_epc.copy(), diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index fe5cdd46..537125a1 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -15,7 +15,8 @@ class HeatingRecommender: self.property = property_instance self.costs = Costs(self.property) - self.recommendations = [] + self.heating_recommendations = [] + self.heating_control_recommendations = [] def recommend(self, phase=0): @@ -23,7 +24,8 @@ class HeatingRecommender: # the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this # in the Costs class, stored as SYSTEM_FLUSH_COST - self.recommendations = [] + self.heating_recommendations = [] + self.heating_control_recommendations = [] # This first iteration of the recommender will provide very basic recommendation # We recommend heating controls based on the main heating system @@ -254,7 +256,7 @@ class HeatingRecommender: system_change=system_change ) - self.recommendations.extend(recommendations) + self.heating_recommendations.extend(recommendations) @staticmethod def estimate_boiler_size(property_type, built_form, floor_area, floor_height, num_heated_rooms): @@ -399,7 +401,7 @@ class HeatingRecommender: if not system_change and len(boiler_recommendation): # If there is not a system change, we add the boiler recommendation at point. 
- self.recommendations.append([boiler_recommendation]) + self.heating_recommendations.extend([boiler_recommendation]) if system_change: # We combine the heating and controls recommendations, in the case of a system change @@ -417,12 +419,12 @@ class HeatingRecommender: combined_recommendations.extend(combined_recommendation) # Overwrite the existing boiler recommendation - self.recommendations.append(combined_recommendations) + self.heating_recommendations.extend(combined_recommendations) else: # We increment the recommendation phase, since the heating controls are separate from the boiler upgrade # but we'll only upgrade if we have a heating recommendation has_heating_recommendation = any( - rec["type"] == "heating" for recommendation in self.recommendations for rec in recommendation + rec["type"] == "heating" for rec in self.heating_recommendations ) if has_heating_recommendation: recommendation_phase += 1 @@ -431,6 +433,6 @@ class HeatingRecommender: for recommendation in controls_recommender.recommendation: recommendation["phase"] = recommendation_phase - self.recommendations.append(controls_recommender.recommendation) + self.heating_control_recommendations.extend(controls_recommender.recommendation) return diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index aba75ad9..06dc2d61 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -110,16 +110,23 @@ class Recommendations: # Heating and Electical systems if "heating" not in self.exclusions: self.heating_recommender.recommend(phase=phase) - if self.heating_recommender.recommendations: - if len(self.heating_recommender.recommendations) == 1: - property_recommendations.append(self.heating_recommender.recommendations) - else: - property_recommendations.extend(self.heating_recommender.recommendations) + if ( + self.heating_recommender.heating_recommendations or + self.heating_recommender.heating_control_recommendations + ): + if 
self.heating_recommender.heating_recommendations: + property_recommendations.append(self.heating_recommender.heating_recommendations) + + if self.heating_recommender.heating_control_recommendations: + property_recommendations.append(self.heating_recommender.heating_control_recommendations) + # We check if we have distinct heating and heating controls recommendations # If so, we increment by 2 (one of the heating system, one for the heating controls) # otherwise we incremenet by 1 max_used_phase = max( - [rec["phase"] for recs in self.heating_recommender.recommendations for rec in recs] + [rec["phase"] for rec in + self.heating_recommender.heating_recommendations + + self.heating_recommender.heating_control_recommendations] ) amount_to_increment = max_used_phase - phase + 1 phase += amount_to_increment From 5a879572f46fba68fc136f2d0681805119e60ccb Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 23 Apr 2024 15:34:29 +0100 Subject: [PATCH 08/20] final modifications for immo pilot --- etl/customers/immo/pilot/asset_list_2.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/etl/customers/immo/pilot/asset_list_2.py b/etl/customers/immo/pilot/asset_list_2.py index 1b4fad9a..52260f57 100644 --- a/etl/customers/immo/pilot/asset_list_2.py +++ b/etl/customers/immo/pilot/asset_list_2.py @@ -51,7 +51,9 @@ already_installed = [ 'address': '47 Watsons Close', 'postcode': 'DY2 7HL', "already_installed": ["loft_insulation"] }, { - 'address': '44 Hatfield Road', 'postcode': 'DY9 7LW', "already_installed": ["loft_insulation"] + 'address': '44 Hatfield Road', + 'postcode': 'DY9 7LW', + "already_installed": ["loft_insulation", "cavity_wall_insulation"] } ] From 7a275deb6df6a231bde60d64d78ba3b04ab32f38 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 23 Apr 2024 17:12:39 +0100 Subject: [PATCH 09/20] route march code --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- etl/customers/guiness/route_march.py | 98 ++++++++++++++++++++++++++++ 3 
files changed, 100 insertions(+), 2 deletions(-) create mode 100644 etl/customers/guiness/route_march.py diff --git a/.idea/Model.iml b/.idea/Model.iml index 4413bb06..b0f9c00d 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 6f308057..1122b380 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/etl/customers/guiness/route_march.py b/etl/customers/guiness/route_march.py new file mode 100644 index 00000000..28f350d3 --- /dev/null +++ b/etl/customers/guiness/route_march.py @@ -0,0 +1,98 @@ +import os + +import pandas as pd +from tqdm import tqdm + +from dotenv import load_dotenv +from utils.s3 import read_excel_from_s3 +from backend.SearchEpc import SearchEpc +from epc_api.client import EpcClient +from utils.s3 import save_csv_to_s3 + +load_dotenv(dotenv_path="backend/.env") +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") + + +def app(): + """ + This app is satisying an adhoc request to retrieve EPC data for properties owned by Guiness, to help plan the + route march + + These properties were provided to us by Ecosurv + :return: + """ + asset_list = read_excel_from_s3( + bucket_name="retrofit-datalake-dev", + file_key="customers/guiness/TGP CW Properties PV.xlsx", + header_row=0 + ) + + epc_data = [] + for _, guiness_property in tqdm(asset_list.iterrows(), total=len(asset_list)): + + searcher = SearchEpc( + address1=str(guiness_property["Address"]), + postcode=guiness_property["POSTCODES"], + auth_token=EPC_AUTH_TOKEN, + os_api_key="", + property_type=None, + fast=True + ) + # Force the skipping of estimating the EPC + searcher.ordnance_survey_client.property_type = None + searcher.ordnance_survey_client.built_form = None + + searcher.find_property(skip_os=True) + if searcher.newest_epc is None: + continue + + epc = { + "asset_list_address": guiness_property["Address"], + "asset_list_postcode": guiness_property["POSTCODES"], + **searcher.newest_epc.copy() + } + + 
epc_data.append(epc) + + epc_df = pd.DataFrame(epc_data) + + # Retrieve just the data we need + epc_df = epc_df[ + [ + "asset_list_address", + "asset_list_postcode", + "uprn", + "property-type", + "built-form", + "inspection-date", + "current-energy-rating", + "current-energy-efficiency", + "roof-description", + "walls-description", + "transaction-type" + ] + ] + + asset_list = asset_list.merge( + epc_df, how="left", left_on=["Address", "POSTCODES"], right_on=["asset_list_address", "asset_list_postcode"] + ) + + # De-dupe on the address and postcode, since 137 Badger Avenue was duplicated + asset_list = asset_list.drop_duplicates(subset=["Address", "POSTCODES"]) + asset_list = asset_list.drop(columns=["asset_list_address", "asset_list_postcode"]) + + # Rename the columns + asset_list = asset_list.rename(columns={ + "property-type": "Property Type", + "built-form": "Archetype", + "inspection-date": "Last EPC Inspection Date", + "current-energy-rating": "Last survey EPC Rating", + "current-energy-efficiency": "Last survey SAP Score", + "roof-description": "Roof Construction", + "walls-description": "Wall Construction", + "transaction-type": "Last EPC Reason" + }) + + # Store as an excel + filename = "Guiness EPC data.xlsx" + asset_list.to_excel(filename, index=False) From 03ca16bfc5c94d8325f5c20e5a82aabbb66e014d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 26 Apr 2024 14:06:48 +0100 Subject: [PATCH 10/20] Added rightmove property valuation increase estimates --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- backend/app/plan/router.py | 20 +++---- backend/ml_models/Valuation.py | 39 +++++++++++- etl/customers/gla_croydon_demo/asset_list.py | 4 -- etl/customers/goldman/asset_list.py | 63 ++++++++++++++++++++ etl/customers/goldman/epc_f_g_properties.py | 25 ++++++++ recommendations/HeatingRecommender.py | 1 - 8 files changed, 137 insertions(+), 19 deletions(-) create mode 100644 etl/customers/goldman/asset_list.py create mode 100644 
etl/customers/goldman/epc_f_g_properties.py diff --git a/.idea/Model.iml b/.idea/Model.iml index b0f9c00d..4413bb06 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 1122b380..6f308057 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index ebaf482d..06d1aadf 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -282,16 +282,16 @@ async def trigger_plan(body: PlanTriggerRequest): property_id, is_new = create_property( session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn ) - # if not is_new: - # continue - # - # create_property_targets( - # session, - # property_id=property_id, - # portfolio_id=body.portfolio_id, - # epc_target=body.goal_value, - # heat_demand_target=None - # ) + if not is_new: + continue + + create_property_targets( + session, + property_id=property_id, + portfolio_id=body.portfolio_id, + epc_target=body.goal_value, + heat_demand_target=None + ) epc_records = { 'original_epc': epc_searcher.newest_epc.copy(), diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py index 39ea5a98..5c781979 100644 --- a/backend/ml_models/Valuation.py +++ b/backend/ml_models/Valuation.py @@ -71,6 +71,14 @@ class PropertyValuation: 90013980: 148_000, # Based on Zoopla 90087154: 184_000, # Based on Zoopla 90046817: 167_000, # Based on Zoopla + # Goldman Sachs Pilot for inrto - search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/ + 100070358888: 153_000, # Based on Zoopla + 10090436544: 282_000, # Based on Zoopla + 100070365751: 177_000, # Based on Zoopla + 10095952767: 168_000, # Based on Zoopla + 100070520130: 177_000, # Based on Zoopla + 100070333957: 185_000, # Based on Zoopla + 100070543258: 211_000, # Based on Zoopla } # We base our valuation uplifts on a number of sources @@ -108,6 +116,29 @@ class 
PropertyValuation: # {"start": "D", "end": "A", "increase_percentage": 0.017}, ] + # Found here: https://www.rightmove.co.uk/news/articles/property-news/green-premium-epc-ratings/ + # F -> C is + 15% + # E -> C is +7% + # D -> C is +3% + RIGHTMOVE_MAPPING = [ + {"start": "G", "end": "C", "increase_percentage": 0.15}, + {"start": "G", "end": "B", "increase_percentage": 0.15}, + {"start": "G", "end": "A", "increase_percentage": 0.15}, + + {"start": "F", "end": "C", "increase_percentage": 0.15}, + {"start": "F", "end": "B", "increase_percentage": 0.15}, + {"start": "F", "end": "A", "increase_percentage": 0.15}, + + {"start": "E", "end": "C", "increase_percentage": 0.07}, + {"start": "E", "end": "B", "increase_percentage": 0.07}, + {"start": "E", "end": "A", "increase_percentage": 0.07}, + + {"start": "D", "end": "C", "increase_percentage": 0.03}, + {"start": "D", "end": "B", "increase_percentage": 0.03}, + {"start": "D", "end": "A", "increase_percentage": 0.03}, + + ] + EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"] @classmethod @@ -159,14 +190,18 @@ class PropertyValuation: msm_increase, lloyds_increase = cls.get_increase(epc_band_range) - # We now use the knight frank and nationwide data to get further valuation evidence, if we have it + # We now use the knight frank, nationwide and Rightmove data to get further valuation evidence, if we have it kf_increase = [x for x in cls.KNIGHT_FRANK_MAPPING if x["start"] == current_epc and x["end"] == target_epc] nw_increase = [x for x in cls.NATIONWIDE_MAPPING if x["start"] == current_epc and x["end"] == target_epc] + rm_increase = [x for x in cls.RIGHTMOVE_MAPPING if x["start"] == current_epc and x["end"] == target_epc] kf_increase = kf_increase[0]["increase_percentage"] if kf_increase else None nw_increase = nw_increase[0]["increase_percentage"] if nw_increase else None + rm_increase = rm_increase[0]["increase_percentage"] if rm_increase else None - all_increases = [x for x in [msm_increase, lloyds_increase, kf_increase, 
nw_increase] if x is not None] + all_increases = [ + x for x in [msm_increase, lloyds_increase, kf_increase, nw_increase, rm_increase] if x is not None + ] max_increase = max(all_increases) min_increase = min(all_increases) diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py index 1655979b..52e9422c 100644 --- a/etl/customers/gla_croydon_demo/asset_list.py +++ b/etl/customers/gla_croydon_demo/asset_list.py @@ -34,10 +34,6 @@ def app(): low_memory=False ) - z = epc_data[epc_data["MAINHEAT_DESCRIPTION"] == "Boiler and radiators, mains gas"] - z["HOTWATER_DESCRIPTION"].value_counts() - z["MAIN_FUEL"].value_counts() - # Filter on entries where we have a UPRN epc_data = epc_data[~pd.isnull(epc_data["UPRN"])] diff --git a/etl/customers/goldman/asset_list.py b/etl/customers/goldman/asset_list.py new file mode 100644 index 00000000..afe3c64c --- /dev/null +++ b/etl/customers/goldman/asset_list.py @@ -0,0 +1,63 @@ +import pandas as pd +from utils.s3 import read_excel_from_s3 +from utils.s3 import save_csv_to_s3 + +PORTFOLIO_ID = 75 +USER_ID = 8 + + +def app(): + asset_list = [ + { + "address": "19 Emily Gardens", + "postcode": "B16 0ED", + }, + { + "address": "Flat 6 41 Bradford Street", + "postcode": "B5 6HX", + }, + { + "address": "197 FIELD LANE", + "postcode": "B32 4HL", + }, + { + "address": "FLAT 4 108 SUMMER ROAD", + "postcode": "B23 6DY", + }, + { + "address": "1, St. 
Benedicts Road", + "postcode": "B10 9DP", + }, + { + "address": "29 COOKSEY LANE", + "postcode": "B44 9QL", + }, + { + "address": "40 TRITTIFORD ROAD", + "postcode": "B13 0HG", + } + ] + + asset_list = pd.DataFrame(asset_list) + + # Store the asset list in s3 + filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv" + save_csv_to_s3( + dataframe=asset_list, + bucket_name="retrofit-plan-inputs-dev", + file_name=filename + ) + + # EPC C portoflio + body = { + "portfolio_id": str(PORTFOLIO_ID), + "housing_type": "Private", + "goal": "Increase EPC", + "goal_value": "B", + "trigger_file_path": filename, + "already_installed_file_path": "", + "patches_file_path": "", + "non_invasive_recommendations_file_path": "", + "budget": None, + } + print(body) diff --git a/etl/customers/goldman/epc_f_g_properties.py b/etl/customers/goldman/epc_f_g_properties.py new file mode 100644 index 00000000..28197126 --- /dev/null +++ b/etl/customers/goldman/epc_f_g_properties.py @@ -0,0 +1,25 @@ +import pandas as pd + + +def app(): + """ + Pulling the list of EPC G & F properties in Birmingham for Goldman Sachs + """ + epc_data = pd.read_csv( + "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv", + low_memory=False + ) + + epc_data = epc_data[~pd.isnull(epc_data["UPRN"])] + epc_data["UPRN"] = epc_data["UPRN"].astype(int).astype(str) + + # Get the newest EPC for each UPRN. 
We use LODGEMENT_DATE as a proxy for this + epc_data["LODGEMENT_DATETIME"] = pd.to_datetime(epc_data["LODGEMENT_DATETIME"], format='mixed') + + epc_data = epc_data.sort_values("LODGEMENT_DATETIME", ascending=False).drop_duplicates("UPRN") + + # Get G & F properties + epc_data = epc_data[epc_data["CURRENT_ENERGY_RATING"].isin(["G", "F"])] + + # Save as an excel + epc_data.to_excel("Birmingham EPC F & G Properties.xlsx", index=False) diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 537125a1..8988d2a6 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -359,7 +359,6 @@ class HeatingRecommender: **heating_simulation_config, **hotwater_simulation_config, **fuel_simulation_config, - "hot_water_energy_eff_ending": "Good" } boiler_costs = self.costs.boiler( From 155a8c568c595207e4d69cd2f766eeec4b5129f1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 30 Apr 2024 17:41:33 +0100 Subject: [PATCH 11/20] working through the air source heat pump recommendations, added route march code for livewest --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- .../AirSourceHeatPumpEfficiency.py | 46 +++++- etl/customers/livewest/route_march.py | 135 +++++++++++++++++ .../places_for_people/route_march.py | 137 ++++++++++++++++++ recommendations/Costs.py | 29 ++++ recommendations/HeatingControlRecommender.py | 3 + recommendations/HeatingRecommender.py | 127 +++++++++++++++- .../tests/test_air_source_heat_pump.py | 77 ++++++++++ 9 files changed, 546 insertions(+), 12 deletions(-) create mode 100644 etl/customers/livewest/route_march.py create mode 100644 etl/customers/places_for_people/route_march.py create mode 100644 recommendations/tests/test_air_source_heat_pump.py diff --git a/.idea/Model.iml b/.idea/Model.iml index 4413bb06..b0f9c00d 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 6f308057..1122b380 100644 --- 
a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py b/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py index 2ba82e77..044cc830 100644 --- a/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py +++ b/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py @@ -21,6 +21,8 @@ class AirSourceHeatPumpEfficiency: def create_dataset(self): logger.info("Creating solar photo supply dataset") + + all_counts = [] for dir in tqdm(self.file_directories): filepath = dir / "certificates.csv" df = pd.read_csv(filepath, low_memory=False) @@ -44,9 +46,15 @@ class AirSourceHeatPumpEfficiency: df = df[ df["MAINHEAT_DESCRIPTION"].str.contains("air source heat pump", case=False, na=False) ] + + # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA + for col in ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]: + df = df[~pd.isnull(df[col])] # Get the columns we're interested in df = df[ [ + "PROPERTY_TYPE", + "BUILT_FORM", "MAINHEAT_DESCRIPTION", "MAINHEAT_ENERGY_EFF", "MAINHEATCONT_DESCRIPTION", @@ -60,6 +68,8 @@ class AirSourceHeatPumpEfficiency: counts = df.groupby( [ + "PROPERTY_TYPE", + "BUILT_FORM", "MAINHEAT_DESCRIPTION", "MAINHEAT_ENERGY_EFF", "MAINHEATCONT_DESCRIPTION", @@ -71,8 +81,34 @@ class AirSourceHeatPumpEfficiency: ] ).size().reset_index(name="count") - # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA - for col in ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]: - df = df[~pd.isnull(df[col])] - # Take newest LODGEMENT_DATE per UPRN - df = df.sort_values(by="LODGEMENT_DATE", ascending=False).drop_duplicates(subset=["UPRN"]) + all_counts.append(counts) + + all_counts = pd.concat(all_counts) + + all_counts_agg = all_counts.groupby( + [ + "PROPERTY_TYPE", + "BUILT_FORM", + "MAINHEAT_DESCRIPTION", + "MAINHEAT_ENERGY_EFF", + 
"MAINHEATCONT_DESCRIPTION", + "MAINHEATC_ENERGY_EFF", + "MAIN_FUEL", + "HOTWATER_DESCRIPTION", + "HOT_WATER_ENERGY_EFF", + "MAINS_GAS_FLAG" + ] + )["count"].sum().reset_index() + + all_counts_agg.groupby("PROPERTY_TYPE")["count"].sum() + # In houses, 68% of the cases where we see air source heat pumps are in detached and semi-detached houses + all_counts_agg[all_counts_agg["PROPERTY_TYPE"] == "House"]["BUILT_FORM"].value_counts(normalize=True) + + all_counts_agg[all_counts_agg["PROPERTY_TYPE"] == "Flat"]["BUILT_FORM"].value_counts() + + # In Bungalows, 74% of cases where we see air source heat pumps are in detached and semi-detached houses + all_counts_agg[all_counts_agg["PROPERTY_TYPE"] == "Bungalow"]["BUILT_FORM"].value_counts(normalize=True) + + # TODO: Research options for mid and end-terrace houses + # TODO: Research the options for flats - we see them appear in flats, but practically speaking, how does the + # install process work? diff --git a/etl/customers/livewest/route_march.py b/etl/customers/livewest/route_march.py new file mode 100644 index 00000000..713ee56a --- /dev/null +++ b/etl/customers/livewest/route_march.py @@ -0,0 +1,135 @@ +import os + +import pandas as pd +from tqdm import tqdm + +from dotenv import load_dotenv +from utils.s3 import read_excel_from_s3 +from backend.SearchEpc import SearchEpc +from epc_api.client import EpcClient +from utils.s3 import save_csv_to_s3 + +load_dotenv(dotenv_path="backend/.env") +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") + + +def route_march_may_2024(): + """ + This code pulls supplementary data for a route march that is expected to happen in May 2024. This code + was authored on the 30th April 2024. 
+ """ + + asset_list = read_excel_from_s3( + bucket_name="retrofit-datalake-dev", + file_key="customers/Livewest/Livewest proposed route march Apr-May 2024.xlsx", + header_row=1 + ) + asset_list = pd.read_excel("/Users/khalimconn-kowlessar/Downloads/Livewest proposed route march Apr-May 2024.xlsx") + + epc_data = [] + for _, unit in tqdm(asset_list.iterrows(), total=len(asset_list)): + + lst = [unit["NO"], unit["ADDRESS 1"], unit["ADDRESS 2"], unit["ADDRESS 3"], unit["POSTCODE"]] + lst = [str(x).strip() for x in lst if not pd.isnull(x)] + + full_address = ", ".join(lst) + + searcher = SearchEpc( + address1=str(unit["NO"]), + postcode=unit["POSTCODE"], + auth_token=EPC_AUTH_TOKEN, + os_api_key="", + property_type=None, + fast=True, + full_address=full_address + ) + # Force the skipping of estimating the EPC + searcher.ordnance_survey_client.property_type = None + searcher.ordnance_survey_client.built_form = None + + searcher.find_property(skip_os=True) + if searcher.newest_epc is None: + # We try with a different address 1 + add1 = str(unit["NO"]).lower() + add1 = ( + add1 + .replace("flat", "") + .replace("ft", "") + .replace("t", "").strip() + ) + + searcher = SearchEpc( + address1=add1, + postcode=unit["POSTCODE"], + auth_token=EPC_AUTH_TOKEN, + os_api_key="", + property_type=None, + fast=True, + full_address=full_address + ) + # Force the skipping of estimating the EPC + searcher.ordnance_survey_client.property_type = None + searcher.ordnance_survey_client.built_form = None + + searcher.find_property(skip_os=True) + + if searcher.newest_epc is None: + continue + + epc = { + "asset_list_house_no": unit["NO"], + "asset_list_address1": unit["ADDRESS 1"], + "asset_list_postcode": unit["POSTCODE"], + **searcher.newest_epc.copy() + } + + epc_data.append(epc) + + epc_df = pd.DataFrame(epc_data) + + # + + # Retrieve just the data we need + epc_df = epc_df[ + [ + "asset_list_house_no", + "asset_list_address1", + "asset_list_postcode", + "uprn", + "address", + 
"property-type", + "built-form", + "inspection-date", + "current-energy-rating", + "current-energy-efficiency", + "roof-description", + "walls-description", + "transaction-type" + ] + ].rename(columns={"address": "Matched EPC Address"}) + + asset_list = asset_list.merge( + epc_df, + how="left", + left_on=["NO", "ADDRESS 1", "POSTCODE"], + right_on=["asset_list_house_no", "asset_list_address1", "asset_list_postcode"] + ) + + asset_list = asset_list.drop_duplicates(subset=["NO", "ADDRESS 1", "POSTCODE"]) + asset_list = asset_list.drop(columns=["asset_list_house_no", "asset_list_address1", "asset_list_postcode"]) + + # Rename the columns + asset_list = asset_list.rename(columns={ + "property-type": "Property Type", + "built-form": "Archetype", + "inspection-date": "Last EPC Inspection Date", + "current-energy-rating": "Last survey EPC Rating", + "current-energy-efficiency": "Last survey SAP Score", + "roof-description": "Roof Construction", + "walls-description": "Wall Construction", + "transaction-type": "Last EPC Reason" + }) + + # Store as an excel + filename = "Livewest EPC data.xlsx" + asset_list.to_excel(filename, index=False) diff --git a/etl/customers/places_for_people/route_march.py b/etl/customers/places_for_people/route_march.py new file mode 100644 index 00000000..c38c71d3 --- /dev/null +++ b/etl/customers/places_for_people/route_march.py @@ -0,0 +1,137 @@ +import os + +import pandas as pd +from tqdm import tqdm + +from dotenv import load_dotenv +from utils.s3 import read_excel_from_s3 +from backend.SearchEpc import SearchEpc +from epc_api.client import EpcClient +from utils.s3 import save_csv_to_s3 + +load_dotenv(dotenv_path="backend/.env") +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") + + +def app(): + """ + This app is satisying an adhoc request to retrieve EPC data for properties owned by Guiness, to help plan the + route march + + These properties were provided to us by Ecosurv + :return: + """ + asset_list = read_excel_from_s3( + 
bucket_name="retrofit-datalake-dev", + file_key="customers/Places For People/PFP ROUTE MARCH PHASE 1.xlsx", + header_row=1 + ) + + epc_data = [] + for _, pfp_property in tqdm(asset_list.iterrows(), total=len(asset_list)): + + lst = [ + pfp_property["ADDRESS"], + pfp_property["ADDRESS.1"], + pfp_property["ADDRESS.2"], + pfp_property["POSTCODE"] + ] + lst = [str(x).strip() for x in lst if not pd.isnull(x)] + + full_address = ", ".join(lst) + + searcher = SearchEpc( + address1=str(pfp_property["ADDRESS"]), + postcode=pfp_property["POSTCODE"], + auth_token=EPC_AUTH_TOKEN, + os_api_key="", + property_type=None, + fast=True, + full_address=full_address + ) + # Force the skipping of estimating the EPC + searcher.ordnance_survey_client.property_type = None + searcher.ordnance_survey_client.built_form = None + + searcher.find_property(skip_os=True) + if searcher.newest_epc is None: + # We try with a different address 1 + add1 = str(pfp_property["ADDRESS"]).lower() + add1 = add1.replace("ft", "").replace("t", "").strip() + + searcher = SearchEpc( + address1=add1, + postcode=pfp_property["POSTCODE"], + auth_token=EPC_AUTH_TOKEN, + os_api_key="", + property_type=None, + fast=True, + full_address=full_address + ) + # Force the skipping of estimating the EPC + searcher.ordnance_survey_client.property_type = None + searcher.ordnance_survey_client.built_form = None + + searcher.find_property(skip_os=True) + + if searcher.newest_epc is None: + continue + + epc = { + "asset_list_address": pfp_property["ADDRESS"], + "asset_list_address1": pfp_property["ADDRESS.1"], + "asset_list_postcode": pfp_property["POSTCODE"], + **searcher.newest_epc.copy() + } + + epc_data.append(epc) + + epc_df = pd.DataFrame(epc_data) + + # 702 + + # Retrieve just the data we need + epc_df = epc_df[ + [ + "asset_list_address", + "asset_list_address1", + "asset_list_postcode", + "uprn", + "address", + "property-type", + "built-form", + "inspection-date", + "current-energy-rating", + 
"current-energy-efficiency", + "roof-description", + "walls-description", + "transaction-type" + ] + ].rename(columns={"address": "Matched EPC Address"}) + + asset_list = asset_list.merge( + epc_df, + how="left", + left_on=["ADDRESS", "ADDRESS.1", "POSTCODE"], + right_on=["asset_list_address", "asset_list_address1", "asset_list_postcode"] + ) + + # De-dupe on the address and postcode, since 137 Badger Avenue was duplicated + asset_list = asset_list.drop_duplicates(subset=["ADDRESS", "ADDRESS.1", "POSTCODE"]) + asset_list = asset_list.drop(columns=["asset_list_address", "asset_list_address1", "asset_list_postcode"]) + + # Rename the columns + asset_list = asset_list.rename(columns={ + "property-type": "Property Type", + "built-form": "Archetype", + "inspection-date": "Last EPC Inspection Date", + "current-energy-rating": "Last survey EPC Rating", + "current-energy-efficiency": "Last survey SAP Score", + "roof-description": "Roof Construction", + "walls-description": "Wall Construction", + "transaction-type": "Last EPC Reason" + }) + + # Store as an excel + filename = "Places For People EPC data.xlsx" + asset_list.to_excel(filename, index=False) diff --git a/recommendations/Costs.py b/recommendations/Costs.py index d7a8ad2f..113bb6f8 100644 --- a/recommendations/Costs.py +++ b/recommendations/Costs.py @@ -37,6 +37,24 @@ MCS_SOLAR_PV_COST_DATA = { "average_cost_per_kwh-Northern Ireland": 2126.09, } +# This data is based on the MCS database +MCS_AIR_SOURCE_HEAT_PUMP_COST_DATA = { + "Outer London": None, + "Inner London": None, + "South East England": None, + "South West England": None, + "East of England": None, + "East Midlands": None, + "West Midlands": None, + "North East England": None, + "North West England": None, + "Yorkshire and the Humber": None, + "Wales": None, + "Scotland": None, + "Northern Ireland": None, +} +BOILER_UPGRADE_SCHEME_ASHP_VALUE = 7500 + # This is based on quotes from installers BATTERY_COST = 3500 @@ -1240,3 +1258,14 @@ class Costs: 
"labour_hours": labour_hours, "labour_days": labour_days, } + + def air_source_heat_pump(self): + """ + Based on the region and type of property, this function will produce a cost estimation for an air source heat + pump. This cost will include the boiler upgrade scheme grant + + :return: + """ + + regional_cost = MCS_AIR_SOURCE_HEAT_PUMP_COST_DATA[self.region] + pass diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py index d24ad811..76da6c37 100644 --- a/recommendations/HeatingControlRecommender.py +++ b/recommendations/HeatingControlRecommender.py @@ -35,6 +35,9 @@ class HeatingControlRecommender: return + if heating_description in ["Air source heat pump, radiators, electric"]: + self.recommend_time_temperature_zone_controls() + def recommend_room_heaters_electric_controls(self): """ If the home has Room heaters, electric, we start by identifying potential heating controls that could diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 8988d2a6..b197d817 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -1,6 +1,4 @@ -import pandas as pd - -from recommendations.Costs import Costs +from recommendations.Costs import Costs, BOILER_UPGRADE_SCHEME_ASHP_VALUE from recommendations.recommendation_utils import check_simulation_difference, override_costs from backend.Property import Property from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes @@ -18,7 +16,14 @@ class HeatingRecommender: self.heating_recommendations = [] self.heating_control_recommendations = [] - def recommend(self, phase=0): + def recommend(self, has_cavity_and_loft_recommendations, phase=0): + """ + Produces heating recommendations + :param has_cavity_and_loft_recommendations: boolean indicating if we have produced a cavity or loft insulation + recommendation. 
If there are cavity or loft recommendations, the property would need to complete those measures + before being able to get the boiler upgrade scheme benefits. The messaging in the front end would be to + :param phase: indicates the phase of the retrofit programme + """ # TODO: We could have a system flush recommendation for an existing boiler, where there is no need to replace # the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this @@ -81,8 +86,120 @@ class HeatingRecommender: phase=phase, system_change=system_change, exising_room_heaters=exising_room_heaters ) + # We recommend air source heat pumps + # Heat pumps are suitable for all property types: + # https://energysavingtrust.org.uk/from-flats-to-terraced-houses-heat-pumps-are-suitable-for-all-property-types/ + # Just seems least probable for flats, so we'll allow houses and bungalows + # In the future, we'll allow overrides, so that non-intrusive surveys can contradict these conditions + # and either allow or prevent the recommendation of an air source heat pump + + suitable_property_types = self.property.data["property-type"] in ["House", "Bungalow"] + has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"] + + if suitable_property_types and not has_air_source_heat_pump: + self.recommend_air_source_heat_pump( + phase=phase, has_cavity_and_loft_recommendations=has_cavity_and_loft_recommendations + ) + return + def recommend_air_source_heat_pump(self, phase, has_cavity_and_loft_recommendations): + """ + This method will implement the recommendation for an air source heat pump + This is ultimately an overhaul to the heating system and so is recommended as an alternative to other + heating system recommendations + :return: + """ + + controls_recommender = HeatingControlRecommender(self.property) + controls_recommender.recommend(heating_description="Air source heat pump, radiators, electric") + + ashp_costs = 
self.costs.air_source_heat_pump() + # We add the costs of the heating controls, onto each key in the costs dictionary + if controls_recommender.recommendation: + for key in ashp_costs: + ashp_costs[key] += controls_recommender.recommendation[0][key] + + already_installed = "air_source_heat_pump" in self.property.already_installed + if already_installed: + ashp_costs = override_costs(ashp_costs) + description = "The property already has an air source heat pump, no further action needed." + else: + if controls_recommender.recommendation: + description = ("Install an air source heat pump, and upgrade heating controls to Smart Thermostats, " + "room sensors and smart radiator valves (time & temperature zone control) ") + else: + description = "Install an air source heat pump." + + # If the property does not have existing cavity and loft insulation, we include a note that the cost + # includes the boiler upgrade scheme and that the cavity and loft need to be treated, to ensure access + # to the funding + if has_cavity_and_loft_recommendations: + description = description + (f" The cost of works includes the £" + f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant. 
" + f"You must ensure that the property has an insulated cavity and " + f"270mm+ loft insulation to qualify for the grant") + else: + description = description + (f" The cost of works includes the £" + f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant") + + simulation_config = { + "mainheat_energy_eff_ending": "Good", + "hot_water_energy_eff_ending": "Good" + } + # Installation of a boiler improves the hot water system so we need to reflect this in + # the outcome of the recommendation + heating_ending_config = MainHeatAttributes("Air source heat pump, radiators, electric").process() + hotwater_ending_config = HotWaterAttributes("From main system").process() + + # If the property does not currently have electric main fuel, we'll simulate the change + fuel_ending_config = {} + if self.property.main_fuel["fuel_type"] != "electricity": + fuel_ending_config = MainFuelAttributes("electricity (not community)").process() + + # Check the simulation differences + heating_simulation_config = check_simulation_difference( + new_config=heating_ending_config, old_config=self.property.main_heating + ) + hotwater_simulation_config = check_simulation_difference( + new_config=hotwater_ending_config, old_config=self.property.hotwater + ) + fuel_simulation_config = check_simulation_difference( + new_config=fuel_ending_config, old_config=self.property.main_fuel + ) + + simulation_config = { + **simulation_config, + **heating_simulation_config, + **hotwater_simulation_config, + **fuel_simulation_config, + } + + if controls_recommender.recommendation: + # We should have just the single recommendation for heat controls, which is time + # and temperature zone controls + simulation_config = { + **simulation_config, + **controls_recommender.recommendation[0]["simulation_config"] + } + + ashp_recommendation = { + "phase": phase, + "parts": [ + # TODO + ], + "type": "heating", + "description": description, + "starting_u_value": None, + "new_u_value": None, + "sap_points": None, 
+ "already_installed": already_installed, + "simulation_config": simulation_config, + **ashp_costs + } + + self.heating_recommendations.append(ashp_recommendation) + @staticmethod def check_simulation_difference(old_config, new_config): """ @@ -146,7 +263,7 @@ class HeatingRecommender: recommendation_description = f"{description} and {controls_description}" - already_installed = "cavity_wall_insulation" in self.property.already_installed + already_installed = "heating_controls" in self.property.already_installed if already_installed: total_costs = override_costs(total_costs) recommendation_description = "Heating system has already been upgraded, no further action needed." diff --git a/recommendations/tests/test_air_source_heat_pump.py b/recommendations/tests/test_air_source_heat_pump.py new file mode 100644 index 00000000..d80afc6e --- /dev/null +++ b/recommendations/tests/test_air_source_heat_pump.py @@ -0,0 +1,77 @@ +from backend.Property import Property +from recommendations.HeatingRecommender import HeatingRecommender +from etl.epc.Record import EPCRecord + + +class TestAirSourceHeatPump: + + def test_eligible(self): + # This tests a house, which will be suitable for an air source heat pump + epc_record = EPCRecord() + epc_record.prepared_epc = { + "county": "Broxbourne", + "mainheat-energy-eff": "Good", + "hot-water-energy-eff": "Good", + "mainheatc-energy-eff": "Good", + "number-heated-rooms": 5, + "property-type": "House", + "built-form": "Semi-Detached" + } + + property_instance = Property(id=0, address="fake", postcode="fake", epc_record=epc_record) + property_instance.main_heating = { + 'original_description': 'Boiler and radiators, mains gas', + "clean_description": "Boiler and radiators, mains gas", + 'has_radiators': True, + 'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False, + 'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': True, + 'has_air_source_heat_pump': False, + 
'has_room_heaters': False, 'has_electric_storage_heaters': False, + 'has_warm_air': False, + 'has_electric_underfloor_heating': False, + 'has_electric_ceiling_heating': False, 'has_community_scheme': False, + 'has_ground_source_heat_pump': False, 'has_no_system_present': False, + 'has_portable_electric_heaters': False, + 'has_water_source_heat_pump': False, 'has_electric': False, + 'has_mains_gas': True, 'has_wood_logs': False, + 'has_coal': False, 'has_oil': False, 'has_wood_pellets': False, + 'has_anthracite': False, + 'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False, + 'has_lpg': False, 'has_assumed': False, + 'has_electricaire': False, 'has_assumed_for_most_rooms': False, + 'has_underfloor_heating': False, + "has_electric_heat_pumps": False, + "has_micro-cogeneration": False + } + property_instance.main_fuel = { + 'original_description': 'mains gas (not community)', 'fuel_type': 'mains gas', + 'tariff_type': None, + 'is_community': False, 'no_individual_heating_or_community_network': False, + 'complex_fuel_type': None + } + property_instance.hotwater = { + 'original_description': 'From main system', + 'clean_description': 'From main system', + 'heater_type': None, + 'system_type': 'from main system', + 'thermostat_characteristics': None, 'heating_scope': None, + 'energy_recovery': None, 'tariff_type': None, + 'extra_features': None, 'chp_systems': None, 'distribution_system': None, + 'no_system_present': None, + 'assumed': False, "appliance": None + } + property_instance.main_heating_controls = { + 'original_description': 'Programmer, room thermostat and TRVs', + 'thermostatic_control': 'room thermostat', 'charging_system': None, 'switch_system': 'programmer', + 'no_control': None, 'dhw_control': None, 'community_heating': None, 'multiple_room_thermostats': False, + 'auxiliary_systems': None, 'trvs': 'trvs', 'rate_control': None + + } + + recommender = HeatingRecommender(property_instance=property_instance) + + assert not 
recommender.heating_recommendations + + recommender.recommend(phase=0) + + assert recommender.recommendation is None From cce9c64fdc029b0f3fa35445f5784ad5698b7b29 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 2 May 2024 00:37:36 +0100 Subject: [PATCH 12/20] Adding company ownership matching code for goldman poc --- backend/SearchEpc.py | 34 ++++---- etl/customers/goldman/property_ownership.py | 87 +++++++++++++++++++++ etl/customers/livewest/route_march.py | 3 +- 3 files changed, 104 insertions(+), 20 deletions(-) create mode 100644 etl/customers/goldman/property_ownership.py diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 44178792..06eea258 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -193,33 +193,31 @@ class SearchEpc: @classmethod def get_house_number(cls, address: str) -> str | None: """ - This method will use the usaddress library to parse an address and extract the house number - :return: + This method uses the usaddress library to parse an address and extract the primary house or flat number. 
""" + try: + parsed = usaddress.parse(address) + # First, try to get the 'OccupancyIdentifier' if 'OccupancyType' is detected + for part, type_ in parsed: + if type_ == 'OccupancyIdentifier': + return part # This assumes the first 'OccupancyIdentifier' after 'OccupancyType' is the primary + # number - parsed = usaddress.parse(address) - parsed_house_number = [x for x in parsed if (x[1] == "AddressNumber")] - parsed_house_number = parsed_house_number[0][0] if parsed_house_number else None + # Fallback to 'AddressNumber' if no 'OccupancyIdentifier' is found + address_number = next((part for part, type_ in parsed if type_ == 'AddressNumber'), None) + if address_number: + return address_number.replace(",", "") # Remove any trailing commas - if parsed_house_number is None: - # Because usaddress isn't optimal for parsing addresses with some prefixes such as 'Flat', - # we also add a custom approach - - # Pattern to look for 'Flat' or 'Apartment' followed by a number, or just a number at the beginning + # Further fallback to custom regex (in case usaddress completely fails) pattern = r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)' - match = re.search(pattern, address) - if match: - # Return the first non-None group found return next(g for g in match.groups() if g is not None) - else: - return None - # Remove training commas - parsed_house_number = parsed_house_number.replace(",", "") + except Exception as e: + print(f"Error parsing address: {e}") - return parsed_house_number + return None @staticmethod def extract_numeric_housenumber_part(house_number: str | None) -> int | None: diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py new file mode 100644 index 00000000..17db71b2 --- /dev/null +++ b/etl/customers/goldman/property_ownership.py @@ -0,0 +1,87 @@ +import pandas as pd +from tqdm import tqdm +from backend.SearchEpc import SearchEpc + + +def aggregate_matches(matching_lookup, company_ownership): + df = 
matching_lookup.merge(company_ownership, how="left", on="Title Number") + counts = ( + df.groupby(["Company Registration No. (1)", "Proprietor Name (1)"])["UPRN"] + .count() + .reset_index(name="number_of_properties") + ) + counts = counts.sort_values("number_of_properties", ascending=False) + + return counts + + +def app(): + """ + This script is for scoping property ownership for EPC F & G rated properties in Birmingam, for Goldman Sachs + """ + + properties = pd.read_excel("Birmingham EPC F & G Properties.xlsx") + company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_04.csv") + # FIlter on relevant postcodes + company_ownership = company_ownership[ + company_ownership["Postcode"].str.lower().isin(properties["POSTCODE"].str.lower().unique())] + + # Now we filter properties the other way around + properties = properties[properties["POSTCODE"].str.lower().isin(company_ownership["Postcode"].str.lower().unique())] + # We end up with 7.4k entires on a postcode match, however we need to now do a direct address match + + ignore_title_numbers = [ + "WM922695", # Land at the back of 17 Plumstead Road, Birmingham (B44 0EA): relates to WM154788 + "WM426374", # land on the south side of 15 Carlyle Road, Edgbaston, Birmingham (B16 9BH): relates to WM537591 + "WM44948", + ] + company_ownership = company_ownership[~company_ownership["Title Number"].isin(ignore_title_numbers)] + # Remove entries where the address begins with the term "land adjoining": + + company_ownership = company_ownership[~company_ownership["Property Address"].str.startswith("land adjoining")] + + freehold_matching_lookup = [] + leasehold_matching_lookup = [] + for _, address in tqdm(properties.iterrows(), total=len(properties)): + filtered = company_ownership[ + company_ownership["Postcode"].str.lower() == address["POSTCODE"].lower() + ].copy() + + filtered["house_number"] = filtered["Property Address"].apply(SearchEpc.get_house_number) + house_no = 
SearchEpc.get_house_number(address["ADDRESS1"]) + + filtered = filtered[filtered["house_number"] == house_no] + + if filtered.empty: + continue + + filtered_freehold = filtered[filtered["Tenure"] == "Freehold"] + filtered_leasehold = filtered[filtered["Tenure"] == "Leasehold"] + + if filtered_freehold.shape[0] > 1: + raise ValueError("Multiple freehold matches") + + if filtered_leasehold.shape[0] > 1: + raise ValueError("Multiple leasehold matches") + + if not filtered_leasehold.empty: + leasehold_matching_lookup.append( + { + "UPRN": address["UPRN"], + "Title Number": filtered_leasehold["Title Number"].values[0] + } + ) + + if not filtered_freehold.empty: + freehold_matching_lookup.append( + { + "UPRN": address["UPRN"], + "Title Number": filtered_freehold["Title Number"].values[0] + } + ) + + freehold_matching_lookup = pd.DataFrame(freehold_matching_lookup) + leasehold_matching_lookup = pd.DataFrame(leasehold_matching_lookup) + + freehold_aggregate = aggregate_matches(freehold_matching_lookup, company_ownership) + leasehold_aggregate = aggregate_matches(leasehold_matching_lookup, company_ownership) diff --git a/etl/customers/livewest/route_march.py b/etl/customers/livewest/route_march.py index 713ee56a..9e69fd43 100644 --- a/etl/customers/livewest/route_march.py +++ b/etl/customers/livewest/route_march.py @@ -22,9 +22,8 @@ def route_march_may_2024(): asset_list = read_excel_from_s3( bucket_name="retrofit-datalake-dev", file_key="customers/Livewest/Livewest proposed route march Apr-May 2024.xlsx", - header_row=1 + header_row=0 ) - asset_list = pd.read_excel("/Users/khalimconn-kowlessar/Downloads/Livewest proposed route march Apr-May 2024.xlsx") epc_data = [] for _, unit in tqdm(asset_list.iterrows(), total=len(asset_list)): From 76ef5c897a2471473058a39d765f55e452a82db5 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 2 May 2024 00:41:47 +0100 Subject: [PATCH 13/20] handling genuine dual leasehold ownership --- 
etl/customers/goldman/property_ownership.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py index 17db71b2..4a6faede 100644 --- a/etl/customers/goldman/property_ownership.py +++ b/etl/customers/goldman/property_ownership.py @@ -42,6 +42,7 @@ def app(): freehold_matching_lookup = [] leasehold_matching_lookup = [] + shared_leasehold_match = [] for _, address in tqdm(properties.iterrows(), total=len(properties)): filtered = company_ownership[ company_ownership["Postcode"].str.lower() == address["POSTCODE"].lower() @@ -62,9 +63,10 @@ def app(): raise ValueError("Multiple freehold matches") if filtered_leasehold.shape[0] > 1: - raise ValueError("Multiple leasehold matches") - - if not filtered_leasehold.empty: + matched = filtered_leasehold[["Title Number"]].copy() + matched.insert(0, "UPRN", address["UPRN"]) + shared_leasehold_match.append(matched) + elif not filtered_leasehold.empty: leasehold_matching_lookup.append( { "UPRN": address["UPRN"], From 5cb35e1d9eb3beec22d772293208fef09c18fbba Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 2 May 2024 18:33:25 +0100 Subject: [PATCH 14/20] working on property ownership pipeline --- backend/SearchEpc.py | 13 +- etl/customers/goldman/property_ownership.py | 369 ++++++++++++++++-- etl/customers/vander_elliot/__init__.py | 0 .../vander_elliot/single_property_pilot.py | 56 +++ recommendations/HeatingRecommender.py | 14 +- recommendations/Recommendations.py | 2 +- recommendations/SolarPvRecommendations.py | 2 +- 7 files changed, 418 insertions(+), 38 deletions(-) create mode 100644 etl/customers/vander_elliot/__init__.py create mode 100644 etl/customers/vander_elliot/single_property_pilot.py diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 06eea258..db9ec4ff 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -196,6 +196,13 @@ class SearchEpc: This method uses the usaddress 
library to parse an address and extract the primary house or flat number. """ try: + + # Custom regex to catch a broad range of cases + pattern = r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)' + match = re.search(pattern, address) + if match: + return next(g for g in match.groups() if g is not None) + parsed = usaddress.parse(address) # First, try to get the 'OccupancyIdentifier' if 'OccupancyType' is detected for part, type_ in parsed: @@ -208,12 +215,6 @@ class SearchEpc: if address_number: return address_number.replace(",", "") # Remove any trailing commas - # Further fallback to custom regex (in case usaddress completely fails) - pattern = r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)' - match = re.search(pattern, address) - if match: - return next(g for g in match.groups() if g is not None) - except Exception as e: print(f"Error parsing address: {e}") diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py index 4a6faede..abc2645d 100644 --- a/etl/customers/goldman/property_ownership.py +++ b/etl/customers/goldman/property_ownership.py @@ -1,27 +1,248 @@ +import re import pandas as pd from tqdm import tqdm +import Levenshtein from backend.SearchEpc import SearchEpc +# Average value of a property in the midlands in 2024 was £238,000. Since these are EPC F & G properties, we assume +# £207,000 since they trade at a discount. This is based on the rightmove study where moving from an EPC F/G -> C has a +# +15% impact on valuation and D -> C has a +3% impact on valuation. 
+# The mode EPC rating is D, so we associate the £238k valuation with an EPC D property +# Therefore value_of_F * 1.15 = value_of_D * 1.03 +# Therefore value_of_F = value_of_D * 1.03/1.15 = 238k * (1.03/1.15) = 213165 +PROPERTY_VALUE_ESTIMATE = 213_165 -def aggregate_matches(matching_lookup, company_ownership): - df = matching_lookup.merge(company_ownership, how="left", on="Title Number") + +def aggregate_matches(matching_lookup, company_ownership, properties): + df = matching_lookup.merge( + company_ownership, how="left", on="Title Number" + ).merge( + properties[["UPRN", "LOCAL_AUTHORITY_LABEL"]], how="left", on="UPRN" + ) counts = ( - df.groupby(["Company Registration No. (1)", "Proprietor Name (1)"])["UPRN"] + df.groupby(["Company Registration No. (1)", "Proprietor Name (1)", "LOCAL_AUTHORITY_LABEL"])["UPRN"] .count() .reset_index(name="number_of_properties") ) counts = counts.sort_values("number_of_properties", ascending=False) - return counts + pivot_counts = counts.pivot_table( + index=["Company Registration No. (1)", "Proprietor Name (1)"], # Rows: companies and proprietors + columns="LOCAL_AUTHORITY_LABEL", # Columns: each local authority + values="number_of_properties", # The counts of properties + fill_value=0 # Fill missing values with 0 (where there are no properties owned) + ).reset_index() + + total_counts = ( + df.groupby(["Company Registration No. (1)", "Proprietor Name (1)"])["UPRN"] + .count() + .reset_index(name="total_number_of_properties") + ) + + pivot_counts = pivot_counts.merge( + total_counts, how="left", on=["Company Registration No. 
(1)", "Proprietor Name (1)"] + ) + + pivot_counts = pivot_counts.sort_values("total_number_of_properties", ascending=False) + + pivot_counts["approx_value"] = PROPERTY_VALUE_ESTIMATE * pivot_counts["total_number_of_properties"] + pivot_counts["cumulative_value"] = pivot_counts["approx_value"].cumsum() + + return pivot_counts + + +def find_f_g_properties(paths): + data = [] + for path in tqdm(paths): + epc_data = pd.read_csv(path, low_memory=False) + + epc_data = epc_data[~pd.isnull(epc_data["UPRN"])] + epc_data["UPRN"] = epc_data["UPRN"].astype(int).astype(str) + + # Get the newest EPC for each UPRN. We use LODGEMENT_DATE as a proxy for this + epc_data["LODGEMENT_DATETIME"] = pd.to_datetime(epc_data["LODGEMENT_DATETIME"], format='mixed') + + epc_data = epc_data.sort_values("LODGEMENT_DATETIME", ascending=False).drop_duplicates("UPRN") + + # Get G & F properties + epc_data = epc_data[epc_data["CURRENT_ENERGY_RATING"].isin(["G", "F"])] + data.append(epc_data) + + data = pd.concat(data) + + # Save as an excel + data.to_excel("EPC F & G Properties.xlsx", index=False) + + +def remove_text_in_brackets(address: str) -> str: + """ + Removes any text within parentheses, including the parentheses themselves. + + Parameters: + - address (str): The address string to clean. + + Returns: + - str: The cleaned address with text in parentheses removed. + """ + # Regex to find and remove content in parentheses + cleaned_address = re.sub(r'\s*\([^)]*\)', '', address) + return cleaned_address + + +def extract_numeric_part(house_number: str) -> str: + """ + Extracts only the numeric part from a house number that may contain letters. + + Parameters: + - house_number (str): The house number string possibly containing letters. + + Returns: + - str: The numeric part of the house number. 
+ """ + # Use regular expression to replace all non-digit characters with nothing + numeric_part = re.sub(r'\D', '', house_number) + return numeric_part + + +def levenstein_match(matching_string, df, address_col): + match_to = df[address_col].tolist() + # Strip out punctuation and spaces + match_to = [re.sub(r'[^\w\s]', '', x) for x in match_to] + match_to = [x.replace(" ", "") for x in match_to] + + # Perform matching between full key and match_to + distances = [Levenshtein.distance(matching_string, s) for s in match_to] + best_match_index = distances.index(min(distances)) + # We might want to consider a threshold for the distance, however for the momeny, + # we don't consider this for the moment + df = df.iloc[best_match_index:best_match_index + 1] + + return df + + +def extract_range_from_house_number(house_number_range: str): + """ + Detects if the house number includes a numeric range (formatted as 'x-y') and extracts all values within this range. + Non-numeric strings containing hyphens are ignored. + + Parameters: + - house_number_range (str): The house number string that might contain a range. + + Returns: + - list of str: A list of all numbers within the range if it is a range; otherwise, returns None. + """ + + if not house_number_range: + return None + + if '-' in house_number_range: + parts = house_number_range.split('-') + if len(parts) == 2 and parts[0].isdigit() and parts[1].isdigit(): + # Both parts are numeric, so it's a valid range + start, end = map(int, parts) # Convert parts to integers + return [str(x) for x in range(start, end + 1)] + else: + # Not a valid numeric range + return None + else: + # No hyphen present or not a range + return None + + +def is_in_range(row, house_no): + """ Check if the house number is within the range provided in the row. 
""" + if row and any(house_no == num for num in row): + return True + return False + + +def remove_duplicate_matches(matching_lookup, properties, company_ownership): + duplicated_titles = matching_lookup[matching_lookup["Title Number"].duplicated()]["Title Number"].unique() + + to_drop = [] + for dupe_title in duplicated_titles: + dupe_data = matching_lookup[matching_lookup["Title Number"] == dupe_title].copy() + matched_addresses = dupe_data.merge( + properties[["UPRN", "ADDRESS"]].rename(columns={"ADDRESS": "epc_address"}), + how="left", on="UPRN" + ).merge( + company_ownership[["Title Number", "Property Address"]], + how="left", on="Title Number" + ) + # We perform levenstein to get the best match + best_match = levenstein_match( + matching_string=matched_addresses["Property Address"].values[0], + df=matched_addresses, + address_col="epc_address" + ) + matches_to_drop = matched_addresses[ + ~matched_addresses["UPRN"].isin(best_match["UPRN"].values) + ] + + to_drop.append( + matches_to_drop[["UPRN", "Title Number"]].copy() + ) + + to_drop = pd.concat(to_drop) + + if not to_drop.empty: + merged = pd.merge(matching_lookup, to_drop, on=['UPRN', 'Title Number'], how='left', indicator=True) + merged[merged['_merge'] == 'left_only'].drop(columns=['_merge']) + + return merged + + return matching_lookup def app(): """ This script is for scoping property ownership for EPC F & G rated properties in Birmingam, for Goldman Sachs """ + # paths = [ + # "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E08000031-Wolverhampton/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E08000026-Coventry/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E06000016-Leicester/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E06000015-Derby/certificates.csv", + # 
"local_data/all-domestic-certificates/domestic-E06000021-Stoke-on-Trent/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E06000018-Nottingham/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E07000154-Northampton/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E06000061-North-Northamptonshire/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E06000062-West-Northamptonshire/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E07000152-East-Northamptonshire/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E07000155-South-Northamptonshire/certificates.csv", + # # + # "local_data/all-domestic-certificates/domestic-E08000027-Dudley/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E08000029-Solihull/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E07000234-Bromsgrove/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E08000030-Walsall/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E08000028-Sandwell/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E06000019-Herefordshire-County-of/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E06000020-Telford-and-Wrekin/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E07000218-North-Warwickshire/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E07000222-Warwick/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E07000237-Worcester/certificates.csv", + # # East midlands + # "local_data/all-domestic-certificates/domestic-E07000035-Derbyshire-Dales/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E07000038-North-East-Derbyshire/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E07000039-South-Derbyshire/certificates.csv", + # 
"local_data/all-domestic-certificates/domestic-E06000012-North-East-Lincolnshire/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E06000013-North-Lincolnshire/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E07000138-Lincoln/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E07000134-North-West-Leicestershire/certificates.csv", + # "local_data/all-domestic-certificates/domestic-E06000017-Rutland/certificates.csv", + # ] + # paths = list(set(paths)) + # find_f_g_properties(paths) - properties = pd.read_excel("Birmingham EPC F & G Properties.xlsx") + properties = pd.read_excel("EPC F & G Properties.xlsx") company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_04.csv") + company_ownership["is_overseas"] = False + overseas_company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_04 2.csv") + overseas_company_ownership["is_overseas"] = True + + company_ownership = pd.concat([company_ownership, overseas_company_ownership]) + # FIlter on relevant postcodes company_ownership = company_ownership[ company_ownership["Postcode"].str.lower().isin(properties["POSTCODE"].str.lower().unique())] @@ -29,6 +250,10 @@ def app(): # Now we filter properties the other way around properties = properties[properties["POSTCODE"].str.lower().isin(company_ownership["Postcode"].str.lower().unique())] # We end up with 7.4k entires on a postcode match, however we need to now do a direct address match + # Take just private rentals + properties = properties[ + properties["TENURE"].isin(["rental (private)", "Rented (private)", "owner-occupied", "Owner-occupied"]) + ] ignore_title_numbers = [ "WM922695", # Land at the back of 17 Plumstead Road, Birmingham (B44 0EA): relates to WM154788 @@ -36,22 +261,78 @@ def app(): "WM44948", ] company_ownership = company_ownership[~company_ownership["Title Number"].isin(ignore_title_numbers)] - # Remove entries where the address begins with 
the term "land adjoining": - company_ownership = company_ownership[~company_ownership["Property Address"].str.startswith("land adjoining")] + # Remove entries where the address begins with the term "land adjoining", or other records that don't reference the + # the property itself + starting_terms = [ + "land adjoining", "land on the", "land to the rear of", "land and buildings on the", + "garage adjoining", "car park adjoining", "the land adjoining", "land and buildings adjoining", + "all royal mines" + ] + for starting_term in starting_terms: + company_ownership = company_ownership[ + ~company_ownership["Property Address"].str.lower().str.startswith() + ] - freehold_matching_lookup = [] - leasehold_matching_lookup = [] + biggest_ownership = ( + company_ownership + .groupby(["Company Registration No. (1)", "Proprietor Name (1)"])["Title Number"] + .count() + .reset_index(name="n_owned_properties") + ) + biggest_ownership = biggest_ownership.sort_values("n_owned_properties", ascending=False) + + freehold_matching_lookup = [] # 634 + leasehold_matching_lookup = [] # 86 shared_leasehold_match = [] + shared_freehold_match = [] for _, address in tqdm(properties.iterrows(), total=len(properties)): + match_type = "exact" filtered = company_ownership[ company_ownership["Postcode"].str.lower() == address["POSTCODE"].lower() ].copy() - filtered["house_number"] = filtered["Property Address"].apply(SearchEpc.get_house_number) + # Remove postcode and remove trailing commas + filtered["house_number"] = ( + filtered["Property Address"] + .apply(remove_text_in_brackets) + .apply(SearchEpc.get_house_number) + .str.lower() + .str.replace(",", "") + ) house_no = SearchEpc.get_house_number(address["ADDRESS1"]) + if house_no is not None: + house_no = house_no.replace(",", "") - filtered = filtered[filtered["house_number"] == house_no] + if house_no is None: + # It's hard for us to get a reliable match + # filtered = filtered[filtered["Property 
Address"].str.contains(address["ADDRESS1"])] + # if filtered.shape[0] > 1: + # raise Exception("No valid - maybe we should do levenstein?") + continue + + else: + + if house_no not in filtered["house_number"].values: + # If this happens, we check house_number for a x-y range of addresses + filtered["house_number_range"] = filtered["house_number"].apply(extract_range_from_house_number) + # If we have found a house number range, we check if the house number is in the range and if not, + # we drop the row + filtered['is_in_range'] = filtered['house_number_range'].apply(lambda x: is_in_range(x, house_no)) + + if filtered['is_in_range'].any(): + # If house_no is found in any range, keep only rows where it is in range + filtered = filtered[filtered['is_in_range']] + else: + # If house_no is not found in any range, filter out rows where 'house_number_range' is not None + filtered = filtered[filtered['house_number_range'].isnull()] + + # Strip out letters from house_no and house_number + house_no = extract_numeric_part(house_no) + filtered["house_number"] = filtered["house_number"].astype(str).apply(extract_numeric_part) + match_type = "approximate" + + filtered = filtered[filtered["house_number"] == house_no] if filtered.empty: continue @@ -60,7 +341,17 @@ def app(): filtered_leasehold = filtered[filtered["Tenure"] == "Leasehold"] if filtered_freehold.shape[0] > 1: - raise ValueError("Multiple freehold matches") + matched = filtered_leasehold[["Title Number"]].copy() + matched.insert(0, "UPRN", address["UPRN"]) + shared_freehold_match.append(matched) + elif not filtered_freehold.empty: + freehold_matching_lookup.append( + { + "UPRN": address["UPRN"], + "Title Number": filtered_freehold["Title Number"].values[0], + "match_type": match_type, + } + ) if filtered_leasehold.shape[0] > 1: matched = filtered_leasehold[["Title Number"]].copy() @@ -70,20 +361,52 @@ def app(): leasehold_matching_lookup.append( { "UPRN": address["UPRN"], - "Title Number": filtered_leasehold["Title 
Number"].values[0] - } - ) - - if not filtered_freehold.empty: - freehold_matching_lookup.append( - { - "UPRN": address["UPRN"], - "Title Number": filtered_freehold["Title Number"].values[0] + "Title Number": filtered_leasehold["Title Number"].values[0], + "match_type": match_type, } ) freehold_matching_lookup = pd.DataFrame(freehold_matching_lookup) leasehold_matching_lookup = pd.DataFrame(leasehold_matching_lookup) + shared_leasehold_match = pd.concat(shared_leasehold_match) - freehold_aggregate = aggregate_matches(freehold_matching_lookup, company_ownership) - leasehold_aggregate = aggregate_matches(leasehold_matching_lookup, company_ownership) + # The approximate matches aren't very good + freehold_matching_lookup = freehold_matching_lookup[freehold_matching_lookup["match_type"] == "exact"] + leasehold_matching_lookup = leasehold_matching_lookup[leasehold_matching_lookup["match_type"] == "exact"] + + # There are some cases where we have duplicates + freehold_matching_lookup = remove_duplicate_matches(freehold_matching_lookup, properties, company_ownership) + leasehold_matching_lookup = remove_duplicate_matches(leasehold_matching_lookup, properties, company_ownership) + + matched_addresses = freehold_matching_lookup.merge( + properties[["UPRN", "ADDRESS"]].rename(columns={"ADDRESS": "epc_address"}), + how="left", on="UPRN" + ).merge( + company_ownership[["Title Number", "Property Address"]], + how="left", on="Title Number" + ) + + # shared_freehold_match = pd.DataFrame(shared_freehold_match) + # Strore these files + freehold_matching_lookup.to_excel("freehold_matching_lookup.xlsx") + leasehold_matching_lookup.to_excel("leasehold_matching_lookup.xlsx") + shared_leasehold_match.to_excel("shared_leasehold_match.xlsx") + # shared_freehold_match.to_excel("shared_freehold_match.xlsx") + + freehold_aggregate = aggregate_matches(freehold_matching_lookup, company_ownership, properties) + leasehold_aggregate = aggregate_matches(leasehold_matching_lookup, 
company_ownership, properties) + + combined_aggregate = aggregate_matches( + pd.concat([freehold_matching_lookup, leasehold_matching_lookup]), company_ownership, properties + ) + + investment_20m = combined_aggregate[combined_aggregate["cumulative_value"] <= 20_500_000] + investment_50m = combined_aggregate[combined_aggregate["cumulative_value"] <= 51_000_000] + + z = company_ownership[ + (company_ownership["Company Registration No. (1)"] == freehold_aggregate["Company Registration No. (1)"].values[ + 0]) & + (company_ownership["Title Number"].isin(freehold_matching_lookup["Title Number"].values)) + ] + + df = freehold_matching_lookup.merge(company_ownership, how="left", on="Title Number") diff --git a/etl/customers/vander_elliot/__init__.py b/etl/customers/vander_elliot/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/etl/customers/vander_elliot/single_property_pilot.py b/etl/customers/vander_elliot/single_property_pilot.py new file mode 100644 index 00000000..99624dfc --- /dev/null +++ b/etl/customers/vander_elliot/single_property_pilot.py @@ -0,0 +1,56 @@ +import pandas as pd +from utils.s3 import read_excel_from_s3 +from utils.s3 import save_csv_to_s3 + +PORTFOLIO_ID = 77 +USER_ID = 8 + +patches = [ + { + "address": "79 Perryn Road", + "postcode": "W3 7LT", + "roof-description": "Pitched, no insulation (assumed)" + } +] + + +def app(): + asset_list = [ + { + 'uprn': 12103117, + "address": "79 Perryn Road", + "postcode": "W3 7LT", + }, + + ] + + asset_list = pd.DataFrame(asset_list) + + # Store the asset list in s3 + filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv" + save_csv_to_s3( + dataframe=asset_list, + bucket_name="retrofit-plan-inputs-dev", + file_name=filename + ) + + # Store patches in s3 + patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json" + save_csv_to_s3( + dataframe=pd.DataFrame(patches), + bucket_name="retrofit-plan-inputs-dev", + file_name=patches_filename + ) + + body = { + "portfolio_id": str(PORTFOLIO_ID), + 
"housing_type": "Private", + "goal": "Increase EPC", + "goal_value": "B", + "trigger_file_path": filename, + "already_installed_file_path": "", + "patches_file_path": patches_filename, + "non_invasive_recommendations_file_path": "", + "budget": None, + } + print(body) diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index b197d817..b42a9d5b 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -93,13 +93,13 @@ class HeatingRecommender: # In the future, we'll allow overrides, so that non-intrusive surveys can contradict these conditions # and either allow or prevent the recommendation of an air source heat pump - suitable_property_types = self.property.data["property-type"] in ["House", "Bungalow"] - has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"] - - if suitable_property_types and not has_air_source_heat_pump: - self.recommend_air_source_heat_pump( - phase=phase, has_cavity_and_loft_recommendations=has_cavity_and_loft_recommendations - ) + # suitable_property_types = self.property.data["property-type"] in ["House", "Bungalow"] + # has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"] + # + # if suitable_property_types and not has_air_source_heat_pump: + # self.recommend_air_source_heat_pump( + # phase=phase, has_cavity_and_loft_recommendations=has_cavity_and_loft_recommendations + # ) return diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index 06dc2d61..1a6d7a1c 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -109,7 +109,7 @@ class Recommendations: # Heating and Electical systems if "heating" not in self.exclusions: - self.heating_recommender.recommend(phase=phase) + self.heating_recommender.recommend(phase=phase, has_cavity_and_loft_recommendations=None) if ( self.heating_recommender.heating_recommendations or 
self.heating_recommender.heating_control_recommendations diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py index b44557ab..58d4b123 100644 --- a/recommendations/SolarPvRecommendations.py +++ b/recommendations/SolarPvRecommendations.py @@ -44,7 +44,7 @@ class SolarPvRecommendations: :return: """ - is_valid_property_type = self.property.data["property-type"] in ["House", "Bungalow"] + is_valid_property_type = self.property.data["property-type"] in ["House", "Bungalow", "Maisonette"] is_valid_roof_type = ( self.property.roof["is_flat"] or self.property.roof["is_pitched"] or self.property.roof["is_roof_room"] ) From 9f9799cfa8a65a2714a91bd47a68dc57538758d0 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 3 May 2024 13:35:49 +0100 Subject: [PATCH 15/20] finishing property ownership| --- etl/customers/goldman/property_ownership.py | 27 +++------------------ 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py index abc2645d..89e7c976 100644 --- a/etl/customers/goldman/property_ownership.py +++ b/etl/customers/goldman/property_ownership.py @@ -255,13 +255,6 @@ def app(): properties["TENURE"].isin(["rental (private)", "Rented (private)", "owner-occupied", "Owner-occupied"]) ] - ignore_title_numbers = [ - "WM922695", # Land at the back of 17 Plumstead Road, Birmingham (B44 0EA): relates to WM154788 - "WM426374", # land on the south side of 15 Carlyle Road, Edgbaston, Birmingham (B16 9BH): relates to WM537591 - "WM44948", - ] - company_ownership = company_ownership[~company_ownership["Title Number"].isin(ignore_title_numbers)] - # Remove entries where the address begins with the term "land adjoining", or other records that don't reference the # the property itself starting_terms = [ @@ -271,17 +264,9 @@ def app(): ] for starting_term in starting_terms: company_ownership = company_ownership[ - 
~company_ownership["Property Address"].str.lower().str.startswith() + ~company_ownership["Property Address"].str.lower().str.startswith(starting_term) ] - biggest_ownership = ( - company_ownership - .groupby(["Company Registration No. (1)", "Proprietor Name (1)"])["Title Number"] - .count() - .reset_index(name="n_owned_properties") - ) - biggest_ownership = biggest_ownership.sort_values("n_owned_properties", ascending=False) - freehold_matching_lookup = [] # 634 leasehold_matching_lookup = [] # 86 shared_leasehold_match = [] @@ -400,13 +385,9 @@ def app(): pd.concat([freehold_matching_lookup, leasehold_matching_lookup]), company_ownership, properties ) + df = pd.concat([freehold_matching_lookup, leasehold_matching_lookup]) + investment_20m = combined_aggregate[combined_aggregate["cumulative_value"] <= 20_500_000] investment_50m = combined_aggregate[combined_aggregate["cumulative_value"] <= 51_000_000] - z = company_ownership[ - (company_ownership["Company Registration No. (1)"] == freehold_aggregate["Company Registration No. 
(1)"].values[ - 0]) & - (company_ownership["Title Number"].isin(freehold_matching_lookup["Title Number"].values)) - ] - - df = freehold_matching_lookup.merge(company_ownership, how="left", on="Title Number") + properties["WALLS_DESCRIPTION"].value_counts(normalize=True) From 7ec795f5bb247d5a441501e64b5e4a9b61a0d53d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 3 May 2024 15:46:03 +0100 Subject: [PATCH 16/20] completing ashp recommendations --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- recommendations/Costs.py | 48 ++++++++++++++++++--------- recommendations/HeatingRecommender.py | 30 +++++++++-------- recommendations/Recommendations.py | 11 +++++- 5 files changed, 60 insertions(+), 33 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index b0f9c00d..4413bb06 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 1122b380..6f308057 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/recommendations/Costs.py b/recommendations/Costs.py index 113bb6f8..fd3c1692 100644 --- a/recommendations/Costs.py +++ b/recommendations/Costs.py @@ -37,21 +37,22 @@ MCS_SOLAR_PV_COST_DATA = { "average_cost_per_kwh-Northern Ireland": 2126.09, } -# This data is based on the MCS database +# This data is based on the MCS database, We use the larger figure between the 2023 and 2024 average, +# to be conservative MCS_AIR_SOURCE_HEAT_PUMP_COST_DATA = { - "Outer London": None, - "Inner London": None, - "South East England": None, - "South West England": None, - "East of England": None, - "East Midlands": None, - "West Midlands": None, - "North East England": None, - "North West England": None, - "Yorkshire and the Humber": None, - "Wales": None, - "Scotland": None, - "Northern Ireland": None, + "Outer London": 13220, + "Inner London": 13220, + "South East England": 13547, + "South West England": 12776, + "East of England": 12585, + "East Midlands": 12239, + "West 
Midlands": 13182, + "North East England": 11829, + "North West England": 11714, + "Yorkshire and the Humber": 11919, + "Wales": 13701, + "Scotland": 12586, + "Northern Ireland": 12000, # There are hardly any air source heat pump installs going on in Northern Ireland } BOILER_UPGRADE_SCHEME_ASHP_VALUE = 7500 @@ -1264,8 +1265,23 @@ class Costs: Based on the region and type of property, this function will produce a cost estimation for an air source heat pump. This cost will include the boiler upgrade scheme grant - :return: """ + # This is the average cost of a project, we'll add some additional contingency regional_cost = MCS_AIR_SOURCE_HEAT_PUMP_COST_DATA[self.region] - pass + + total_cost = regional_cost * (1 + self.CONTINGENCY) - BOILER_UPGRADE_SCHEME_ASHP_VALUE + subtotal_before_vat = total_cost / (1 + self.VAT_RATE) + vat = total_cost - subtotal_before_vat + + # We assume 3 days installation + labour_days = 3 + labour_hours = labour_days * 8 + + return { + "total": total_cost, + "subtotal": subtotal_before_vat, + "vat": vat, + "labour_hours": labour_hours, + "labour_days": labour_days, + } diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index b42a9d5b..a51803f2 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -16,10 +16,10 @@ class HeatingRecommender: self.heating_recommendations = [] self.heating_control_recommendations = [] - def recommend(self, has_cavity_and_loft_recommendations, phase=0): + def recommend(self, has_cavity_or_loft_recommendations, phase=0): """ Produces heating recommendations - :param has_cavity_and_loft_recommendations: boolean indicating if we have produced a cavity or loft insulation + :param has_cavity_or_loft_recommendations: boolean indicating if we have produced a cavity or loft insulation recommendation. 
If there are cavity or loft recommendations, the property would need to complete those measures before being able to get the boiler upgrade scheme benefits. The messaging in the front end would be to :param phase: indicates the phase of the retrofit programme @@ -93,17 +93,17 @@ class HeatingRecommender: # In the future, we'll allow overrides, so that non-intrusive surveys can contradict these conditions # and either allow or prevent the recommendation of an air source heat pump - # suitable_property_types = self.property.data["property-type"] in ["House", "Bungalow"] - # has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"] - # - # if suitable_property_types and not has_air_source_heat_pump: - # self.recommend_air_source_heat_pump( - # phase=phase, has_cavity_and_loft_recommendations=has_cavity_and_loft_recommendations - # ) + suitable_property_type = self.property.data["property-type"] in ["House", "Bungalow"] + has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"] + + if suitable_property_type and not has_air_source_heat_pump: + self.recommend_air_source_heat_pump( + phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations + ) return - def recommend_air_source_heat_pump(self, phase, has_cavity_and_loft_recommendations): + def recommend_air_source_heat_pump(self, phase, has_cavity_or_loft_recommendations): """ This method will implement the recommendation for an air source heat pump This is ultimately an overhaul to the heating system and so is recommended as an alternative to other @@ -127,20 +127,20 @@ class HeatingRecommender: else: if controls_recommender.recommendation: description = ("Install an air source heat pump, and upgrade heating controls to Smart Thermostats, " - "room sensors and smart radiator valves (time & temperature zone control) ") + "room sensors and smart radiator valves (time & temperature zone control).") else: description = "Install an air source heat 
pump." # If the property does not have existing cavity and loft insulation, we include a note that the cost # includes the boiler upgrade scheme and that the cavity and loft need to be treated, to ensure access # to the funding - if has_cavity_and_loft_recommendations: - description = description + (f" The cost of works includes the £" + if has_cavity_or_loft_recommendations: + description = description + (f" The cost includes the £" f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant. " f"You must ensure that the property has an insulated cavity and " f"270mm+ loft insulation to qualify for the grant") else: - description = description + (f" The cost of works includes the £" + description = description + (f" The cost includes the £" f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant") simulation_config = { @@ -178,6 +178,8 @@ class HeatingRecommender: if controls_recommender.recommendation: # We should have just the single recommendation for heat controls, which is time # and temperature zone controls + if len(controls_recommender.recommendation) != 1: + raise NotImplementedError("More than one heat controls recommendation for air source heat pump") simulation_config = { **simulation_config, **controls_recommender.recommendation[0]["simulation_config"] diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index 1a6d7a1c..0942ab12 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -109,7 +109,16 @@ class Recommendations: # Heating and Electical systems if "heating" not in self.exclusions: - self.heating_recommender.recommend(phase=phase, has_cavity_and_loft_recommendations=None) + + cavity_or_loft_recommendations = [ + r for r in self.wall_recomender.recommendations + self.roof_recommender.recommendations + if r["type"] in ["cavity_wall_insulation", "loft_insulation"] + ] + has_cavity_or_loft_recommendations = len(cavity_or_loft_recommendations) > 0 + + 
self.heating_recommender.recommend( + phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations + ) if ( self.heating_recommender.heating_recommendations or self.heating_recommender.heating_control_recommendations From f21221d721049444c82bce084199421aab19ce23 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 3 May 2024 16:08:14 +0100 Subject: [PATCH 17/20] working on ashp recommendations --- recommendations/Recommendations.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index 0942ab12..c8113cdc 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -123,11 +123,28 @@ class Recommendations: self.heating_recommender.heating_recommendations or self.heating_recommender.heating_control_recommendations ): - if self.heating_recommender.heating_recommendations: - property_recommendations.append(self.heating_recommender.heating_recommendations) - if self.heating_recommender.heating_control_recommendations: - property_recommendations.append(self.heating_recommender.heating_control_recommendations) + # We split into first and second phase recommendations + first_phase_recommendations = [ + r for r in ( + self.heating_recommender.heating_recommendations + + self.heating_recommender.heating_control_recommendations + ) + if r["phase"] == phase + ] + second_phase_recommendations = [ + r for r in ( + self.heating_recommender.heating_recommendations + + self.heating_recommender.heating_control_recommendations + ) + if r["phase"] == phase + 1 + ] + + if first_phase_recommendations: + property_recommendations.append(first_phase_recommendations) + + if second_phase_recommendations: + property_recommendations.append(second_phase_recommendations) # We check if we have distinct heating and heating controls recommendations # If so, we increment by 2 (one of the heating system, one for the heating 
controls) From 56472f201e9dee48d8fa31b9dced73acc7fcc37d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 7 May 2024 16:56:14 +0100 Subject: [PATCH 18/20] Added ashp unit tests --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- etl/customers/goldman/property_ownership.py | 14 + .../tests/test_air_source_heat_pump.py | 867 ++++++++++++++++++ 4 files changed, 883 insertions(+), 2 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index 4413bb06..b0f9c00d 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 6f308057..1122b380 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py index 89e7c976..24922f68 100644 --- a/etl/customers/goldman/property_ownership.py +++ b/etl/customers/goldman/property_ownership.py @@ -391,3 +391,17 @@ def app(): investment_50m = combined_aggregate[combined_aggregate["cumulative_value"] <= 51_000_000] properties["WALLS_DESCRIPTION"].value_counts(normalize=True) + + +def company_aggregation(): + company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_04.csv") + aggregation = ( + company_ownership + .groupby(["Proprietor Name (1)", "Company Registration No. 
(1)"]) + ["Property Address"] + .count() + .reset_index(name="Number of Properties") + ) + aggregation = aggregation.sort_values("Number of Properties", ascending=False) + + aggregation.to_excel("Company ownership aggregation.xlsx") diff --git a/recommendations/tests/test_air_source_heat_pump.py b/recommendations/tests/test_air_source_heat_pump.py index d80afc6e..0d69b10d 100644 --- a/recommendations/tests/test_air_source_heat_pump.py +++ b/recommendations/tests/test_air_source_heat_pump.py @@ -1,6 +1,154 @@ +import pandas as pd +import msgpack +from datetime import datetime + +from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3 from backend.Property import Property from recommendations.HeatingRecommender import HeatingRecommender +from recommendations.Recommendations import Recommendations from etl.epc.Record import EPCRecord +from etl.solar.SolarPhotoSupply import SolarPhotoSupply +from backend.ml_models.api import ModelApi + + +def find_examples(): + """ Some scrappy helper code to find EPC examples""" + # Let's look for some testing data, where the only thing different pre and post is the installation of an + # air source heat pump + data = read_dataframe_from_s3_parquet( + bucket_name="retrofit-data-dev", + file_key="sap_change_model/2024-03-24-15-51-13/dataset_no_cleaning.parquet" + ) + + # Firstly, take records where before there was no air source heat pump and afterwards there was + data = data[ + data["has_air_source_heat_pump_ending"] & ~data["has_air_source_heat_pump"] + ] + + # Start with a property that has a boiler + data = data[data["has_boiler"]] + + static_columns = [ + # Walls + 'walls_thermal_transmittance_ending', + 'is_filled_cavity_ending', + 'is_park_home_ending', + 'walls_insulation_thickness_ending', + 'external_insulation_ending', + 'internal_insulation_ending', + # Floors + # 'floor_thermal_transmittance_ending', # Don't subset on this, because it changes based on floor area + 'floor_insulation_thickness_ending', + # Roof + 
'roof_thermal_transmittance_ending', + 'is_at_rafters_ending', + 'roof_insulation_thickness_ending', + # Hot water - air source heat pump will change the hot water system (probably from whatever it was -> main) + # 'heater_type_ending', + # 'system_type_ending', + # 'thermostat_characteristics_ending', + # 'heating_scope_ending', + # 'energy_recovery_ending', + # 'hotwater_tariff_type_ending', + # 'extra_features_ending', + # 'chp_systems_ending', + # 'distribution_system_ending', + # 'no_system_present_ending', + # 'appliance_ending', + # Heating - Will change when installing an ASHP + # 'has_radiators_ending', + # 'has_fan_coil_units_ending', + # 'has_pipes_in_screed_above_insulation_ending', + # 'has_pipes_in_insulated_timber_floor_ending', + # 'has_pipes_in_concrete_slab_ending', + # 'has_boiler_ending', + # 'has_air_source_heat_pump_ending', # We want the air source heat pump to change + # 'has_room_heaters_ending', + # 'has_electric_storage_heaters_ending', + # 'has_warm_air_ending', + # 'has_electric_underfloor_heating_ending', + # 'has_electric_ceiling_heating_ending', + # 'has_community_scheme_ending', + # 'has_ground_source_heat_pump_ending', + # 'has_no_system_present_ending', + # 'has_portable_electric_heaters_ending', + # 'has_water_source_heat_pump_ending', + # 'has_electric_heat_pump_ending', + # 'has_micro-cogeneration_ending', + # 'has_solar_assisted_heat_pump_ending', + # 'has_exhaust_source_heat_pump_ending', + # 'has_community_heat_pump_ending', + # 'has_electric_ending', + # 'has_mains_gas_ending', + # 'has_wood_logs_ending', 'has_coal_ending', 'has_oil_ending', + # 'has_wood_pellets_ending', 'has_anthracite_ending', 'has_dual_fuel_mineral_and_wood_ending', + # 'has_smokeless_fuel_ending', 'has_lpg_ending', 'has_b30k_ending', 'has_electricaire_ending', + # 'has_assumed_for_most_rooms_ending', 'has_underfloor_heating_ending', + # 'thermostatic_control_ending', + # 'charging_system_ending', + # 'switch_system_ending', + # 'no_control_ending', + #
'dhw_control_ending', + # 'community_heating_ending', + # 'multiple_room_thermostats_ending', + # 'auxiliary_systems_ending', + # 'trvs_ending', + # 'rate_control_ending', + # Window + 'glazing_type_ending', + # Fuel - could change with ASHP + # 'fuel_type_ending', + # 'main-fuel_tariff_type_ending', + # 'is_community_ending', + # 'no_individual_heating_or_community_network_ending', + # 'complex_fuel_type_ending', + + 'mechanical_ventilation_ending', 'secondheat_description_ending', 'glazed_type_ending', + 'multi_glaze_proportion_ending', 'low_energy_lighting_ending', 'number_open_fireplaces_ending', + 'solar_water_heating_flag_ending', + 'photo_supply_ending', + 'energy_tariff_ending', + 'extension_count_ending', + 'total_floor_area_ending', + # 'hot_water_energy_eff_ending', + 'floor_energy_eff_ending', + 'windows_energy_eff_ending', + 'walls_energy_eff_ending', + 'sheating_energy_eff_ending', + 'roof_energy_eff_ending', + # 'mainheat_energy_eff_ending', + # 'mainheatc_energy_eff_ending', + 'lighting_energy_eff_ending', + 'number_habitable_rooms_ending', + 'number_heated_rooms_ending', + ] + + for col in static_columns: + + base_starting = col.split("_ending")[0] + if base_starting + "_starting" in data.columns: + starting_col = base_starting + "_starting" + else: + starting_col = base_starting + # Filter + print("Column: %s" % col) + print("Starting size: %s" % data.shape[0]) + data = data[data[starting_col] == data[col]] + print("Ending size: %s" % data.shape[0]) + + z = data[['uprn', col, starting_col]] + + # Great example UPRNs + # 100030969273 + # 10034685399 - Completely transforms the heating and hot water systems in the home (goes from oil -> electricity) + # 100091200828 - goes from a liquid petroleum gas boiler to ashp + + # Look for starting with a gas boiler + data[ + data["has_boiler"] & data["has_radiators"] & data["has_mains_gas"] & ~data["has_boiler_ending"] + ] + + # UPRN: 100011776843 class TestAirSourceHeatPump: @@ -75,3 +223,722 @@ class 
TestAirSourceHeatPump: recommender.recommend(phase=0) assert recommender.recommendation is None + + def test_air_source_heat_pump_gas_boiler_starting(self): + starting_epc = { + 'low-energy-fixed-light-count': '', 'address': '430 Gidlow Lane', 'uprn-source': 'Energy Assessor', + 'floor-height': '2.62', 'heating-cost-potential': '599', 'unheated-corridor-length': '', + 'hot-water-cost-potential': '67', 'construction-age-band': 'England and Wales: 1950-1966', + 'potential-energy-rating': 'C', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Good', + 'lighting-energy-eff': 'Very Good', 'environment-impact-potential': '72', + 'glazed-type': 'double glazing installed during or after 2002', 'heating-cost-current': '913', + 'address3': '', 'mainheatcont-description': 'Programmer, no room thermostat', 'sheating-energy-eff': 'N/A', + 'property-type': 'House', 'local-authority-label': 'Wigan', 'fixed-lighting-outlets-count': '9', + 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '210', + 'county': '', 'postcode': 'WN6 8RG', 'solar-water-heating-flag': 'N', 'constituency': 'E14001039', + 'co2-emissions-potential': '2.6', 'number-heated-rooms': '4', + 'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '180', + 'local-authority': 'E08000010', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0', + 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2022-02-15', + 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '78', 'address1': '430 Gidlow Lane', + 'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Wigan', + 'roof-energy-eff': 'Very Poor', 'total-floor-area': '80.0', 'building-reference-number': '10002334112', + 'environment-impact-current': '38', 'co2-emissions-current': '6.2', + 'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A', + 'number-habitable-rooms': '4', 'address2': '', 
'hot-water-env-eff': 'Poor', 'posttown': 'WIGAN', + 'mainheatc-energy-eff': 'Very Poor', 'main-fuel': 'mains gas (not community)', + 'lighting-env-eff': 'Very Good', 'windows-energy-eff': 'Good', 'floor-env-eff': 'N/A', + 'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in all fixed outlets', + 'roof-env-eff': 'Very Poor', 'walls-energy-eff': 'Average', 'photo-supply': '0.0', + 'lighting-cost-potential': '67', 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', + 'main-heating-controls': '', 'lodgement-datetime': '2022-02-23 16:39:41', 'flat-top-storey': '', + 'current-energy-rating': 'E', 'secondheat-description': 'Room heaters, mains gas', + 'walls-env-eff': 'Average', 'transaction-type': 'ECO assessment', 'uprn': '100011776843', + 'current-energy-efficiency': '45', 'energy-consumption-current': '441', + 'mainheat-description': 'Boiler and radiators, mains gas', 'lighting-cost-current': '67', + 'lodgement-date': '2022-02-23', 'extension-count': '1', 'mainheatc-env-eff': 'Very Poor', + 'lmk-key': '46cb404438a6d88ddff8965cab8b3027ec15c32d93e0b6a5f0381a5109b9bb0d', 'wind-turbine-count': '0', + 'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '77', + 'hot-water-energy-eff': 'Poor', 'low-energy-lighting': '100', + 'walls-description': 'Cavity wall, filled cavity', + 'hotwater-description': 'From main system, no cylinder thermostat' + } + + ending_epc = { + 'low-energy-fixed-light-count': '', 'address': '430 Gidlow Lane', 'uprn-source': 'Energy Assessor', + 'floor-height': '2.62', 'heating-cost-potential': '803', 'unheated-corridor-length': '', + 'hot-water-cost-potential': '292', 'construction-age-band': 'England and Wales: 1950-1966', + 'potential-energy-rating': 'C', 'mainheat-energy-eff': 'Very Good', 'windows-env-eff': 'Good', + 'lighting-energy-eff': 'Very Good', 'environment-impact-potential': '78', + 'glazed-type': 'double glazing installed during or after 2002', 'heating-cost-current': '861', + 
'address3': '', 'mainheatcont-description': 'Time and temperature zone control', + 'sheating-energy-eff': 'N/A', 'property-type': 'House', 'local-authority-label': 'Wigan', + 'fixed-lighting-outlets-count': '9', 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', + 'hot-water-cost-current': '434', 'county': '', 'postcode': 'WN6 8RG', 'solar-water-heating-flag': 'N', + 'constituency': 'E14001039', 'co2-emissions-potential': '2.0', 'number-heated-rooms': '4', + 'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '147', + 'local-authority': 'E08000010', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0', + 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2022-05-11', + 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '43', 'address1': '430 Gidlow Lane', + 'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Wigan', + 'roof-energy-eff': 'Very Poor', 'total-floor-area': '80.0', 'building-reference-number': '10002334112', + 'environment-impact-current': '63', 'co2-emissions-current': '3.4', + 'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A', + 'number-habitable-rooms': '4', 'address2': '', 'hot-water-env-eff': 'Poor', 'posttown': 'WIGAN', + 'mainheatc-energy-eff': 'Very Good', 'main-fuel': 'electricity (not community)', + 'lighting-env-eff': 'Very Good', 'windows-energy-eff': 'Good', 'floor-env-eff': 'N/A', + 'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in all fixed outlets', + 'roof-env-eff': 'Very Poor', 'walls-energy-eff': 'Average', 'photo-supply': '0.0', + 'lighting-cost-potential': '67', 'mainheat-env-eff': 'Very Good', 'multi-glaze-proportion': '100', + 'main-heating-controls': '', 'lodgement-datetime': '2022-06-06 13:01:20', 'flat-top-storey': '', + 'current-energy-rating': 'E', 'secondheat-description': 'Room heaters, mains gas', + 'walls-env-eff': 'Average', 'transaction-type': 
'ECO assessment', 'uprn': '100011776843', + 'current-energy-efficiency': '53', 'energy-consumption-current': '252', + 'mainheat-description': 'Air source heat pump, radiators, electric', 'lighting-cost-current': '67', + 'lodgement-date': '2022-06-06', 'extension-count': '1', 'mainheatc-env-eff': 'Very Good', + 'lmk-key': '672d5947f3d4a55d97255af71651d6127a939418fa66a687070af77e0ba90df2', 'wind-turbine-count': '0', + 'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '70', + 'hot-water-energy-eff': 'Very Poor', 'low-energy-lighting': '100', + 'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system' + } + + # differences = [] + # for k, v in ending_epc.items(): + # if v != starting_epc[k]: + # differences.append( + # { + # "variable": k, + # "starting_value": starting_epc[k], + # "ending_value": v + # } + # ) + # differences = pd.DataFrame(differences) + # + # diffs = differences[ + # differences["variable"].isin( + # [ + # "mainheat-energy-eff", + # "mainheatcont-description", + # "mainheatc-energy-eff", + # "main-fuel", + # "mainheat-env-eff", + # "mainheat-description", + # "hot-water-energy-eff", + # "hotwater-description" + # ] + # ) + # ] + + cleaning_data = read_dataframe_from_s3_parquet( + bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", + ) + + cleaned = read_from_s3( + s3_file_name="cleaned_epc_data/cleaned.bson", + bucket_name="retrofit-data-dev" + ) + cleaned = msgpack.unpackb(cleaned, raw=False) + + photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev") + + epc = EPCRecord( + epc_records={ + 'original_epc': starting_epc, + 'full_sap_epc': {}, + 'old_data': [] + }, + run_mode="newdata", + cleaning_data=cleaning_data + ) + + home = Property( + id=0, + address="", + postcode="", + epc_record=epc, + already_installed={}, + non_invasive_recommendations={}, + ) + home.in_conservation_area = False + home.is_listed 
= False + home.is_heritage = False + home.restricted_measures = True + home.get_components( + cleaned=cleaned, + photo_supply_lookup=photo_supply_lookup, + floor_area_decile_thresholds=floor_area_decile_thresholds + ) + + recommender = HeatingRecommender(property_instance=home) + recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False) + + # Patch - for this property, the hot water energy efficiency is very poor. it's not clear why this is, + # but we insert this for this test + recommender.heating_recommendations[0]["simulation_config"]["hot_water_energy_eff_ending"] = "Very Poor" + + property_recommendations = Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations]) + + assert len(recommender.heating_recommendations) == 1 + + home.create_base_difference_epc_record(cleaned_lookup=cleaned) + home.adjust_difference_record_with_recommendations( + property_recommendations, [] + ) + + scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop( + columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", + "carbon_ending"] + ) + + model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat()) + model_api.MODEL_PREFIXES = ["sap_change_predictions"] + + predictions_dict = model_api.predict_all( + df=scoring_data, + bucket="retrofit-data-dev", + prediction_buckets={ + "sap_change_predictions": "retrofit-sap-predictions-dev", + } + ) + assert predictions_dict["sap_change_predictions"]["predictions"].values[0] == 52.2 + + def test_air_source_heat_pump_gas_boiler_starting_2(self): + """ + This property seems to have minuscule movement in SAP - just 2 points + :return: + """ + + starting_epc = { + 'low-energy-fixed-light-count': '', 'address': '31 Whinney Hill Park', 'uprn-source': 'Energy Assessor', + 'floor-height': '2.3', 'heating-cost-potential': '394', 'unheated-corridor-length': '', + 'hot-water-cost-potential': '48',
'construction-age-band': 'England and Wales: 1967-1975', + 'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average', + 'lighting-energy-eff': 'Good', 'environment-impact-potential': '87', + 'glazed-type': 'double glazing, unknown install date', 'heating-cost-current': '487', 'address3': '', + 'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'sheating-energy-eff': 'N/A', + 'property-type': 'Bungalow', 'local-authority-label': 'Calderdale', 'fixed-lighting-outlets-count': '5', + 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '86', + 'county': '', 'postcode': 'HD6 2PX', 'solar-water-heating-flag': 'N', 'constituency': 'E14000614', + 'co2-emissions-potential': '0.8', 'number-heated-rooms': '2', + 'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '105', + 'local-authority': 'E08000033', 'built-form': 'End-Terrace', 'number-open-fireplaces': '0', + 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2021-11-25', + 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '56', 'address1': '31 Whinney Hill Park', + 'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Calder Valley', + 'roof-energy-eff': 'Good', 'total-floor-area': '44.0', 'building-reference-number': '10001772583', + 'environment-impact-current': '62', 'co2-emissions-current': '2.5', + 'roof-description': 'Pitched, 250 mm loft insulation', 'floor-energy-eff': 'N/A', + 'number-habitable-rooms': '2', 'address2': '', 'hot-water-env-eff': 'Good', 'posttown': 'BRIGHOUSE', + 'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Good', + 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A', + 'lighting-description': 'Low energy lighting in 60% of fixed outlets', 'roof-env-eff': 'Good', + 'walls-energy-eff': 'Average', 'photo-supply': '0.0', 
'lighting-cost-potential': '40', + 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '', + 'lodgement-datetime': '2021-11-25 11:39:35', 'flat-top-storey': '', 'current-energy-rating': 'D', + 'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average', + 'transaction-type': 'rental', 'uprn': '100051304421', 'current-energy-efficiency': '62', + 'energy-consumption-current': '322', 'mainheat-description': 'Boiler and radiators, mains gas', + 'lighting-cost-current': '56', 'lodgement-date': '2021-11-25', 'extension-count': '0', + 'mainheatc-env-eff': 'Good', 'lmk-key': '077f70657e9c3f1f0ce5392798398398616b159493b2a8ca2338961596631c27', + 'wind-turbine-count': '0', 'tenure': 'Rented (social)', 'floor-level': '', + 'potential-energy-efficiency': '86', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '60', + 'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system' + } + + ending_epc = { + 'low-energy-fixed-light-count': '', 'address': '31 Whinney Hill Park', + 'uprn-source': 'Energy Assessor', 'floor-height': '2.3', 'heating-cost-potential': '277', + 'unheated-corridor-length': '', 'hot-water-cost-potential': '266', + 'construction-age-band': 'England and Wales: 1967-1975', 'potential-energy-rating': 'B', + 'mainheat-energy-eff': 'Very Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Good', + 'environment-impact-potential': '90', 'glazed-type': 'double glazing, unknown install date', + 'heating-cost-current': '331', 'address3': '', + 'mainheatcont-description': 'Programmer and room thermostat', 'sheating-energy-eff': 'N/A', + 'property-type': 'Bungalow', 'local-authority-label': 'Calderdale', + 'fixed-lighting-outlets-count': '5', 'energy-tariff': 'Single', + 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '404', 'county': '', + 'postcode': 'HD6 2PX', 'solar-water-heating-flag': 'N', 'constituency': 'E14000614', + 'co2-emissions-potential': '0.7', 
'number-heated-rooms': '2', + 'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '92', + 'local-authority': 'E08000033', 'built-form': 'End-Terrace', 'number-open-fireplaces': '0', + 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', + 'inspection-date': '2021-11-25', 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '48', + 'address1': '31 Whinney Hill Park', 'heat-loss-corridor': '', 'flat-storey-count': '', + 'constituency-label': 'Calder Valley', 'roof-energy-eff': 'Good', 'total-floor-area': '44.0', + 'building-reference-number': '10001772583', 'environment-impact-current': '68', + 'co2-emissions-current': '2.1', 'roof-description': 'Pitched, 250 mm loft insulation', + 'floor-energy-eff': 'N/A', 'number-habitable-rooms': '2', 'address2': '', + 'hot-water-env-eff': 'Poor', 'posttown': 'BRIGHOUSE', 'mainheatc-energy-eff': 'Average', + 'main-fuel': 'electricity (not community)', 'lighting-env-eff': 'Good', + 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A', + 'lighting-description': 'Low energy lighting in 60% of fixed outlets', 'roof-env-eff': 'Good', + 'walls-energy-eff': 'Average', 'photo-supply': '0.0', 'lighting-cost-potential': '40', + 'mainheat-env-eff': 'Very Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '', + 'lodgement-datetime': '2022-03-23 16:06:21', 'flat-top-storey': '', 'current-energy-rating': 'D', + 'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average', + 'transaction-type': 'rental', 'uprn': '100051304421', 'current-energy-efficiency': '64', + 'energy-consumption-current': '283', + 'mainheat-description': 'Air source heat pump, radiators, electric', + 'lighting-cost-current': '57', 'lodgement-date': '2022-03-23', 'extension-count': '0', + 'mainheatc-env-eff': 'Average', + 'lmk-key': '6296248141447b53426a40f1c39da17dad5f4786485db55ee38737891111a4d4', + 'wind-turbine-count': '0', 'tenure': 'Rented (social)', 
'floor-level': '', + 'potential-energy-efficiency': '89', 'hot-water-energy-eff': 'Very Poor', + 'low-energy-lighting': '60', 'walls-description': 'Cavity wall, filled cavity', + 'hotwater-description': 'From main system' + } + + # differences = [] + # for k, v in ending_epc.items(): + # if v != starting_epc[k]: + # differences.append( + # { + # "variable": k, + # "starting_value": starting_epc[k], + # "ending_value": v + # } + # ) + # differences = pd.DataFrame(differences) + # + # diffs = differences[ + # differences["variable"].isin( + # [ + # "mainheat-energy-eff", + # "mainheatcont-description", + # "mainheatc-energy-eff", + # "main-fuel", + # "mainheat-env-eff", + # "mainheat-description", + # "hot-water-energy-eff", + # "hotwater-description" + # ] + # ) + # ] + + cleaning_data = read_dataframe_from_s3_parquet( + bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", + ) + + cleaned = read_from_s3( + s3_file_name="cleaned_epc_data/cleaned.bson", + bucket_name="retrofit-data-dev" + ) + cleaned = msgpack.unpackb(cleaned, raw=False) + + photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev") + + epc = EPCRecord( + epc_records={ + 'original_epc': starting_epc, + 'full_sap_epc': {}, + 'old_data': [] + }, + run_mode="newdata", + cleaning_data=cleaning_data + ) + + home = Property( + id=0, + address="", + postcode="", + epc_record=epc, + already_installed={}, + non_invasive_recommendations={}, + ) + home.in_conservation_area = False + home.is_listed = False + home.is_heritage = False + home.restricted_measures = True + home.get_components( + cleaned=cleaned, + photo_supply_lookup=photo_supply_lookup, + floor_area_decile_thresholds=floor_area_decile_thresholds + ) + + recommender = HeatingRecommender(property_instance=home) + recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False) + property_recommendations = 
Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations]) + + assert len(recommender.heating_recommendations) == 1 + + home.create_base_difference_epc_record(cleaned_lookup=cleaned) + home.adjust_difference_record_with_recommendations( + property_recommendations, [] + ) + + scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop( + columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", + "carbon_ending"] + ) + + model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat()) + model_api.MODEL_PREFIXES = ["sap_change_predictions"] + + predictions_dict = model_api.predict_all( + df=scoring_data, + bucket="retrofit-data-dev", + prediction_buckets={ + "sap_change_predictions": "retrofit-sap-predictions-dev", + } + ) + assert predictions_dict["sap_change_predictions"]["predictions"].values[0] == 69.3 + + # In actuality with this property, the heating controls get downgraded, so we test a manual patch of this + patched_simulation_config = { + 'mainheat_energy_eff_ending': "Very Good", + 'hot_water_energy_eff_ending': 'Very Poor', + 'has_boiler_ending': False, + 'has_air_source_heat_pump_ending': True, + 'has_electric_ending': True, + 'has_mains_gas_ending': False, + 'fuel_type_ending': 'electricity', + 'trvs_ending': None, + "mainheatc_energy_eff_ending": 'Average' + } + + # PATCHING + property_recommendations_patch = Recommendations.insert_temp_recommendation_id( + [recommender.heating_recommendations] + ) + property_recommendations_patch[0][0]["simulation_config"] = patched_simulation_config + + home.create_base_difference_epc_record(cleaned_lookup=cleaned) + home.adjust_difference_record_with_recommendations( + property_recommendations_patch, [] + ) + + scoring_data_patch = pd.DataFrame(home.recommendations_scoring_data).drop( + columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", + "carbon_ending"] + ) + + model_api = 
ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat()) + model_api.MODEL_PREFIXES = ["sap_change_predictions"] + + predictions_dict_patch = model_api.predict_all( + df=scoring_data_patch, + bucket="retrofit-data-dev", + prediction_buckets={ + "sap_change_predictions": "retrofit-sap-predictions-dev", + } + ) + # The error is only 0.3, so the model is working + assert predictions_dict_patch["sap_change_predictions"]["predictions"].values[0] == 64.3 + assert ending_epc["current-energy-efficiency"] == '64' + + def test_air_source_heat_pump_lpg_boiler(self): + starting_epc = { + 'low-energy-fixed-light-count': '', 'address': 'Holly Lodge, The Drive, Perry', + 'uprn-source': 'Energy Assessor', 'floor-height': '2.8', 'heating-cost-potential': '1628', + 'unheated-corridor-length': '', 'hot-water-cost-potential': '175', + 'construction-age-band': 'England and Wales: 1950-1966', 'potential-energy-rating': 'D', + 'mainheat-energy-eff': 'Poor', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Average', + 'environment-impact-potential': '70', 'glazed-type': 'double glazing, unknown install date', + 'heating-cost-current': '2158', 'address3': 'Perry', + 'mainheatcont-description': 'No time or thermostatic control of room temperature', + 'sheating-energy-eff': 'N/A', 'property-type': 'Bungalow', 'local-authority-label': 'Huntingdonshire', + 'fixed-lighting-outlets-count': '12', 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', + 'hot-water-cost-current': '257', 'county': 'Cambridgeshire', 'postcode': 'PE28 0SX', + 'solar-water-heating-flag': 'N', 'constituency': 'E14000757', 'co2-emissions-potential': '3.3', + 'number-heated-rooms': '5', 'floor-description': 'Solid, no insulation (assumed)', + 'energy-consumption-potential': '128', 'local-authority': 'E07000011', 'built-form': 'Semi-Detached', + 'number-open-fireplaces': '0', 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', + 'inspection-date': '2023-08-31', 
'mains-gas-flag': 'N', 'co2-emiss-curr-per-floor-area': '51', + 'address1': 'Holly Lodge', 'heat-loss-corridor': '', 'flat-storey-count': '', + 'constituency-label': 'Huntingdon', 'roof-energy-eff': 'Good', 'total-floor-area': '117.0', + 'building-reference-number': '10005199915', 'environment-impact-current': '50', + 'co2-emissions-current': '5.9', 'roof-description': 'Pitched, 270 mm loft insulation', + 'floor-energy-eff': 'N/A', 'number-habitable-rooms': '5', 'address2': 'The Drive', + 'hot-water-env-eff': 'Good', 'posttown': 'HUNTINGDON', 'mainheatc-energy-eff': 'Very Poor', + 'main-fuel': 'LPG (not community)', 'lighting-env-eff': 'Average', 'windows-energy-eff': 'Average', + 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A', + 'lighting-description': 'Low energy lighting in 33% of fixed outlets', 'roof-env-eff': 'Good', + 'walls-energy-eff': 'Average', 'photo-supply': '0.0', 'lighting-cost-potential': '166', + 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '', + 'lodgement-datetime': '2023-10-30 13:46:54', 'flat-top-storey': '', 'current-energy-rating': 'F', + 'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average', + 'transaction-type': 'ECO assessment', 'uprn': '100091200828', 'current-energy-efficiency': '32', + 'energy-consumption-current': '243', 'mainheat-description': 'Boiler and radiators, LPG', + 'lighting-cost-current': '277', 'lodgement-date': '2023-10-30', 'extension-count': '0', + 'mainheatc-env-eff': 'Very Poor', + 'lmk-key': 'f1d3bd4b8b50bc9b006231ccb158537c408523b748b3f4ef7e98cd03b144afa5', 'wind-turbine-count': '0', + 'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '56', + 'hot-water-energy-eff': 'Poor', 'low-energy-lighting': '33', + 'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system' + } + + ending_epc = { + 'low-energy-fixed-light-count': '', 'address': 'Holly Lodge, The Drive, Perry', + 'uprn-source': 
'Energy Assessor', 'floor-height': '2.8', 'heating-cost-potential': '917', + 'unheated-corridor-length': '', 'hot-water-cost-potential': '328', + 'construction-age-band': 'England and Wales: 1950-1966', 'potential-energy-rating': 'A', + 'mainheat-energy-eff': 'Very Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Average', + 'environment-impact-potential': '96', 'glazed-type': 'double glazing, unknown install date', + 'heating-cost-current': '1098', 'address3': 'Perry', + 'mainheatcont-description': 'Programmer, TRVs and bypass', 'sheating-energy-eff': 'N/A', + 'property-type': 'Bungalow', 'local-authority-label': 'Huntingdonshire', + 'fixed-lighting-outlets-count': '12', 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', + 'hot-water-cost-current': '328', 'county': 'Cambridgeshire', 'postcode': 'PE28 0SX', + 'solar-water-heating-flag': 'N', 'constituency': 'E14000757', 'co2-emissions-potential': '0.3', + 'number-heated-rooms': '5', 'floor-description': 'Solid, no insulation (assumed)', + 'energy-consumption-potential': '16', 'local-authority': 'E07000011', 'built-form': 'Semi-Detached', + 'number-open-fireplaces': '0', 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', + 'inspection-date': '2023-10-05', 'mains-gas-flag': 'N', 'co2-emiss-curr-per-floor-area': '6', + 'address1': 'Holly Lodge', 'heat-loss-corridor': '', 'flat-storey-count': '', + 'constituency-label': 'Huntingdon', 'roof-energy-eff': 'Good', 'total-floor-area': '117.0', + 'building-reference-number': '10005199915', 'environment-impact-current': '92', + 'co2-emissions-current': '0.7', 'roof-description': 'Pitched, 270 mm loft insulation', + 'floor-energy-eff': 'N/A', 'number-habitable-rooms': '5', 'address2': 'The Drive', + 'hot-water-env-eff': 'Very Good', 'posttown': 'HUNTINGDON', 'mainheatc-energy-eff': 'Average', + 'main-fuel': 'electricity (not community)', 'lighting-env-eff': 'Average', 'windows-energy-eff': 'Average', + 'floor-env-eff': 'N/A', 
'sheating-env-eff': 'N/A', + 'lighting-description': 'Low energy lighting in 33% of fixed outlets', 'roof-env-eff': 'Good', + 'walls-energy-eff': 'Average', 'photo-supply': '', 'lighting-cost-potential': '166', + 'mainheat-env-eff': 'Very Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '', + 'lodgement-datetime': '2023-11-01 16:29:16', 'flat-top-storey': '', 'current-energy-rating': 'A', + 'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average', + 'transaction-type': 'ECO assessment', 'uprn': '100091200828', 'current-energy-efficiency': '92', + 'energy-consumption-current': '37', 'mainheat-description': 'Air source heat pump, radiators, electric', + 'lighting-cost-current': '277', 'lodgement-date': '2023-11-01', 'extension-count': '0', + 'mainheatc-env-eff': 'Average', + 'lmk-key': 'cb7f2838b727907767c8c2a385cd22f722b1e4745463391d910d228e52124515', 'wind-turbine-count': '0', + 'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '95', + 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '33', + 'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system' + } + + cleaning_data = read_dataframe_from_s3_parquet( + bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", + ) + + cleaned = read_from_s3( + s3_file_name="cleaned_epc_data/cleaned.bson", + bucket_name="retrofit-data-dev" + ) + cleaned = msgpack.unpackb(cleaned, raw=False) + + photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev") + + epc = EPCRecord( + epc_records={ + 'original_epc': starting_epc, + 'full_sap_epc': {}, + 'old_data': [] + }, + run_mode="newdata", + cleaning_data=cleaning_data + ) + + home = Property( + id=0, + address="", + postcode="", + epc_record=epc, + already_installed={}, + non_invasive_recommendations={}, + ) + home.in_conservation_area = False + home.is_listed = False + home.is_heritage = False + 
home.restricted_measures = True + home.get_components( + cleaned=cleaned, + photo_supply_lookup=photo_supply_lookup, + floor_area_decile_thresholds=floor_area_decile_thresholds + ) + + recommender = HeatingRecommender(property_instance=home) + recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False) + property_recommendations = Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations]) + + assert len(recommender.heating_recommendations) == 1 + + home.create_base_difference_epc_record(cleaned_lookup=cleaned) + home.adjust_difference_record_with_recommendations( + property_recommendations, [] + ) + + scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop( + columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", + "carbon_ending"] + ) + + model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat()) + model_api.MODEL_PREFIXES = ["sap_change_predictions"] + + predictions_dict = model_api.predict_all( + df=scoring_data, + bucket="retrofit-data-dev", + prediction_buckets={ + "sap_change_predictions": "retrofit-sap-predictions-dev", + } + ) + # We predict a huge uplift but not quite as much as the EPC, due to some distinct differences between our + # recommendation and the EPC + assert predictions_dict["sap_change_predictions"]["predictions"].values[0] == 81.3 + assert ending_epc['current-energy-efficiency'] == '92' + + # PATCH + # We patch the simulation config, to reflect the ending EPC, to see if we get the ending EPC's config + patched_simulation_config = { + 'mainheat_energy_eff_ending': "Very Good", + 'hot_water_energy_eff_ending': 'Good', + 'has_boiler_ending': False, + 'has_air_source_heat_pump_ending': True, + 'has_electric_ending': True, + 'has_lpg_ending': False, + 'fuel_type_ending': 'electricity', + 'switch_system_ending': 'programmer', + 'no_control_ending': None, + 'auxiliary_systems_ending': 'bypass', + 
'trvs_ending': 'trvs', + "mainheatc_energy_eff_ending": 'Average' + } + + # PATCHING + property_recommendations_patch = Recommendations.insert_temp_recommendation_id( + [recommender.heating_recommendations] + ) + property_recommendations_patch[0][0]["simulation_config"] = patched_simulation_config + + home.create_base_difference_epc_record(cleaned_lookup=cleaned) + home.adjust_difference_record_with_recommendations( + property_recommendations_patch, [] + ) + + scoring_data_patch = pd.DataFrame(home.recommendations_scoring_data).drop( + columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", + "carbon_ending"] + ) + + model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat()) + model_api.MODEL_PREFIXES = ["sap_change_predictions"] + + predictions_dict_patch = model_api.predict_all( + df=scoring_data_patch, + bucket="retrofit-data-dev", + prediction_buckets={ + "sap_change_predictions": "retrofit-sap-predictions-dev", + } + ) + + assert predictions_dict_patch["sap_change_predictions"]["predictions"].values[0] == 88.9 + # We still underpredict but the improvement is notable + + def test_offgrid(self): + """ + We test on a property we've worked with before, where we compare two options + a) Upgrading to a boiler + b) Upgrading to a heat pump + :return: + """ + + starting_epc = { + 'low-energy-fixed-light-count': '', 'address': '6 Beech Road', 'uprn-source': 'Energy Assessor', + 'floor-height': '2.4', 'heating-cost-potential': '612', 'unheated-corridor-length': '', + 'hot-water-cost-potential': '123', 'construction-age-band': 'England and Wales: 1930-1949', + 'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Very Poor', 'windows-env-eff': 'Good', + 'lighting-energy-eff': 'Good', 'environment-impact-potential': '87', + 'glazed-type': 'double glazing installed during or after 2002', 'heating-cost-current': '2278', + 'address3': '', 'mainheatcont-description': 'Appliance thermostats', 
'sheating-energy-eff': 'N/A', + 'property-type': 'House', 'local-authority-label': 'Dudley', 'fixed-lighting-outlets-count': '9', + 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '604', + 'county': '', 'postcode': 'DY1 4BP', 'solar-water-heating-flag': 'N', 'constituency': 'E14000671', + 'co2-emissions-potential': '1.0', 'number-heated-rooms': '4', + 'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '93', + 'local-authority': 'E08000027', 'built-form': 'End-Terrace', 'number-open-fireplaces': '0', + 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2024-03-13', + 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '83', 'address1': '6 Beech Road', + 'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Dudley North', + 'roof-energy-eff': 'Very Poor', 'total-floor-area': '60.0', 'building-reference-number': '10005780080', + 'environment-impact-current': '41', 'co2-emissions-current': '5.0', + 'roof-description': 'Pitched, 12 mm loft insulation', 'floor-energy-eff': 'N/A', + 'number-habitable-rooms': '4', 'address2': '', 'hot-water-env-eff': 'Poor', 'posttown': 'DUDLEY', + 'mainheatc-energy-eff': 'Good', 'main-fuel': 'electricity (not community)', 'lighting-env-eff': 'Good', + 'windows-energy-eff': 'Good', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A', + 'lighting-description': 'Low energy lighting in 67% of fixed outlets', 'roof-env-eff': 'Very Poor', + 'walls-energy-eff': 'Average', 'photo-supply': '0.0', 'lighting-cost-potential': '113', + 'mainheat-env-eff': 'Poor', 'multi-glaze-proportion': '100', 'main-heating-controls': '', + 'lodgement-datetime': '2024-03-13 11:29:11', 'flat-top-storey': '', 'current-energy-rating': 'F', + 'secondheat-description': 'None', 'walls-env-eff': 'Average', 'transaction-type': 'rental', + 'uprn': '90055152', 'current-energy-efficiency': '32', 'energy-consumption-current': '491', + 
'mainheat-description': 'Room heaters, electric', 'lighting-cost-current': '113', + 'lodgement-date': '2024-03-13', 'extension-count': '1', 'mainheatc-env-eff': 'Good', + 'lmk-key': '78ddf851b660e599a0894924d0e6b503980f5e0ad1aa711f8411718dc2989c44', 'wind-turbine-count': '0', + 'tenure': 'Rented (social)', 'floor-level': '', 'potential-energy-efficiency': '87', + 'hot-water-energy-eff': 'Very Poor', 'low-energy-lighting': '67', + 'walls-description': 'Cavity wall, filled cavity', + 'hotwater-description': 'Electric immersion, standard tariff' + } + + cleaning_data = read_dataframe_from_s3_parquet( + bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", + ) + + cleaned = read_from_s3( + s3_file_name="cleaned_epc_data/cleaned.bson", + bucket_name="retrofit-data-dev" + ) + cleaned = msgpack.unpackb(cleaned, raw=False) + + photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev") + + epc = EPCRecord( + epc_records={ + 'original_epc': starting_epc, + 'full_sap_epc': {}, + 'old_data': [] + }, + run_mode="newdata", + cleaning_data=cleaning_data + ) + + home = Property( + id=0, + address="", + postcode="", + epc_record=epc, + already_installed={}, + non_invasive_recommendations={}, + ) + home.in_conservation_area = False + home.is_listed = False + home.is_heritage = False + home.restricted_measures = True + home.get_components( + cleaned=cleaned, + photo_supply_lookup=photo_supply_lookup, + floor_area_decile_thresholds=floor_area_decile_thresholds + ) + + recommender = HeatingRecommender(property_instance=home) + recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False) + recommender.recommend_boiler_upgrades(phase=0, system_change=True, exising_room_heaters=False) + + assert len(recommender.heating_recommendations) == 3 + + property_recommendations = Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations]) + + 
home.create_base_difference_epc_record(cleaned_lookup=cleaned) + home.adjust_difference_record_with_recommendations( + property_recommendations, [] + ) + + scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop( + columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", + "carbon_ending"] + ) + + model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat()) + model_api.MODEL_PREFIXES = ["sap_change_predictions"] + + predictions_dict = model_api.predict_all( + df=scoring_data, + bucket="retrofit-data-dev", + prediction_buckets={ + "sap_change_predictions": "retrofit-sap-predictions-dev", + } + ) + + # The ASHP isn't better under SAP, compared to a gas boiler with good heat controls + assert predictions_dict["sap_change_predictions"]["predictions"].tolist() == [66.9, 65.5, 65.9] From f0936bd1d48e70e0afc726d9e34e44de61b92ab8 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 7 May 2024 17:46:51 +0100 Subject: [PATCH 19/20] Added an extra test for solar --- .../tests/test_solar_pv_recommendations.py | 161 ++++++++++++++++++ 1 file changed, 161 insertions(+) diff --git a/recommendations/tests/test_solar_pv_recommendations.py b/recommendations/tests/test_solar_pv_recommendations.py index 5481cb17..e912f373 100644 --- a/recommendations/tests/test_solar_pv_recommendations.py +++ b/recommendations/tests/test_solar_pv_recommendations.py @@ -2,6 +2,13 @@ import pytest from recommendations.SolarPvRecommendations import SolarPvRecommendations from backend.Property import Property from etl.epc.Record import EPCRecord +import pandas as pd +from datetime import datetime +from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3 +from etl.solar.SolarPhotoSupply import SolarPhotoSupply +from recommendations.Recommendations import Recommendations +from backend.ml_models.api import ModelApi +import msgpack class TestSolarPvRecommendations: @@ -82,3 +89,157 @@ class 
TestSolarPvRecommendations: 'photo_supply': 4000 } ] + + def test_model(self): + """ + This function tests the recommendation engine, in conjunction with the model + :return: + """ + + starting_epc = { + 'low-energy-fixed-light-count': '', 'address': '27 Cromwell Street', 'uprn-source': 'Energy Assessor', + 'floor-height': '2.5', 'heating-cost-potential': '443', 'unheated-corridor-length': '', + 'hot-water-cost-potential': '53', 'construction-age-band': 'England and Wales: before 1900', + 'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average', + 'lighting-energy-eff': 'Very Poor', 'environment-impact-potential': '85', + 'glazed-type': 'double glazing installed before 2002', 'heating-cost-current': '904', 'address3': '', + 'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'sheating-energy-eff': 'N/A', + 'property-type': 'House', 'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '10', + 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '79', + 'county': 'Lincolnshire', 'postcode': 'DN21 1DH', 'solar-water-heating-flag': 'N', + 'constituency': 'E14000707', 'co2-emissions-potential': '1.5', 'number-heated-rooms': '5', + 'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '92', + 'local-authority': 'E07000142', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0', + 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2021-11-17', + 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '61', 'address1': '27 Cromwell Street', + 'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Gainsborough', + 'roof-energy-eff': 'Very Poor', 'total-floor-area': '89.0', 'building-reference-number': '10001989430', + 'environment-impact-current': '47', 'co2-emissions-current': '5.4', + 'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A', 
+ 'number-habitable-rooms': '5', 'address2': '', 'hot-water-env-eff': 'Good', 'posttown': 'GAINSBOROUGH', + 'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Very Poor', + 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A', + 'lighting-description': 'No low energy lighting', 'roof-env-eff': 'Very Poor', + 'walls-energy-eff': 'Very Poor', 'photo-supply': '0.0', 'lighting-cost-potential': '72', + 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '', + 'lodgement-datetime': '2021-12-01 10:12:23', 'flat-top-storey': '', 'current-energy-rating': 'E', + 'secondheat-description': 'Room heaters, mains gas', 'walls-env-eff': 'Very Poor', + 'transaction-type': 'ECO assessment', 'uprn': '100030949912', 'current-energy-efficiency': '54', + 'energy-consumption-current': '346', 'mainheat-description': 'Boiler and radiators, mains gas', + 'lighting-cost-current': '144', 'lodgement-date': '2021-12-01', 'extension-count': '2', + 'mainheatc-env-eff': 'Good', 'lmk-key': '3ec5533af02ec78361c1f9bea8dd2e878c2c6fa6cf59e5cc505c3eeb038e0f91', + 'wind-turbine-count': '0', 'tenure': 'Owner-occupied', 'floor-level': '', + 'potential-energy-efficiency': '86', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '0', + 'walls-description': 'Solid brick, as built, no insulation (assumed)', + 'hotwater-description': 'From main system' + } + + ending_epc = { + 'low-energy-fixed-light-count': '', 'address': '27 Cromwell Street', 'uprn-source': 'Energy Assessor', + 'floor-height': '2.5', 'heating-cost-potential': '443', 'unheated-corridor-length': '', + 'hot-water-cost-potential': '53', 'construction-age-band': 'England and Wales: before 1900', + 'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average', + 'lighting-energy-eff': 'Very Poor', 'environment-impact-potential': '86', + 'glazed-type': 'double glazing installed before 2002', 
'heating-cost-current': '904', 'address3': '', + 'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'sheating-energy-eff': 'N/A', + 'property-type': 'House', 'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '10', + 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '79', + 'county': 'Lincolnshire', 'postcode': 'DN21 1DH', 'solar-water-heating-flag': 'N', + 'constituency': 'E14000707', 'co2-emissions-potential': '1.4', 'number-heated-rooms': '5', + 'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '84', + 'local-authority': 'E07000142', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0', + 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2021-12-21', + 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '49', 'address1': '27 Cromwell Street', + 'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Gainsborough', + 'roof-energy-eff': 'Very Poor', 'total-floor-area': '89.0', 'building-reference-number': '10001989430', + 'environment-impact-current': '55', 'co2-emissions-current': '4.4', + 'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A', + 'number-habitable-rooms': '5', 'address2': '', 'hot-water-env-eff': 'Good', 'posttown': 'GAINSBOROUGH', + 'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Very Poor', + 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A', + 'lighting-description': 'No low energy lighting', 'roof-env-eff': 'Very Poor', + 'walls-energy-eff': 'Very Poor', 'photo-supply': '50.0', 'lighting-cost-potential': '72', + 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '', + 'lodgement-datetime': '2021-12-21 17:33:09', 'flat-top-storey': '', 'current-energy-rating': 'D', + 'secondheat-description': 'Room heaters, mains gas', 
'walls-env-eff': 'Very Poor', + 'transaction-type': 'ECO assessment', 'uprn': '100030949912', 'current-energy-efficiency': '65', + 'energy-consumption-current': '277', 'mainheat-description': 'Boiler and radiators, mains gas', + 'lighting-cost-current': '144', 'lodgement-date': '2021-12-21', 'extension-count': '2', + 'mainheatc-env-eff': 'Good', 'lmk-key': 'b0b19583c59afbc69db12f4d6c98cd8837e80da3214d577c426eb3e672d424fc', + 'wind-turbine-count': '0', 'tenure': 'Owner-occupied', 'floor-level': '', + 'potential-energy-efficiency': '88', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '0', + 'walls-description': 'Solid brick, as built, no insulation (assumed)', + 'hotwater-description': 'From main system' + } + + cleaning_data = read_dataframe_from_s3_parquet( + bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", + ) + + cleaned = read_from_s3( + s3_file_name="cleaned_epc_data/cleaned.bson", + bucket_name="retrofit-data-dev" + ) + cleaned = msgpack.unpackb(cleaned, raw=False) + + photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev") + + epc = EPCRecord( + epc_records={ + 'original_epc': starting_epc, + 'full_sap_epc': {}, + 'old_data': [] + }, + run_mode="newdata", + cleaning_data=cleaning_data + ) + + home = Property( + id=0, + address="", + postcode="", + epc_record=epc, + already_installed={}, + non_invasive_recommendations={}, + ) + home.in_conservation_area = False + home.is_listed = False + home.is_heritage = False + home.restricted_measures = True + home.get_components( + cleaned=cleaned, + photo_supply_lookup=photo_supply_lookup, + floor_area_decile_thresholds=floor_area_decile_thresholds + ) + + recommender = SolarPvRecommendations(property_instance=home) + recommender.recommend(phase=0) + + coverage_50_percent = [x for x in recommender.recommendation if x["photo_supply"] == 50] + assert len(coverage_50_percent) == 2 + + property_recommendations = 
def test_model2(self):
    """Check the SAP-change prediction for a 40% solar PV recommendation.

    The fixtures are two EPC lodgements for the same UPRN (100030952942):

    * ``starting_epc`` - lodged 2022-06-15, ``photo-supply`` 0.0, SAP 68.
    * ``ending_epc``   - lodged 2022-08-24, ``photo-supply`` 40.0, SAP 87.

    The test builds a ``Property`` from the starting EPC, asks
    ``SolarPvRecommendations`` for recommendations, keeps the ones with
    ``photo_supply == 40``, scores them through ``ModelApi`` and asserts the
    predicted ending SAP is 87.1, i.e. in line with the 87 actually lodged.

    Requires access to the ``retrofit-data-dev`` /
    ``retrofit-sap-predictions-dev`` buckets and an ``EPC_AUTH_TOKEN``
    environment variable holding the EPC API token.
    """
    # Local import: only this test needs it, and the file's import block is
    # outside this block.
    import os

    # The API token used to be committed here as a string literal. It is now
    # supplied via the environment so no credential lives in the repository.
    # NOTE(review): the previously committed token should be treated as
    # leaked and rotated.
    searcher = SearchEpc(
        address1="",
        postcode="",
        auth_token=os.environ["EPC_AUTH_TOKEN"],
        os_api_key="",
        full_address="",
        uprn=100030952942,
    )
    # NOTE(review): the result of this lookup is not used by any assertion
    # below; kept so the test still exercises the live lookup as before.
    searcher.find_property(False)

    # EPC lodged after the solar PV install; only its SAP score is asserted
    # on, the remaining fields document the real-world outcome.
    ending_epc = {
        'low-energy-fixed-light-count': '', 'address': '6 Kenmare Crescent',
        'uprn-source': 'Energy Assessor', 'floor-height': '2.49', 'heating-cost-potential': '464',
        'unheated-corridor-length': '', 'hot-water-cost-potential': '46',
        'construction-age-band': 'England and Wales: 1967-1975', 'potential-energy-rating': 'B',
        'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Very Good',
        'environment-impact-potential': '91', 'glazed-type': 'not defined', 'heating-cost-current': '535',
        'address3': '', 'mainheatcont-description': 'Programmer, room thermostat and TRVs',
        'sheating-energy-eff': 'N/A', 'property-type': 'Bungalow',
        'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '9',
        'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '69',
        'county': 'Lincolnshire', 'postcode': 'DN21 1PR', 'solar-water-heating-flag': 'N',
        'constituency': 'E14000707', 'co2-emissions-potential': '0.7', 'number-heated-rooms': '3',
        'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '56',
        'local-authority': 'E07000142', 'built-form': 'Semi-Detached', 'number-open-fireplaces': '0',
        'windows-description': 'Fully double glazed', 'glazed-area': 'Much More Than Typical',
        'inspection-date': '2022-08-24', 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '18',
        'address1': '6 Kenmare Crescent', 'heat-loss-corridor': '', 'flat-storey-count': '',
        'constituency-label': 'Gainsborough', 'roof-energy-eff': 'Very Good', 'total-floor-area': '66.0',
        'building-reference-number': '10002845316', 'environment-impact-current': '85',
        'co2-emissions-current': '1.2', 'roof-description': 'Pitched, 300 mm loft insulation',
        'floor-energy-eff': 'N/A', 'number-habitable-rooms': '3', 'address2': '',
        'hot-water-env-eff': 'Good', 'posttown': 'GAINSBOROUGH', 'mainheatc-energy-eff': 'Good',
        'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Very Good',
        'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
        'lighting-description': 'Low energy lighting in all fixed outlets', 'roof-env-eff': 'Very Good',
        'walls-energy-eff': 'Average', 'photo-supply': '40.0', 'lighting-cost-potential': '65',
        'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
        'lodgement-datetime': '2022-08-24 15:39:42', 'flat-top-storey': '', 'current-energy-rating': 'B',
        'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average',
        'transaction-type': 'ECO assessment', 'uprn': '100030952942', 'current-energy-efficiency': '87',
        'energy-consumption-current': '100', 'mainheat-description': 'Boiler and radiators, mains gas',
        'lighting-cost-current': '65', 'lodgement-date': '2022-08-24', 'extension-count': '0',
        'mainheatc-env-eff': 'Good',
        'lmk-key': 'e20be883431b1fed15db7fa1f52634fb7655d2b80c2fdad37df779f93ec4dafd',
        'wind-turbine-count': '0', 'tenure': 'Owner-occupied', 'floor-level': '',
        'potential-energy-efficiency': '91', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '100',
        'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system',
    }

    # EPC lodged before the install; this is the record fed to the model.
    starting_epc = {
        'low-energy-fixed-light-count': '', 'address': '6 Kenmare Crescent', 'uprn-source': 'Energy Assessor',
        'floor-height': '2.49', 'heating-cost-potential': '464', 'unheated-corridor-length': '',
        'hot-water-cost-potential': '46', 'construction-age-band': 'England and Wales: 1967-1975',
        'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average',
        'lighting-energy-eff': 'Very Good', 'environment-impact-potential': '85', 'glazed-type': 'not defined',
        'heating-cost-current': '535', 'address3': '',
        'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'sheating-energy-eff': 'N/A',
        'property-type': 'Bungalow', 'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '9',
        'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '69',
        'county': 'Lincolnshire', 'postcode': 'DN21 1PR', 'solar-water-heating-flag': 'N',
        'constituency': 'E14000707', 'co2-emissions-potential': '1.2', 'number-heated-rooms': '3',
        'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '102',
        'local-authority': 'E07000142', 'built-form': 'Semi-Detached', 'number-open-fireplaces': '0',
        'windows-description': 'Fully double glazed', 'glazed-area': 'Much More Than Typical',
        'inspection-date': '2022-05-31', 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '40',
        'address1': '6 Kenmare Crescent', 'heat-loss-corridor': '', 'flat-storey-count': '',
        'constituency-label': 'Gainsborough', 'roof-energy-eff': 'Very Good', 'total-floor-area': '66.0',
        'building-reference-number': '10002845316', 'environment-impact-current': '68',
        'co2-emissions-current': '2.6', 'roof-description': 'Pitched, 300 mm loft insulation',
        'floor-energy-eff': 'N/A', 'number-habitable-rooms': '3', 'address2': '', 'hot-water-env-eff': 'Good',
        'posttown': 'GAINSBOROUGH', 'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)',
        'lighting-env-eff': 'Very Good', 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A',
        'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in all fixed outlets',
        'roof-env-eff': 'Very Good', 'walls-energy-eff': 'Average', 'photo-supply': '0.0',
        'lighting-cost-potential': '65', 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100',
        'main-heating-controls': '', 'lodgement-datetime': '2022-06-15 08:38:02', 'flat-top-storey': '',
        'current-energy-rating': 'D', 'secondheat-description': 'Room heaters, electric',
        'walls-env-eff': 'Average', 'transaction-type': 'ECO assessment', 'uprn': '100030952942',
        'current-energy-efficiency': '68', 'energy-consumption-current': '227',
        'mainheat-description': 'Boiler and radiators, mains gas', 'lighting-cost-current': '65',
        'lodgement-date': '2022-06-15', 'extension-count': '0', 'mainheatc-env-eff': 'Good',
        'lmk-key': 'ce181970b7077cb9b4626242bfb010b30a0e48541b5f22427e81f1adbeeec4f2', 'wind-turbine-count': '0',
        'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '85',
        'hot-water-energy-eff': 'Good', 'low-energy-lighting': '100',
        'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system',
    }

    # Reference data loaded from S3.
    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name="retrofit-data-dev",
        file_key="sap_change_model/cleaning_dataset.parquet",
    )
    cleaned = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name="retrofit-data-dev",
    )
    cleaned = msgpack.unpackb(cleaned, raw=False)
    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")

    # Build the property from the pre-install EPC only.
    epc = EPCRecord(
        epc_records={
            'original_epc': starting_epc,
            'full_sap_epc': {},
            'old_data': [],
        },
        run_mode="newdata",
        cleaning_data=cleaning_data,
    )
    home = Property(
        id=0,
        address="",
        postcode="",
        epc_record=epc,
        already_installed={},
        non_invasive_recommendations={},
    )
    home.in_conservation_area = False
    home.is_listed = False
    home.is_heritage = False
    home.restricted_measures = True
    home.get_components(
        cleaned=cleaned,
        photo_supply_lookup=photo_supply_lookup,
        floor_area_decile_thresholds=floor_area_decile_thresholds,
    )

    recommender = SolarPvRecommendations(property_instance=home)
    recommender.recommend(phase=0)

    # Keep only the recommendations matching the 40% photo supply recorded on
    # the ending EPC.
    coverage_40_percent = [x for x in recommender.recommendation if x["photo_supply"] == 40]
    assert len(coverage_40_percent) == 2

    property_recommendations = Recommendations.insert_temp_recommendation_id([coverage_40_percent])

    home.create_base_difference_epc_record(cleaned_lookup=cleaned)
    home.adjust_difference_record_with_recommendations(property_recommendations, [])

    # Drop the target/outcome columns before handing the frame to the model.
    scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(
        columns=[
            "rdsap_change",
            "heat_demand_change",
            "carbon_change",
            "sap_ending",
            "heat_demand_ending",
            "carbon_ending",
        ]
    )

    model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
    # Restrict the run to the SAP-change model.
    model_api.MODEL_PREFIXES = ["sap_change_predictions"]

    predictions_dict = model_api.predict_all(
        df=scoring_data,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
        },
    )

    assert predictions_dict["sap_change_predictions"]["predictions"].tolist() == [87.1, 87.1]
    # Fixture sanity checks: the lodged SAP scores the prediction is compared
    # against by eye.
    assert ending_epc["current-energy-efficiency"] == '87'
    assert starting_epc["current-energy-efficiency"] == '68'