From 178fac1ffee52577d2cf34d340ccd5f1eeda687c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 4 Jan 2024 18:05:41 +0000 Subject: [PATCH 01/26] working on implementing solar recommendations --- backend/Property.py | 40 +++++++ etl/testing_data/solar_research.py | 105 ++++++++++++++++++ recommendations/SolarPvRecommendations.py | 37 ++++++ .../tests/test_solar_pv_recommendations.py | 0 4 files changed, 182 insertions(+) create mode 100644 etl/testing_data/solar_research.py create mode 100644 recommendations/SolarPvRecommendations.py create mode 100644 recommendations/tests/test_solar_pv_recommendations.py diff --git a/backend/Property.py b/backend/Property.py index be60784c..f1c7e65c 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -829,3 +829,43 @@ class Property(Definitions): number_habitable_rooms=self.number_of_rooms, extension_count=float(self.data["extension-count"]), ) + + def set_solar_panel_area(self, photo_supply_data): + """ + Sets the approximate area of the solar panels + :return: + """ + + # Approximate area of the solar panels + solar_panel_area = 1.6 + # Wattage per pan + solar_panel_wattage = 360 + + photo_supply_lookup = photo_supply_data["photo_supply_lookup"] + floor_area_decile_thresholds = photo_supply_data["floor_area_decile_thresholds"] + + # TODO: Create a class for the solar etl process and make this one of the functions, which applies a different + # method depending on the data type + def classify_floor_area(new_area, thresholds): + for i, threshold in enumerate(thresholds): + if new_area <= threshold: + return i # Returns the decile index (0 to 9) + return len(thresholds) + + floor_area_decile = classify_floor_area(self.floor_area, floor_area_decile_thresholds) + + # Given the photo_supply_lookup, we esimate the percentage of the roof that is suitable for solar panels + + # TODO: Move this to the ETL process, since we need to know that tenure should be lower + tenure = self.data["tenure"].lower() + 
photo_supply_matched = photo_supply_lookup[ + (photo_supply_lookup["tenure"] == tenure) & + (photo_supply_lookup["built_form"] == self.data["built-form"]) & + (photo_supply_lookup["property_type"] == self.data["property-type"]) & + (photo_supply_lookup["construction_age_band"] == self.construction_age_band) & + (photo_supply_lookup["is_flat"] == self.roof["is_flat"]) & + (photo_supply_lookup["is_pitched"] == self.roof["is_pitched"]) & + (photo_supply_lookup["is_roof_room"] == self.roof["is_roof_room"]) + ] + + # n_panels = np.floor(solar_panel_area * ) diff --git a/etl/testing_data/solar_research.py b/etl/testing_data/solar_research.py new file mode 100644 index 00000000..9abacdc3 --- /dev/null +++ b/etl/testing_data/solar_research.py @@ -0,0 +1,105 @@ +import pandas as pd +from pathlib import Path +from tqdm import tqdm +from etl.epc.property_change_app import get_cleaned +from utils.s3 import save_dataframe_to_s3_parquet + +DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates" + + +def app(): + """ + This code reads in the EPC data and attempt to produce a reasonable figure for the photo-supply variable, which + is the following: + "Percentage of photovoltaic area as a percentage of total roof area. 0% indicates that a Photovoltaic Supply + is not present in the property." + + When recommending solar, we want to simulate the retrofit by increasing this value from 0, so we need a sensible + figure to increase this to. 
This script will pull the data for that, to allow us to try and deduce what + a sensible figure would be + :return: + """ + + directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()] + results = [] + for dir in tqdm(directories): + filepath = dir / "certificates.csv" + df = pd.read_csv(filepath, low_memory=False) + df = df[~pd.isnull(df["UPRN"])] + df["UPRN"] = df["UPRN"].astype(int).astype(str) + # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA + for col in ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]: + df = df[~pd.isnull(df[col])] + # Take newest LODGEMENT_DATE per UPRN + df = df.sort_values(by="LODGEMENT_DATE", ascending=False).drop_duplicates(subset=["UPRN"]) + + data = df[ + ["UPRN", "PROPERTY_TYPE", "TENURE", "BUILT_FORM", "ROOF_DESCRIPTION", "PHOTO_SUPPLY", "TOTAL_FLOOR_AREA", + "CONSTRUCTION_AGE_BAND"] + ].copy() + data["PHOTO_SUPPLY"] = data["PHOTO_SUPPLY"].fillna(0) + data = data[data["PHOTO_SUPPLY"] != 0] + results.append(data) + + results = pd.concat(results) + + # Convert total floor area to deciles + decile_thresholds = results["TOTAL_FLOOR_AREA"].quantile([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]).values + + def classify_floor_area(new_area, thresholds): + for i, threshold in enumerate(thresholds): + if new_area <= threshold: + return i # Returns the decile index (0 to 9) + return len(thresholds) + + # Assuming 'new_data' is your new DataFrame with floor area data + results["floor_area_decile"] = pd.cut( + results["TOTAL_FLOOR_AREA"], + bins=[0] + list(decile_thresholds) + [float('inf')], + labels=False, + include_lowest=True + ) + + # Convert tenure to lower + results["TENURE"] = results["TENURE"].str.lower() + + # Append on the roof details + cleaned_lookup = get_cleaned() + lookup = pd.DataFrame(cleaned_lookup["roof-description"]) + + results = results.merge( + lookup.drop( + columns=[ + "clean_description", "thermal_transmittance", 
"thermal_transmittance_unit", "insulation_thickness", + "is_assumed" + ] + ), + left_on="ROOF_DESCRIPTION", + right_on="original_description", + how="left" + ) + + aggregated = results.groupby( + [ + "PROPERTY_TYPE", "BUILT_FORM", "TENURE", "is_pitched", "is_roof_room", "is_loft", "is_flat", "is_thatched", + "is_at_rafters", "has_dwelling_above", "CONSTRUCTION_AGE_BAND", "floor_area_decile" + ], + observed=True + ).agg( + { + "PHOTO_SUPPLY": ["median", "mean"], + } + ).reset_index() + + aggregated.columns = ['_'.join(col).strip() for col in aggregated.columns.values] + # Remove trailing underscore from columns + aggregated.columns = [col[:-1] if col.endswith("_") else col for col in aggregated.columns.values] + # Convert columns to lowercase + aggregated.columns = [col.lower() for col in aggregated.columns.values] + + # Store this data in s3 as a parquet file + save_dataframe_to_s3_parquet( + df=aggregated, + bucket_name="retrofit-data-dev", + file_key=f"solar_pv_supply/photo_supply_lookup.parquet", + ) diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py new file mode 100644 index 00000000..addbeb3f --- /dev/null +++ b/recommendations/SolarPvRecommendations.py @@ -0,0 +1,37 @@ +from recommendations.Costs import Costs + + +class SolarPvRecommendations: + + def __init__(self, property_instance): + """ + :param property_instance: Instance of the Property class, for the home associated to property_id + :param photo_supply_lookup: Lookup table of photo supply percentages + """ + + self.property = property_instance + self.costs = Costs(self.property) + + self.recommendations = [] + + def recommend(self): + """ + We check if a property is potentially suitable for solar PV based on the following criteria: + - The property is a house or bungalow + - The property has a flat or pitched roof + - The property does not have existing solar pv + :return: + """ + + is_valid_property_type = self.property.data["property-type"] in 
["House", "Bungalow"] + is_valid_roof_type = ( + self.property.roof["is_flat"] or self.property.roof["is_pitched"] or self.property.roof["is_roof_room"] + ) + has_no_existing_solar_pv = not self.property.data["photo-supply"] in [ + None, 0, self.property.DATA_ANOMALY_MATCHES + ] + + if not is_valid_property_type or not is_valid_roof_type or has_no_existing_solar_pv: + return + + # We now have a property which is potentially suitable for solar PV diff --git a/recommendations/tests/test_solar_pv_recommendations.py b/recommendations/tests/test_solar_pv_recommendations.py new file mode 100644 index 00000000..e69de29b From fcc4dc6b5136221bd989894f9440c6388618ef9a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 4 Jan 2024 18:47:04 +0000 Subject: [PATCH 02/26] collected data for solar pv estimates --- backend/Property.py | 28 +++++++++++++++-------- etl/testing_data/solar_research.py | 11 +++++++-- recommendations/Costs.py | 22 ++++++++++++++++++ recommendations/SolarPvRecommendations.py | 11 ++++++++- 4 files changed, 59 insertions(+), 13 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index f1c7e65c..f5325722 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -10,7 +10,6 @@ from etl.epc.settings import POTENTIAL_COLUMNS, EFFICIENCY_FEATURES, BUILT_FORM_ from etl.epc_clean.epc_attributes.all_cleaners import all_cleaner_map from utils.logger import setup_logger from utils.s3 import read_dataframe_from_s3_parquet -from epc_api.client import EpcClient from BaseUtility import Definitions from recommendations.rdsap_tables import england_wales_age_band_lookup, FLOOR_LEVEL_MAP from recommendations.recommendation_utils import ( @@ -89,6 +88,7 @@ class Property(Definitions): self.number_lighting_outlets = None self.floor_level = None self.number_of_windows = None + self.solar_pv_roof_area = None self.current_adjusted_energy = None self.expected_adjusted_energy = None @@ -830,19 +830,16 @@ class Property(Definitions): 
extension_count=float(self.data["extension-count"]), ) - def set_solar_panel_area(self, photo_supply_data): + def set_solar_panel_area(self, photo_supply_lookup, floor_area_decile_thresholds): """ Sets the approximate area of the solar panels :return: """ - # Approximate area of the solar panels - solar_panel_area = 1.6 - # Wattage per pan - solar_panel_wattage = 360 - - photo_supply_lookup = photo_supply_data["photo_supply_lookup"] - floor_area_decile_thresholds = photo_supply_data["floor_area_decile_thresholds"] + if (self.insulation_floor_area is None) and (self.pitched_roof_area is None): + raise ValueError( + "Need to set insulation floor area and pitched roof area before setting solar pv roof area" + ) # TODO: Create a class for the solar etl process and make this one of the functions, which applies a different # method depending on the data type @@ -868,4 +865,15 @@ class Property(Definitions): (photo_supply_lookup["is_roof_room"] == self.roof["is_roof_room"]) ] - # n_panels = np.floor(solar_panel_area * ) + if floor_area_decile in photo_supply_matched["floor_area_decile"].values: + photo_supply_matched = photo_supply_matched[ + photo_supply_matched["floor_area_decile"] == floor_area_decile + ] + + percentage_of_roof = photo_supply_matched["photo_supply_median"].mean() + percentage_of_roof = percentage_of_roof / 100 + + self.solar_pv_roof_area = ( + self.insulation_floor_area * percentage_of_roof if self.roof["is_flat"] else + self.pitched_roof_area * percentage_of_roof + ) diff --git a/etl/testing_data/solar_research.py b/etl/testing_data/solar_research.py index 9abacdc3..4e60fa7a 100644 --- a/etl/testing_data/solar_research.py +++ b/etl/testing_data/solar_research.py @@ -81,8 +81,8 @@ def app(): aggregated = results.groupby( [ - "PROPERTY_TYPE", "BUILT_FORM", "TENURE", "is_pitched", "is_roof_room", "is_loft", "is_flat", "is_thatched", - "is_at_rafters", "has_dwelling_above", "CONSTRUCTION_AGE_BAND", "floor_area_decile" + "PROPERTY_TYPE", "BUILT_FORM", 
"TENURE", "is_pitched", "is_roof_room", "is_flat", + "CONSTRUCTION_AGE_BAND", "floor_area_decile" ], observed=True ).agg( @@ -103,3 +103,10 @@ def app(): bucket_name="retrofit-data-dev", file_key=f"solar_pv_supply/photo_supply_lookup.parquet", ) + + floor_area_decile_thresholds = pd.DataFrame(decile_thresholds, columns=["floor_area_decile_thresholds"]) + save_dataframe_to_s3_parquet( + df=floor_area_decile_thresholds, + bucket_name="retrofit-data-dev", + file_key=f"solar_pv_supply/floor_area_decile_thresholds.parquet", + ) diff --git a/recommendations/Costs.py b/recommendations/Costs.py index 24ea0584..cc993143 100644 --- a/recommendations/Costs.py +++ b/recommendations/Costs.py @@ -18,6 +18,25 @@ regional_labour_variations = [ {"Region": "Northern Ireland", "Adjustment_Factor": 0.76} ] +# This data is based on the MCS database +MCS_SOLAR_PV_COST_DATA = { + "last_updated": "2024-01-04", + "average_cost_per_kwh": 2013.94, + "average_cost_per_kwh-Outer London": 2618.75, + "average_cost_per_kwh-Inner London": 2618.75, + "average_cost_per_kwh-South East England": 2083.33, + "average_cost_per_kwh-South West England": 2113, + "average_cost_per_kwh-East of England": 1973.86, + "average_cost_per_kwh-East Midlands": 1981.86, + "average_cost_per_kwh-West Midlands": 1926.55, + "average_cost_per_kwh-North East England": 2028.49, + "average_cost_per_kwh-North West England": 1620.42, + "average_cost_per_kwh-Yorkshire and the Humber": 2060.9, + "average_cost_per_kwh-Wales": 1898.83, + "average_cost_per_kwh-Scotland": 1967.97, + "average_cost_per_kwh-Northern Ireland": 2126.09, +} + class Costs: """ @@ -811,3 +830,6 @@ class Costs: "labour_cost": labour_cost, "labour_days": labour_days } + + def solar_pv(self, wattage): + pass diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py index addbeb3f..e8b76988 100644 --- a/recommendations/SolarPvRecommendations.py +++ b/recommendations/SolarPvRecommendations.py @@ -1,12 +1,16 @@ +import numpy 
as np from recommendations.Costs import Costs class SolarPvRecommendations: + # Approximate area of the solar panels + SOLAR_PANEL_AREA = 1.6 + # Wattage per panel + SOLAR_PANEL_WATTAGE = 360 def __init__(self, property_instance): """ :param property_instance: Instance of the Property class, for the home associated to property_id - :param photo_supply_lookup: Lookup table of photo supply percentages """ self.property = property_instance @@ -35,3 +39,8 @@ class SolarPvRecommendations: return # We now have a property which is potentially suitable for solar PV + number_solar_panels = np.floor(self.property.solar_pv_roof_area / self.SOLAR_PANEL_AREA) + solar_panel_capacity = number_solar_panels * self.SOLAR_PANEL_WATTAGE + + # Given the wattage, we estimate the cost of the solar PV system. This is based on the MCS database + # of solar PV installations From c3dbec670399c39072d528d109c99d7ef69e4ff1 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 5 Jan 2024 12:22:20 +0000 Subject: [PATCH 03/26] set up the rest of the solar recommendation --- backend/Property.py | 13 +++++++-- backend/app/plan/router.py | 8 +++++- etl/testing_data/solar_research.py | 2 +- recommendations/Costs.py | 34 +++++++++++++++++++++-- recommendations/SolarPvRecommendations.py | 21 ++++++++++++-- 5 files changed, 69 insertions(+), 9 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index f5325722..0cb295a7 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -221,11 +221,15 @@ class Property(Definitions): setattr(self, attribute, value) - def get_components(self, cleaned): + def get_components(self, cleaned, photo_supply_lookup, floor_area_decile_thresholds): """ Given the cleaning that has been performed, we'll use this to identify the property components, from roof to walls to windows, heating and hot water :param cleaned: This is the dictionary of components found in cleaner.cleaned + :param photo_supply_lookup: This is the lookup table for the photo 
supply, used to estimate the percentage + of the roof that is suitable for solar panels + :param floor_area_decile_thresholds: This is the decile thresholds for the floor area, used in estimating the + solar pv roof area :return: """ @@ -295,6 +299,9 @@ class Property(Definitions): self.set_floor_type() self.set_floor_level() self.set_windows_count() + self.set_solar_panel_area( + photo_supply_lookup=photo_supply_lookup, floor_area_decile_thresholds=floor_area_decile_thresholds + ) def set_age_band(self): """ @@ -849,7 +856,9 @@ class Property(Definitions): return i # Returns the decile index (0 to 9) return len(thresholds) - floor_area_decile = classify_floor_area(self.floor_area, floor_area_decile_thresholds) + floor_area_decile = classify_floor_area( + self.floor_area, floor_area_decile_thresholds["floor_area_decile_thresholds"].values + ) # Given the photo_supply_lookup, we esimate the percentage of the roof that is suitable for solar panels diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 89347be2..77ee9869 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -67,6 +67,12 @@ async def trigger_plan(body: PlanTriggerRequest): cleaning_data = read_parquet_from_s3( bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet", ) + photo_supply_lookup = read_parquet_from_s3( + bucket_name=get_settings().DATA_BUCKET, file_key="solar_pv_supply/photo_supply_lookup.parquet", + ) + floor_area_decile_thresholds = read_parquet_from_s3( + bucket_name=get_settings().DATA_BUCKET, file_key="solar_pv_supply/floor_area_decile_thresholds.parquet", + ) input_properties = [] for config in plan_input: @@ -129,7 +135,7 @@ async def trigger_plan(body: PlanTriggerRequest): for p in input_properties: # Property recommendations - p.get_components(cleaned) + p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds) # This is temp - this should happen after scoring cleaned_property_data = 
DataProcessor.apply_averages_cleaning( diff --git a/etl/testing_data/solar_research.py b/etl/testing_data/solar_research.py index 4e60fa7a..8ce8a6ac 100644 --- a/etl/testing_data/solar_research.py +++ b/etl/testing_data/solar_research.py @@ -101,7 +101,7 @@ def app(): save_dataframe_to_s3_parquet( df=aggregated, bucket_name="retrofit-data-dev", - file_key=f"solar_pv_supply/photo_supply_lookup.parquet", + file_key="solar_pv_supply/photo_supply_lookup.parquet", ) floor_area_decile_thresholds = pd.DataFrame(decile_thresholds, columns=["floor_area_decile_thresholds"]) diff --git a/recommendations/Costs.py b/recommendations/Costs.py index cc993143..654dd7a8 100644 --- a/recommendations/Costs.py +++ b/recommendations/Costs.py @@ -831,5 +831,35 @@ class Costs: "labour_days": labour_days } - def solar_pv(self, wattage): - pass + def solar_pv(self, wattage: float): + + """ + Calculates the total cost for solar PV based data provided by the MCS dashboard, which contains + costing data for installations of renewable and clean energy measures. + + The data in the dashboard is filtered on domestic building installations and then the data across the + various regions is manually collected. There is currently no automated way to get the data from the MCS + dashboard + :param wattage: + :return: + """ + + # Get the cost data relevant to the region + regional_cost = MCS_SOLAR_PV_COST_DATA["-".join(["average_cost_per_kwh", self.region])] + + kw = wattage / 1000 + total_cost = kw * regional_cost + + subtotal_before_vat = total_cost / (1 + self.VAT_RATE) + vat = total_cost - subtotal_before_vat + + # Labour hours are based on estimates from online research but an average team seems to consist of 3 people + # and most jobs take around 2 days. 
Assuming an 8 hour day for 3 people across 2 days, gives us 72 hours of + # labour + return { + "total": total_cost, + "subtotal": subtotal_before_vat, + "vat": vat, + "labour_hours": 72, + "labour_days": 2, + } diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py index e8b76988..ebe774bf 100644 --- a/recommendations/SolarPvRecommendations.py +++ b/recommendations/SolarPvRecommendations.py @@ -16,7 +16,7 @@ class SolarPvRecommendations: self.property = property_instance self.costs = Costs(self.property) - self.recommendations = [] + self.recommendation = [] def recommend(self): """ @@ -35,12 +35,27 @@ class SolarPvRecommendations: None, 0, self.property.DATA_ANOMALY_MATCHES ] - if not is_valid_property_type or not is_valid_roof_type or has_no_existing_solar_pv: + if not is_valid_property_type or not is_valid_roof_type or not has_no_existing_solar_pv: return # We now have a property which is potentially suitable for solar PV number_solar_panels = np.floor(self.property.solar_pv_roof_area / self.SOLAR_PANEL_AREA) - solar_panel_capacity = number_solar_panels * self.SOLAR_PANEL_WATTAGE + solar_panel_wattage = number_solar_panels * self.SOLAR_PANEL_WATTAGE # Given the wattage, we estimate the cost of the solar PV system. 
This is based on the MCS database # of solar PV installations + cost_result = self.costs.solar_pv(wattage=solar_panel_wattage) + + kw = int(np.round(solar_panel_wattage / 1000)) + + self.recommendation = [ + { + "parts": [], + "type": "solar_pv", + "description": f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) panel system on the roof", + "starting_u_value": None, + "new_u_value": None, + "sap_points": None, + **cost_result, + } + ] From 74a4cc1068978a91a1076d0e920301355bc58a32 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 5 Jan 2024 12:50:27 +0000 Subject: [PATCH 04/26] Added solar recommendations to Recommendation class --- backend/Property.py | 3 +++ recommendations/Recommendations.py | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/backend/Property.py b/backend/Property.py index 0cb295a7..45c0b406 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -89,6 +89,7 @@ class Property(Definitions): self.floor_level = None self.number_of_windows = None self.solar_pv_roof_area = None + self.solar_pv_percentage = None self.current_adjusted_energy = None self.expected_adjusted_energy = None @@ -886,3 +887,5 @@ class Property(Definitions): self.insulation_floor_area * percentage_of_roof if self.roof["is_flat"] else self.pitched_roof_area * percentage_of_roof ) + + self.solar_pv_percentage = percentage_of_roof diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index a5d1f35c..2b35ffea 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -6,6 +6,7 @@ from recommendations.RoofRecommendations import RoofRecommendations from recommendations.VentilationRecommendations import VentilationRecommendations from recommendations.FireplaceRecommendations import FireplaceRecommendations from recommendations.LightingRecommendations import LightingRecommendations +from recommendations.SolarPvRecommendations import SolarPvRecommendations from 
recommendations.WindowsRecommendations import WindowsRecommendations from backend.ml_models.AnnualBillSavings import AnnualBillSavings @@ -37,6 +38,7 @@ class Recommendations: self.fireplace_recommender = FireplaceRecommendations(property_instance=property_instance) self.lighting_recommender = LightingRecommendations(property_instance=property_instance, materials=materials) self.windows_recommender = WindowsRecommendations(property_instance=property_instance, materials=materials) + self.solar_recommender = SolarPvRecommendations(property_instance=property_instance) def recommend(self): @@ -84,6 +86,11 @@ class Recommendations: if self.windows_recommender.recommendation: property_recommendations.append(self.windows_recommender.recommendation) + # Solar recommendations + self.solar_recommender.recommend() + if self.solar_recommender.recommendation: + property_recommendations.append(self.solar_recommender.recommendation) + # We insert temporary ids into the recommendations which is important for the optimiser later property_recommendations = self.insert_temp_recommendation_id(property_recommendations) From 49b0a1d9011ef146134d46cb14906e18eabcc12d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 5 Jan 2024 13:04:07 +0000 Subject: [PATCH 05/26] Added in rest of solar --- backend/app/plan/utils.py | 5 ++++- etl/testing_data/solar_research.py | 2 +- recommendations/Costs.py | 5 ++++- recommendations/SolarPvRecommendations.py | 1 + 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/backend/app/plan/utils.py b/backend/app/plan/utils.py index b82be297..fe7939f3 100644 --- a/backend/app/plan/utils.py +++ b/backend/app/plan/utils.py @@ -194,12 +194,15 @@ def create_recommendation_scoring_data( else: raise ValueError("Invalid glazing type - implement me") + if recommendation["type"] == "solar_pv": + scoring_dict["PHOTO_SUPPLY_ENDING"] = property.solar_pv_percentage + if recommendation["type"] not in [ "mechanical_ventilation", 
"sealing_open_fireplace", "low_energy_lighting", "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation", "loft_insulation", "room_roof_insulation", "flat_roof_insulation", "solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation", - "windows_glazing" + "windows_glazing", "solar_pv" ]: raise NotImplementedError("Implement me") diff --git a/etl/testing_data/solar_research.py b/etl/testing_data/solar_research.py index 8ce8a6ac..e66e992c 100644 --- a/etl/testing_data/solar_research.py +++ b/etl/testing_data/solar_research.py @@ -35,7 +35,7 @@ def app(): data = df[ ["UPRN", "PROPERTY_TYPE", "TENURE", "BUILT_FORM", "ROOF_DESCRIPTION", "PHOTO_SUPPLY", "TOTAL_FLOOR_AREA", - "CONSTRUCTION_AGE_BAND"] + "CONSTRUCTION_AGE_BAND", "SOLAR_WATER_HEATING_FLAG"] ].copy() data["PHOTO_SUPPLY"] = data["PHOTO_SUPPLY"].fillna(0) data = data[data["PHOTO_SUPPLY"] != 0] diff --git a/recommendations/Costs.py b/recommendations/Costs.py index 654dd7a8..8dbb9cc9 100644 --- a/recommendations/Costs.py +++ b/recommendations/Costs.py @@ -840,7 +840,10 @@ class Costs: The data in the dashboard is filtered on domestic building installations and then the data across the various regions is manually collected. 
There is currently no automated way to get the data from the MCS dashboard - :param wattage: + + Price can also be benchmarked against this checkatrade article: + https://www.checkatrade.com/blog/cost-guides/cost-of-solar-panel-installation/ + :param wattage: Peak wattage of the solar PV system :return: """ diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py index ebe774bf..4dcf6104 100644 --- a/recommendations/SolarPvRecommendations.py +++ b/recommendations/SolarPvRecommendations.py @@ -57,5 +57,6 @@ class SolarPvRecommendations: "new_u_value": None, "sap_points": None, **cost_result, + "photo_supply": self.property.solar_pv_percentage # This is required for simulating the SAP impact } ] From c5361706efc9d383b4bab9f0a6db9ecc54135af7 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 5 Jan 2024 15:25:48 +0000 Subject: [PATCH 06/26] setting up solar client --- backend/app/plan/router.py | 40 +++---- backend/app/plan/utils.py | 2 +- backend/app/utils.py | 13 --- etl/solar/SolarPhotoSupply.py | 136 ++++++++++++++++++++++ etl/solar/app.py | 34 ++++++ recommendations/SolarPvRecommendations.py | 4 +- 6 files changed, 194 insertions(+), 35 deletions(-) create mode 100644 etl/solar/SolarPhotoSupply.py create mode 100644 etl/solar/app.py diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 77ee9869..d28e6518 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -23,7 +23,7 @@ from backend.app.db.models.portfolio import rating_lookup from backend.app.dependencies import validate_token from backend.app.plan.schemas import PlanTriggerRequest from backend.app.plan.utils import create_recommendation_scoring_data, get_cleaned -from backend.app.utils import epc_to_sap_lower_bound, read_csv_from_s3, read_parquet_from_s3, sap_to_epc +from backend.app.utils import epc_to_sap_lower_bound, read_csv_from_s3, sap_to_epc from backend.ml_models.api import ModelApi from backend.Property 
import Property @@ -34,7 +34,7 @@ from recommendations.optimiser.GainOptimiser import GainOptimiser from recommendations.optimiser.optimiser_functions import prepare_input_measures from recommendations.Recommendations import Recommendations from utils.logger import setup_logger -from utils.s3 import read_dataframe_from_s3_parquet +from utils.s3 import read_dataframe_from_s3_parquet, read_dataframe_from_s3_parquet from backend.ml_models.Valuation import PropertyValuation from backend.ml_models.AnnualBillSavings import AnnualBillSavings @@ -61,19 +61,6 @@ async def trigger_plan(body: PlanTriggerRequest): logger.info("Getting the inputs") plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path) - uprn_filenames = read_dataframe_from_s3_parquet( - bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet" - ) - cleaning_data = read_parquet_from_s3( - bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet", - ) - photo_supply_lookup = read_parquet_from_s3( - bucket_name=get_settings().DATA_BUCKET, file_key="solar_pv_supply/photo_supply_lookup.parquet", - ) - floor_area_decile_thresholds = read_parquet_from_s3( - bucket_name=get_settings().DATA_BUCKET, file_key="solar_pv_supply/floor_area_decile_thresholds.parquet", - ) - input_properties = [] for config in plan_input: # We validate each record in the file. 
If the record is NOT valid, we need to handle this accordingly @@ -115,17 +102,30 @@ async def trigger_plan(body: PlanTriggerRequest): if not input_properties: return Response(status_code=204) - logger.info("Getting spatial data") - for p in input_properties: - p.get_spatial_data(uprn_filenames) - # The materials data could be cached or local so we don't need to make # consistent requests to the backend for # the same data - logger.info("Reading in materials and cleaned datasets") + logger.info("Reading in data sources required for the engine") materials = get_materials(session) cleaned = get_cleaned() + uprn_filenames = read_dataframe_from_s3_parquet( + bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet" + ) + cleaning_data = read_dataframe_from_s3_parquet( + bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet", + ) + photo_supply_lookup = read_dataframe_from_s3_parquet( + bucket_name=get_settings().DATA_BUCKET, file_key="solar_pv_supply/photo_supply_lookup.parquet", + ) + floor_area_decile_thresholds = read_dataframe_from_s3_parquet( + bucket_name=get_settings().DATA_BUCKET, file_key="solar_pv_supply/floor_area_decile_thresholds.parquet", + ) + + logger.info("Getting spatial data") + for p in input_properties: + p.get_spatial_data(uprn_filenames) + logger.info("Getting components and epc recommendations") recommendations = {} diff --git a/backend/app/plan/utils.py b/backend/app/plan/utils.py index fe7939f3..7672c316 100644 --- a/backend/app/plan/utils.py +++ b/backend/app/plan/utils.py @@ -195,7 +195,7 @@ def create_recommendation_scoring_data( raise ValueError("Invalid glazing type - implement me") if recommendation["type"] == "solar_pv": - scoring_dict["PHOTO_SUPPLY_ENDING"] = property.solar_pv_percentage + scoring_dict["PHOTO_SUPPLY_ENDING"] = recommendation["photo_supply"] if recommendation["type"] not in [ "mechanical_ventilation", "sealing_open_fireplace", "low_energy_lighting", diff --git 
a/backend/app/utils.py b/backend/app/utils.py index d912a94a..9a03ab21 100644 --- a/backend/app/utils.py +++ b/backend/app/utils.py @@ -121,19 +121,6 @@ def epc_to_sap_lower_bound(epc: str): raise ValueError("EPC rating should be between A and G") -def read_parquet_from_s3(bucket_name, file_key): - client = boto3.client('s3') - - # Get the object - s3_object = client.get_object(Bucket=bucket_name, Key=file_key) - - # Read the CSV body into a DataFrame - csv_body = s3_object["Body"].read() - df = pd.read_parquet(BytesIO(csv_body)) - - return df - - def save_dataframe_to_s3_parquet(df, bucket_name, file_key): """ Save a pandas DataFrame to S3 as a Parquet file. diff --git a/etl/solar/SolarPhotoSupply.py b/etl/solar/SolarPhotoSupply.py new file mode 100644 index 00000000..c6e2f9cb --- /dev/null +++ b/etl/solar/SolarPhotoSupply.py @@ -0,0 +1,136 @@ +import pandas as pd +from tqdm import tqdm +from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet +from utils.logger import setup_logger + +logger = setup_logger() + + +class SolarPhotoSupply: + DATASET_COLUMNS = [ + "UPRN", "PROPERTY_TYPE", "TENURE", "BUILT_FORM", "ROOF_DESCRIPTION", "PHOTO_SUPPLY", "TOTAL_FLOOR_AREA", + "CONSTRUCTION_AGE_BAND", "SOLAR_WATER_HEATING_FLAG" + ] + + def __init__(self, file_directories, cleaned_lookup): + self.file_directories = file_directories + + self.results = [] + self.decile_thresholds = None + + self.roof_lookup = pd.DataFrame(cleaned_lookup["roof-description"]) + + self.photo_supply_lookup = pd.DataFrame() + self.floor_area_decile_thresholds = pd.DataFrame() + + def create_dataset(self): + + results = [] + + logger.info("Creating solar photo supply dataset") + for dir in tqdm(self.file_directories): + filepath = dir / "certificates.csv" + df = pd.read_csv(filepath, low_memory=False) + df = df[~pd.isnull(df["UPRN"])] + df["UPRN"] = df["UPRN"].astype(int).astype(str) + # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, 
TOTAL_FLOOR_AREA + for col in ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]: + df = df[~pd.isnull(df[col])] + # Take newest LODGEMENT_DATE per UPRN + df = df.sort_values(by="LODGEMENT_DATE", ascending=False).drop_duplicates(subset=["UPRN"]) + + data = df[self.DATASET_COLUMNS].copy() + data["PHOTO_SUPPLY"] = data["PHOTO_SUPPLY"].fillna(0) + data = data[data["PHOTO_SUPPLY"] != 0] + results.append(data) + + self.results = pd.concat(results) + + # Convert total floor area to deciles + self.decile_thresholds = self.results["TOTAL_FLOOR_AREA"].quantile( + [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] + ).values + + self.results["floor_area_decile"] = pd.cut( + self.results["TOTAL_FLOOR_AREA"], + bins=[0] + list(self.decile_thresholds) + [float('inf')], + labels=False, + include_lowest=True + ) + + # Convert tenure to lower + self.results["TENURE"] = self.results["TENURE"].str.lower() + + self.results = self.results.merge( + self.roof_lookup.drop( + columns=[ + "clean_description", "thermal_transmittance", "thermal_transmittance_unit", "insulation_thickness", + "is_assumed" + ] + ), + left_on="ROOF_DESCRIPTION", + right_on="original_description", + how="left" + ) + + self.photo_supply_lookup = self.results.groupby( + [ + "PROPERTY_TYPE", "BUILT_FORM", "TENURE", "is_pitched", "is_roof_room", "is_flat", + "CONSTRUCTION_AGE_BAND", "floor_area_decile" + ], + observed=True + ).agg( + { + "PHOTO_SUPPLY": ["median", "mean"], + } + ).reset_index() + + self.photo_supply_lookup.columns = ['_'.join(col).strip() for col in self.photo_supply_lookup.columns.values] + # Remove trailing underscore from columns + self.photo_supply_lookup.columns = [ + col[:-1] if col.endswith("_") else col for col in self.photo_supply_lookup.columns.values + ] + # Convert columns to lowercase + self.photo_supply_lookup.columns = [col.lower() for col in self.photo_supply_lookup.columns.values] + + self.floor_area_decile_thresholds = pd.DataFrame( + self.decile_thresholds, + 
columns=["floor_area_decile_thresholds"] + ) + + @staticmethod + def classify_floor_area(new_area, thresholds): + + for i, threshold in enumerate(thresholds): + if new_area <= threshold: + return i # Returns the decile index (0 to 9) + return len(thresholds) + + def save(self): + if self.photo_supply_lookup.empty: + raise ValueError("No data to save") + + # Store this data in s3 as a parquet file + + save_dataframe_to_s3_parquet( + df=self.photo_supply_lookup, + bucket_name="retrofit-data-dev", + file_key="solar_pv_supply/photo_supply_lookup.parquet", + ) + + save_dataframe_to_s3_parquet( + df=self.floor_area_decile_thresholds, + bucket_name="retrofit-data-dev", + file_key=f"solar_pv_supply/floor_area_decile_thresholds.parquet", + ) + + @staticmethod + def load(bucket): + photo_supply_lookup = read_dataframe_from_s3_parquet( + bucket_name=bucket, file_key="solar_pv_supply/photo_supply_lookup.parquet", + ) + floor_area_decile_thresholds = read_dataframe_from_s3_parquet( + bucket_name=bucket, file_key="solar_pv_supply/floor_area_decile_thresholds.parquet", + ) + + return photo_supply_lookup, floor_area_decile_thresholds diff --git a/etl/solar/app.py b/etl/solar/app.py new file mode 100644 index 00000000..29802e72 --- /dev/null +++ b/etl/solar/app.py @@ -0,0 +1,34 @@ +import pandas as pd +from pathlib import Path +from tqdm import tqdm +from etl.epc.property_change_app import get_cleaned +from etl.solar.SolarPhotoSupply import SolarPhotoSupply +from utils.s3 import save_dataframe_to_s3_parquet + +DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates" + + +def app(): + """ + This code reads in the EPC data and attempt to produce a reasonable figure for the photo-supply variable, which + is the following: + "Percentage of photovoltaic area as a percentage of total roof area. 0% indicates that a Photovoltaic Supply + is not present in the property." 
+ + When recommending solar, we want to simulate the retrofit by increasing this value from 0, so we need a sensible + figure to increase this to. This script will pull the data for that, to allow us to try and deduce what + a sensible figure would be + :return: + """ + + directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()] + cleaned_lookup = get_cleaned() + + solar_data_client = SolarPhotoSupply( + file_directories=directories, + cleaned_lookup=cleaned_lookup + ) + + solar_data_client.create_dataset() + + solar_data_client.save() diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py index 4dcf6104..5163c1cb 100644 --- a/recommendations/SolarPvRecommendations.py +++ b/recommendations/SolarPvRecommendations.py @@ -57,6 +57,8 @@ class SolarPvRecommendations: "new_u_value": None, "sap_points": None, **cost_result, - "photo_supply": self.property.solar_pv_percentage # This is required for simulating the SAP impact + # This is required for simulating the SAP impact. 
solar_pv_percentage is between 0 & 1 so we scale + # back up here + "photo_supply": 100 * self.property.solar_pv_percentage } ] From e9d3577cf61d36170ee3f6452a586efce202a467 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 5 Jan 2024 16:07:23 +0000 Subject: [PATCH 07/26] set up solar etc process and deleted research script --- etl/solar/SolarPhotoSupply.py | 1 + etl/solar/app.py | 3 - etl/testing_data/solar_research.py | 112 ----------------------------- 3 files changed, 1 insertion(+), 115 deletions(-) delete mode 100644 etl/testing_data/solar_research.py diff --git a/etl/solar/SolarPhotoSupply.py b/etl/solar/SolarPhotoSupply.py index c6e2f9cb..dadb71f0 100644 --- a/etl/solar/SolarPhotoSupply.py +++ b/etl/solar/SolarPhotoSupply.py @@ -110,6 +110,7 @@ class SolarPhotoSupply: if self.photo_supply_lookup.empty: raise ValueError("No data to save") + logger.info("Storing outputs to S3") # Store this data in s3 as a parquet file save_dataframe_to_s3_parquet( diff --git a/etl/solar/app.py b/etl/solar/app.py index 29802e72..50a3d282 100644 --- a/etl/solar/app.py +++ b/etl/solar/app.py @@ -1,9 +1,6 @@ -import pandas as pd from pathlib import Path -from tqdm import tqdm from etl.epc.property_change_app import get_cleaned from etl.solar.SolarPhotoSupply import SolarPhotoSupply -from utils.s3 import save_dataframe_to_s3_parquet DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates" diff --git a/etl/testing_data/solar_research.py b/etl/testing_data/solar_research.py deleted file mode 100644 index e66e992c..00000000 --- a/etl/testing_data/solar_research.py +++ /dev/null @@ -1,112 +0,0 @@ -import pandas as pd -from pathlib import Path -from tqdm import tqdm -from etl.epc.property_change_app import get_cleaned -from utils.s3 import save_dataframe_to_s3_parquet - -DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates" - - -def app(): - """ - This code reads in the EPC data and attempt to produce a reasonable 
figure for the photo-supply variable, which - is the following: - "Percentage of photovoltaic area as a percentage of total roof area. 0% indicates that a Photovoltaic Supply - is not present in the property." - - When recommending solar, we want to simulate the retrofit by increasing this value from 0, so we need a sensible - figure to increase this to. This script will pull the data for that, to allow us to try and deduce what - a sensible figure would be - :return: - """ - - directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()] - results = [] - for dir in tqdm(directories): - filepath = dir / "certificates.csv" - df = pd.read_csv(filepath, low_memory=False) - df = df[~pd.isnull(df["UPRN"])] - df["UPRN"] = df["UPRN"].astype(int).astype(str) - # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA - for col in ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]: - df = df[~pd.isnull(df[col])] - # Take newest LODGEMENT_DATE per UPRN - df = df.sort_values(by="LODGEMENT_DATE", ascending=False).drop_duplicates(subset=["UPRN"]) - - data = df[ - ["UPRN", "PROPERTY_TYPE", "TENURE", "BUILT_FORM", "ROOF_DESCRIPTION", "PHOTO_SUPPLY", "TOTAL_FLOOR_AREA", - "CONSTRUCTION_AGE_BAND", "SOLAR_WATER_HEATING_FLAG"] - ].copy() - data["PHOTO_SUPPLY"] = data["PHOTO_SUPPLY"].fillna(0) - data = data[data["PHOTO_SUPPLY"] != 0] - results.append(data) - - results = pd.concat(results) - - # Convert total floor area to deciles - decile_thresholds = results["TOTAL_FLOOR_AREA"].quantile([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]).values - - def classify_floor_area(new_area, thresholds): - for i, threshold in enumerate(thresholds): - if new_area <= threshold: - return i # Returns the decile index (0 to 9) - return len(thresholds) - - # Assuming 'new_data' is your new DataFrame with floor area data - results["floor_area_decile"] = pd.cut( - results["TOTAL_FLOOR_AREA"], - bins=[0] + 
list(decile_thresholds) + [float('inf')], - labels=False, - include_lowest=True - ) - - # Convert tenure to lower - results["TENURE"] = results["TENURE"].str.lower() - - # Append on the roof details - cleaned_lookup = get_cleaned() - lookup = pd.DataFrame(cleaned_lookup["roof-description"]) - - results = results.merge( - lookup.drop( - columns=[ - "clean_description", "thermal_transmittance", "thermal_transmittance_unit", "insulation_thickness", - "is_assumed" - ] - ), - left_on="ROOF_DESCRIPTION", - right_on="original_description", - how="left" - ) - - aggregated = results.groupby( - [ - "PROPERTY_TYPE", "BUILT_FORM", "TENURE", "is_pitched", "is_roof_room", "is_flat", - "CONSTRUCTION_AGE_BAND", "floor_area_decile" - ], - observed=True - ).agg( - { - "PHOTO_SUPPLY": ["median", "mean"], - } - ).reset_index() - - aggregated.columns = ['_'.join(col).strip() for col in aggregated.columns.values] - # Remove trailing underscore from columns - aggregated.columns = [col[:-1] if col.endswith("_") else col for col in aggregated.columns.values] - # Convert columns to lowercase - aggregated.columns = [col.lower() for col in aggregated.columns.values] - - # Store this data in s3 as a parquet file - save_dataframe_to_s3_parquet( - df=aggregated, - bucket_name="retrofit-data-dev", - file_key="solar_pv_supply/photo_supply_lookup.parquet", - ) - - floor_area_decile_thresholds = pd.DataFrame(decile_thresholds, columns=["floor_area_decile_thresholds"]) - save_dataframe_to_s3_parquet( - df=floor_area_decile_thresholds, - bucket_name="retrofit-data-dev", - file_key=f"solar_pv_supply/floor_area_decile_thresholds.parquet", - ) From ce2229f817e1c856f1d6750fc7925410dc4935ec Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 5 Jan 2024 16:18:21 +0000 Subject: [PATCH 08/26] creating the filter_photo_supply_lookup method --- backend/app/plan/router.py | 8 +--- backend/ml_models/api.py | 5 +-- etl/solar/SolarPhotoSupply.py | 85 ++++++++++++++++++++++++++++++++++- 3 files changed, 
88 insertions(+), 10 deletions(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index d28e6518..1a499d27 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -29,6 +29,7 @@ from backend.ml_models.api import ModelApi from backend.Property import Property from etl.epc.DataProcessor import DataProcessor from etl.epc.settings import COLUMNS_TO_MERGE_ON +from etl.solar.SolarPhotoSupply import SolarPhotoSupply from recommendations.optimiser.CostOptimiser import CostOptimiser from recommendations.optimiser.GainOptimiser import GainOptimiser from recommendations.optimiser.optimiser_functions import prepare_input_measures @@ -115,12 +116,7 @@ async def trigger_plan(body: PlanTriggerRequest): cleaning_data = read_dataframe_from_s3_parquet( bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet", ) - photo_supply_lookup = read_dataframe_from_s3_parquet( - bucket_name=get_settings().DATA_BUCKET, file_key="solar_pv_supply/photo_supply_lookup.parquet", - ) - floor_area_decile_thresholds = read_dataframe_from_s3_parquet( - bucket_name=get_settings().DATA_BUCKET, file_key="solar_pv_supply/floor_area_decile_thresholds.parquet", - ) + photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET) logger.info("Getting spatial data") for p in input_properties: diff --git a/backend/ml_models/api.py b/backend/ml_models/api.py index e6947906..bc09f26c 100644 --- a/backend/ml_models/api.py +++ b/backend/ml_models/api.py @@ -2,8 +2,7 @@ import pandas as pd import requests from requests.exceptions import RequestException from utils.logger import setup_logger -from utils.s3 import save_dataframe_to_s3_parquet -from backend.app.utils import read_parquet_from_s3 +from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet logger = setup_logger() @@ -125,7 +124,7 @@ class ModelApi: # Retrieve the predictions predictions_df = pd.DataFrame( - 
read_parquet_from_s3( + read_dataframe_from_s3_parquet( bucket_name=predictions_bucket, file_key=response["storage_filepath"].split(predictions_bucket + "/")[1] ) diff --git a/etl/solar/SolarPhotoSupply.py b/etl/solar/SolarPhotoSupply.py index dadb71f0..9fad1831 100644 --- a/etl/solar/SolarPhotoSupply.py +++ b/etl/solar/SolarPhotoSupply.py @@ -13,6 +13,13 @@ class SolarPhotoSupply: ] def __init__(self, file_directories, cleaned_lookup): + """ + Initialize the SolarPhotoSupply class with file directories and a cleaned lookup. Currently, this class + just works with locally stored data, but this could be extended to work with data stored in S3. + + :param file_directories: A list of directories where files are stored. + :param cleaned_lookup: A dictionary containing cleaned lookup data. + """ self.file_directories = file_directories self.results = [] @@ -24,7 +31,10 @@ class SolarPhotoSupply: self.floor_area_decile_thresholds = pd.DataFrame() def create_dataset(self): - + """ + Create a dataset from the provided file directories. This method processes the data files, + applies transformations, and aggregates data into a useful format. + """ results = [] logger.info("Creating solar photo supply dataset") @@ -100,6 +110,13 @@ class SolarPhotoSupply: @staticmethod def classify_floor_area(new_area, thresholds): + """ + Classify a given floor area into a decile based on provided thresholds. + + :param new_area: The new floor area to be classified. + :param thresholds: A list of thresholds used for classification. + :return: An integer representing the decile index. + """ for i, threshold in enumerate(thresholds): if new_area <= threshold: @@ -107,6 +124,10 @@ class SolarPhotoSupply: return len(thresholds) def save(self): + """ + Save the processed data to an S3 bucket in the parquet format. This method also handles + logging and validation to ensure data is present before saving. 
+ """ if self.photo_supply_lookup.empty: raise ValueError("No data to save") @@ -127,6 +148,12 @@ class SolarPhotoSupply: @staticmethod def load(bucket): + """ + Load datasets from an S3 bucket. + + :param bucket: The name of the S3 bucket to load data from. + :return: A tuple containing photo supply lookup and floor area decile thresholds dataframes. + """ photo_supply_lookup = read_dataframe_from_s3_parquet( bucket_name=bucket, file_key="solar_pv_supply/photo_supply_lookup.parquet", ) @@ -135,3 +162,59 @@ class SolarPhotoSupply: ) return photo_supply_lookup, floor_area_decile_thresholds + + @classmethod + def filter_photo_supply_lookup( + cls, + photo_supply_lookup: pd.DataFrame, + floor_area_decile_thresholds: pd.DataFrame, + tenure: str, + built_form: str, + property_type: str, + construction_age_band: str, + is_flat: bool, + is_pitched: bool, + is_roof_room: bool, + floor_area: float + ): + + """ + Filter the photo supply lookup to find the most appropriate photo supply for a given property. + :param photo_supply_lookup: The photo supply lookup dataframe. + :param floor_area_decile_thresholds: The floor area decile thresholds dataframe. + :param tenure: The tenure of the property. + :param built_form: The built form of the property. + :param property_type: The property type of the property. + :param construction_age_band: The construction age band of the property. + :param is_flat: Whether the property has a flat roof. + :param is_pitched: Whether the property has a pitched roof. + :param is_roof_room: Whether the property has a roof room. + :param floor_area: The floor area of the property. 
+ :return: + """ + + # Convert the tenure to lower case, as is done in the creation of the dataset + tenure = tenure.lower() + photo_supply_matched = photo_supply_lookup[ + (photo_supply_lookup["tenure"] == tenure) & + (photo_supply_lookup["built_form"] == built_form) & + (photo_supply_lookup["property_type"] == property_type) & + (photo_supply_lookup["construction_age_band"] == construction_age_band) & + (photo_supply_lookup["is_flat"] == is_flat) & + (photo_supply_lookup["is_pitched"] == is_pitched) & + (photo_supply_lookup["is_roof_room"] == is_roof_room) + ] + + if photo_supply_matched.empty: + raise ValueError("No photo supply matched") + + floor_area_decile = cls.classify_floor_area( + floor_area, floor_area_decile_thresholds["floor_area_decile_thresholds"].values + ) + + if floor_area_decile in photo_supply_matched["floor_area_decile"].values: + photo_supply_matched = photo_supply_matched[ + photo_supply_matched["floor_area_decile"] == floor_area_decile + ] + + return photo_supply_matched From 7c532d3c87d01fd4760c37da226310db98d3ad9c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 5 Jan 2024 16:23:41 +0000 Subject: [PATCH 09/26] implementing filter_photo_supply_lookup in property class --- backend/Property.py | 41 ++++++++++++----------------------------- 1 file changed, 12 insertions(+), 29 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 45c0b406..3dbcc2b8 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -8,6 +8,7 @@ import pandas as pd from etl.epc.DataProcessor import DataProcessor from etl.epc.settings import POTENTIAL_COLUMNS, EFFICIENCY_FEATURES, BUILT_FORM_REMAP from etl.epc_clean.epc_attributes.all_cleaners import all_cleaner_map +from etl.solar.SolarPhotoSupply import SolarPhotoSupply from utils.logger import setup_logger from utils.s3 import read_dataframe_from_s3_parquet from BaseUtility import Definitions @@ -849,37 +850,19 @@ class Property(Definitions): "Need to set insulation floor area 
and pitched roof area before setting solar pv roof area" ) - # TODO: Create a class for the solar etl process and make this one of the functions, which applies a different - # method depending on the data type - def classify_floor_area(new_area, thresholds): - for i, threshold in enumerate(thresholds): - if new_area <= threshold: - return i # Returns the decile index (0 to 9) - return len(thresholds) - - floor_area_decile = classify_floor_area( - self.floor_area, floor_area_decile_thresholds["floor_area_decile_thresholds"].values + photo_supply_matched = SolarPhotoSupply.filter_photo_supply_lookup( + photo_supply_lookup=photo_supply_lookup, + floor_area_decile_thresholds=floor_area_decile_thresholds, + tenure=self.data["tenure"], + built_form=self.data["built-form"], + property_type=self.data["property-type"], + construction_age_band=self.construction_age_band, + is_flat=self.roof["is_flat"], + is_pitched=self.roof["is_pitched"], + is_roof_room=self.roof["is_roof_room"], + floor_area=self.floor_area ) - # Given the photo_supply_lookup, we esimate the percentage of the roof that is suitable for solar panels - - # TODO: Move this to the ETL process, since we need to know that tenure should be lower - tenure = self.data["tenure"].lower() - photo_supply_matched = photo_supply_lookup[ - (photo_supply_lookup["tenure"] == tenure) & - (photo_supply_lookup["built_form"] == self.data["built-form"]) & - (photo_supply_lookup["property_type"] == self.data["property-type"]) & - (photo_supply_lookup["construction_age_band"] == self.construction_age_band) & - (photo_supply_lookup["is_flat"] == self.roof["is_flat"]) & - (photo_supply_lookup["is_pitched"] == self.roof["is_pitched"]) & - (photo_supply_lookup["is_roof_room"] == self.roof["is_roof_room"]) - ] - - if floor_area_decile in photo_supply_matched["floor_area_decile"].values: - photo_supply_matched = photo_supply_matched[ - photo_supply_matched["floor_area_decile"] == floor_area_decile - ] - percentage_of_roof = 
photo_supply_matched["photo_supply_median"].mean() percentage_of_roof = percentage_of_roof / 100 From 7740c31874d500f93ecaaee53f4a626f7c1b60a6 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 5 Jan 2024 16:37:05 +0000 Subject: [PATCH 10/26] set up tests for TestSolarPhotoSupply --- backend/app/plan/router.py | 2 +- etl/solar/SolarPhotoSupply.py | 6 +++++- etl/solar/tests/test_solar_photo_supply.py | 25 ++++++++++++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 etl/solar/tests/test_solar_photo_supply.py diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 1a499d27..217672fd 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -35,7 +35,7 @@ from recommendations.optimiser.GainOptimiser import GainOptimiser from recommendations.optimiser.optimiser_functions import prepare_input_measures from recommendations.Recommendations import Recommendations from utils.logger import setup_logger -from utils.s3 import read_dataframe_from_s3_parquet, read_dataframe_from_s3_parquet +from utils.s3 import read_dataframe_from_s3_parquet from backend.ml_models.Valuation import PropertyValuation from backend.ml_models.AnnualBillSavings import AnnualBillSavings diff --git a/etl/solar/SolarPhotoSupply.py b/etl/solar/SolarPhotoSupply.py index 9fad1831..6a225b5a 100644 --- a/etl/solar/SolarPhotoSupply.py +++ b/etl/solar/SolarPhotoSupply.py @@ -25,7 +25,7 @@ class SolarPhotoSupply: self.results = [] self.decile_thresholds = None - self.roof_lookup = pd.DataFrame(cleaned_lookup["roof-description"]) + self.roof_lookup = pd.DataFrame(cleaned_lookup.get("roof-description")) self.photo_supply_lookup = pd.DataFrame() self.floor_area_decile_thresholds = pd.DataFrame() @@ -35,6 +35,10 @@ class SolarPhotoSupply: Create a dataset from the provided file directories. This method processes the data files, applies transformations, and aggregates data into a useful format. 
""" + + if self.roof_lookup.empty: + raise ValueError("No roof lookup data") + results = [] logger.info("Creating solar photo supply dataset") diff --git a/etl/solar/tests/test_solar_photo_supply.py b/etl/solar/tests/test_solar_photo_supply.py new file mode 100644 index 00000000..79d39fc4 --- /dev/null +++ b/etl/solar/tests/test_solar_photo_supply.py @@ -0,0 +1,25 @@ +import unittest +from etl.solar.SolarPhotoSupply import SolarPhotoSupply + + +class TestSolarPhotoSupply(unittest.TestCase): + def test_classify_floor_area(self): + # Setup + thresholds = [10, 20, 30, 40, 50] + solar_photo_supply = SolarPhotoSupply([], {}) + + # Test Case 1: Valid floor area + floor_area = 25 + expected_decile = 2 + result = solar_photo_supply.classify_floor_area(floor_area, thresholds) + self.assertEqual(result, expected_decile, "Decile classification did not match expected result") + + # Test Case 2: Out of range floor area + floor_area = 60 + expected_decile = len(thresholds) + result = solar_photo_supply.classify_floor_area(floor_area, thresholds) + self.assertEqual(result, expected_decile, "Decile classification for out of range value is incorrect") + + +if __name__ == '__main__': + unittest.main() From 9c941233661163e87d139e59c4f916bb0d67eaee Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 5 Jan 2024 16:50:06 +0000 Subject: [PATCH 11/26] added solar etl client unit tests --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- etl/solar/tests/test_solar_photo_supply.py | 84 ++++++++++++++++++++++ 3 files changed, 86 insertions(+), 2 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index 4413bb06..b0f9c00d 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 6f308057..1122b380 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/etl/solar/tests/test_solar_photo_supply.py b/etl/solar/tests/test_solar_photo_supply.py index 79d39fc4..b9b7c09c 100644 --- 
a/etl/solar/tests/test_solar_photo_supply.py +++ b/etl/solar/tests/test_solar_photo_supply.py @@ -1,8 +1,92 @@ import unittest +import pandas as pd from etl.solar.SolarPhotoSupply import SolarPhotoSupply class TestSolarPhotoSupply(unittest.TestCase): + + def setUp(self): + # Mock data for photo_supply_lookup and floor_area_decile_thresholds + self.photo_supply_lookup = pd.DataFrame({ + "tenure": ["leasehold", "freehold"], + "built_form": ["detached", "semi-detached"], + "property_type": ["house", "flat"], + "construction_age_band": ["pre-1900", "1900-1929"], + "is_flat": [False, True], + "is_pitched": [True, False], + "is_roof_room": [False, True], + "floor_area_decile": [0, 1], + "photo_supply": [100, 200] + }) + + self.floor_area_decile_thresholds = pd.DataFrame({ + "floor_area_decile_thresholds": [50, 100] + }) + + self.solar_photo_supply = SolarPhotoSupply([], {}) + + def test_correct_filtering(self): + result = self.solar_photo_supply.filter_photo_supply_lookup( + self.photo_supply_lookup, + self.floor_area_decile_thresholds, + "leasehold", + "detached", + "house", + "pre-1900", + False, + True, + False, + 45 + ) + self.assertEqual(len(result), 1) + self.assertEqual(result.iloc[0]["photo_supply"], 100) + + def test_no_matches(self): + with self.assertRaises(ValueError): + self.solar_photo_supply.filter_photo_supply_lookup( + self.photo_supply_lookup, + self.floor_area_decile_thresholds, + "leasehold", + "unknown", + "house", + "pre-1900", + False, + True, + False, + 45 + ) + + def test_floor_area_decile_matching(self): + result = self.solar_photo_supply.filter_photo_supply_lookup( + self.photo_supply_lookup, + self.floor_area_decile_thresholds, + "freehold", + "semi-detached", + "flat", + "1900-1929", + True, + False, + True, + 60 + ) + self.assertEqual(len(result), 1) + self.assertEqual(result.iloc[0]["photo_supply"], 200) + + def test_invalid_parameters(self): + with self.assertRaises(AttributeError): + self.solar_photo_supply.filter_photo_supply_lookup( + 
self.photo_supply_lookup, + self.floor_area_decile_thresholds, + 123, # Invalid type for tenure + "detached", + "house", + "pre-1900", + False, + True, + False, + 45 + ) + def test_classify_floor_area(self): # Setup thresholds = [10, 20, 30, 40, 50] From ac556d5507e724d8085946b58fa1c976df7a347e Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 6 Jan 2024 17:59:03 +0000 Subject: [PATCH 12/26] making fixes to eligibility pipeline with updates to property class --- backend/SearchEpc.py | 7 ++++-- etl/eligibility/ha_15_32/app.py | 16 ++++++------- etl/eligibility/ha_15_32/ha7_app.py | 36 ++++++++++++++++++----------- etl/testing_data/estimate_epc.py | 6 ++++- 4 files changed, 39 insertions(+), 26 deletions(-) diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index b3f58b04..2a2cdfba 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -458,7 +458,7 @@ class SearchEpc: if not epc_data.empty: # Further processing of the EPC data - epc_data['lodgement-datetime'] = pd.to_datetime(epc_data['lodgement-datetime']) + epc_data['lodgement-datetime'] = pd.to_datetime(epc_data['lodgement-datetime'], format='mixed') epc_data = epc_data.sort_values("lodgement-datetime", ascending=False).groupby("uprn").head(1) epc_data["house_number"] = epc_data["address"].apply(lambda add1: self.get_house_number(add1)) epc_data["numeric_house_number"] = epc_data["house_number"].apply( @@ -646,7 +646,7 @@ class SearchEpc: return agg[key].values[0] - def find_property(self): + def find_property(self, skip_os=False): """ This method will attempt to identify a property. It will, at first, use the EPC api to try and find the EPC for the property and the associated UPRN. 
If this fails, it will use the Ordnance Survey API to @@ -669,6 +669,9 @@ class SearchEpc: return # Step 2: If we don't have an EPC, we use the ordnance survey api to find the uprn + if skip_os: + return + os_response = self.ordnance_survey_client.get_places_api() if os_response["status"] != 200: diff --git a/etl/eligibility/ha_15_32/app.py b/etl/eligibility/ha_15_32/app.py index 48bfeb2c..9a563770 100644 --- a/etl/eligibility/ha_15_32/app.py +++ b/etl/eligibility/ha_15_32/app.py @@ -11,13 +11,12 @@ import numpy as np import msgpack from datetime import datetime, timedelta from utils.logger import setup_logger -from utils.s3 import read_from_s3 +from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet from dotenv import load_dotenv from backend.SearchEpc import SearchEpc from backend.Property import Property from etl.eligibility.Eligibility import Eligibility from etl.epc.DataProcessor import DataProcessor -from backend.app.utils import read_parquet_from_s3 from backend.app.plan.utils import create_recommendation_scoring_data from etl.epc.settings import COLUMNS_TO_MERGE_ON from backend.ml_models.api import ModelApi @@ -348,14 +347,13 @@ def prepare_model_data_row( p = Property( id=property_id, postcode=modelling_epc["postcode"], - address1=modelling_epc["address1"], - epc_client=None, - data=modelling_epc + address=modelling_epc["address1"], + data=modelling_epc, + old_data=old_data, + full_sap_epc=full_sap_epc ) - p.old_data = old_data - p.full_sap_epc = full_sap_epc - p.get_components(cleaned) + p.get_components(cleaned, None, None) # This is temp - this should happen after scoring cleaned_property_data = DataProcessor.apply_averages_cleaning( data_to_clean=pd.DataFrame([dict(**p.get_model_data(), LOCAL_AUTHORITY=p.data["local-authority"])]), @@ -1087,7 +1085,7 @@ def app(): ) cleaned = msgpack.unpackb(cleaned, raw=False) - cleaning_data = read_parquet_from_s3( + cleaning_data = read_dataframe_from_s3_parquet( bucket_name="retrofit-data-dev", 
file_key="sap_change_model/cleaning_dataset.parquet", ) diff --git a/etl/eligibility/ha_15_32/ha7_app.py b/etl/eligibility/ha_15_32/ha7_app.py index 7d856366..0152ab91 100644 --- a/etl/eligibility/ha_15_32/ha7_app.py +++ b/etl/eligibility/ha_15_32/ha7_app.py @@ -1,3 +1,4 @@ +import os import msgpack import openpyxl from openpyxl.styles.colors import COLOR_INDEX @@ -5,10 +6,9 @@ from pathlib import Path from datetime import datetime import pandas as pd import numpy as np -from utils.s3 import read_from_s3 +from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet from utils.logger import setup_logger from dotenv import load_dotenv -from backend.app.utils import read_parquet_from_s3 from tqdm import tqdm from backend.SearchEpc import SearchEpc from etl.eligibility.Eligibility import Eligibility @@ -17,13 +17,14 @@ from etl.epc.DataProcessor import DataProcessor from etl.epc.settings import COLUMNS_TO_MERGE_ON from backend.ml_models.api import ModelApi -import re - ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env" logger = setup_logger() load_dotenv(ENV_FILE) +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") +OS_API_KEY = os.getenv("ORDNANCE_SURVEY_API_KEY") + def load_data(): """ @@ -79,20 +80,27 @@ def get_ha7_data(data, cleaned, cleaning_data, created_at): nodata = [] for _, house in tqdm(data.iterrows(), total=len(data)): + if house["Address"] is not None: + address = house["Address"] + else: + address = house["Address2"] + searcher = SearchEpc( - address1=house["Address"], - postcode=house["Postcode"] + address1=address, + postcode=house["Postcode"], + auth_token=EPC_AUTH_TOKEN, + os_api_key=None ) - response = searcher.search() - if response["status"] == 204: - nodata.append(house) + searcher.find_property(skip_os=True) + + if searcher.newest_epc is None: + nodata.append(house["row_id"]) continue - newest_epc, older_epcs, full_sap_epc = searcher.retrieve( - property_type=property_type_lookup.get(house["Property Type"], None), - 
address=house["Address"], - ) + newest_epc = searcher.newest_epc + older_epcs = searcher.older_epcs + full_sap_epc = searcher.full_sap_epc eligibility = Eligibility(epc=newest_epc, cleaned=cleaned) eligibility.check_gbis_warmfront() @@ -273,7 +281,7 @@ def app(): ) cleaned = msgpack.unpackb(cleaned, raw=False) - cleaning_data = read_parquet_from_s3( + cleaning_data = read_dataframe_from_s3_parquet( bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", ) diff --git a/etl/testing_data/estimate_epc.py b/etl/testing_data/estimate_epc.py index 9e460678..cd91a540 100644 --- a/etl/testing_data/estimate_epc.py +++ b/etl/testing_data/estimate_epc.py @@ -73,7 +73,9 @@ def app(): df["UPRN"] = df["UPRN"].astype("Int64").astype("str") df = df[~pd.isnull(df["UPRN"])] - uprn_sample = sample(df["UPRN"].unique().tolist(), DIR_SAMPLE_SIZE) + # uprn_sample = sample(df["UPRN"].unique().tolist(), DIR_SAMPLE_SIZE) + # Take a fixed sample based on the first DIR_SAMPLE_SIZE uprns + uprn_sample = sorted(df["UPRN"].unique().tolist())[:DIR_SAMPLE_SIZE] df_sample = df[df["UPRN"].isin(uprn_sample)] # Take the record with the newest LODGEMENT_DATETIME by uprn df_sample = df_sample.sort_values("LODGEMENT_DATETIME", ascending=False).drop_duplicates("UPRN") @@ -149,6 +151,8 @@ def app(): # 0.7859617377809409 # 0.5348837209302325 + # Fixed sample, sqrt weights + # Group by tenure by_tenure = results_df.groupby("tenure").agg( {"numeric_success": "median", "categorical_success": "median", "uprn": "count"} From 756dca12badcaec56ece5454aa699878fa9f5ab0 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 6 Jan 2024 18:20:54 +0000 Subject: [PATCH 13/26] updated property unit tests --- backend/tests/test_property.py | 157 ++++++++++++++++++--------------- 1 file changed, 88 insertions(+), 69 deletions(-) diff --git a/backend/tests/test_property.py b/backend/tests/test_property.py index 0113d690..09594a40 100644 --- a/backend/tests/test_property.py +++ 
b/backend/tests/test_property.py @@ -9,6 +9,7 @@ from etl.epc_clean.EpcClean import EpcClean mock_epc_response = { "rows": [ { + "tenure": "rental (social)", "lmk-key": 1, "uprn": 1, "number-habitable-rooms": 5, @@ -17,7 +18,7 @@ mock_epc_response = { "inspection-date": "2023-06-01", 'lodgement-datetime': '2023-06-01 20:29:01', "some-other-key": "some-value", - "roof-description": "Roof Description", + "roof-description": "pitched, no insulation", "walls-description": "Walls Description", "windows-description": "Windows Description", "mainheat-description": "Main Heating Description", @@ -168,29 +169,54 @@ mock_epc_response_dupe = { class TestProperty: + @pytest.fixture(autouse=True) - def property_instance(self, mock_epc_client, mock_cleaner): - property_instance = Property(1, "AB12CD", "Test Address", epc_client=mock_epc_client) + def mock_photo_supply_lookup(self): + return pd.DataFrame( + [ + dict( + tenure="rental (social)", + built_form="Detached", + property_type="House", + construction_age_band="England and Wales: 1967-1975", + is_flat=False, + is_pitched=True, + is_roof_room=False, + floor_area_decile=2, + photo_supply_median=40 + ) + ] + ) + + @pytest.fixture(autouse=True) + def mock_floor_area_decile_thresholds(self): + return pd.DataFrame( + {"floor_area_decile_thresholds": [0, 10, 30, 50]} + ) + + @pytest.fixture(autouse=True) + def property_instance(self, mock_cleaner): + property_instance = Property(id=1, postcode="AB12CD", address="Test Address", data=mock_epc_response["rows"][0]) return property_instance @pytest.fixture(autouse=True) - def property_instance_dupe_data(self, mock_epc_client_dupe_data): - property_instance_dupe_data = Property(2, "AB12CD", "Test Address", epc_client=mock_epc_client_dupe_data) + def property_instance_dupe_data(self): + property_instance_dupe_data = Property(id=2, postcode="AB12CD", address="Test Address") return property_instance_dupe_data - @pytest.fixture - def mock_epc_client(self): - mock_epc_client = 
Mock(spec=EpcClient(auth_token="mocked_auth_token")) - mock_epc_client.domestic.search.return_value = mock_epc_response.copy() - mock_epc_client.auth_token = "mocked_auth_token" - return mock_epc_client - - @pytest.fixture - def mock_epc_client_dupe_data(self): - mock_epc_client_dupe_data = Mock(spec=EpcClient(auth_token="mocked_auth_token")) - mock_epc_client_dupe_data.domestic.search.return_value = mock_epc_response_dupe.copy() - mock_epc_client_dupe_data.auth_token = "mocked_auth_token" - return mock_epc_client_dupe_data + # @pytest.fixture + # def mock_epc_client(self): + # mock_epc_client = Mock(spec=EpcClient(auth_token="mocked_auth_token")) + # mock_epc_client.domestic.search.return_value = mock_epc_response.copy() + # mock_epc_client.auth_token = "mocked_auth_token" + # return mock_epc_client + # + # @pytest.fixture + # def mock_epc_client_dupe_data(self): + # mock_epc_client_dupe_data = Mock(spec=EpcClient(auth_token="mocked_auth_token")) + # mock_epc_client_dupe_data.domestic.search.return_value = mock_epc_response_dupe.copy() + # mock_epc_client_dupe_data.auth_token = "mocked_auth_token" + # return mock_epc_client_dupe_data @pytest.fixture def mock_cleaner(self): @@ -229,7 +255,11 @@ class TestProperty: } mock_cleaner.cleaned = { - "roof-description": [{"original_description": "Roof Description"}], + "roof-description": [ + {"original_description": "Roof Description"}, + {"original_description": "pitched, no insulation", "is_pitched": True, "is_flat": False, + "is_roof_room": False} + ], "walls-description": [walls_data], "windows-description": [{"original_description": "Windows Description"}], "mainheat-description": [{"original_description": "Main Heating Description"}], @@ -240,37 +270,32 @@ class TestProperty: } return mock_cleaner - def test_init(self, mock_epc_client): - inst1 = Property(0, "AB12CD", "Test Address", epc_client=mock_epc_client) - # Should be mocked auth token - assert inst1.epc_client.auth_token == "mocked_auth_token" + def 
test_init(self): + inst1 = Property(0, postcode="AB12CD", address="Test Address") - inst2 = Property(3, "AB12CD", "Test Address", epc_client=mock_epc_client) - assert inst2.epc_client.auth_token + assert inst1.data is None - inst3 = Property(4, "AB12CD", "Test Address", data={"some": "data"}, epc_client=mock_epc_client) - assert inst3.data == {"some": "data"} + inst2 = Property(3, "AB12CD", "Test Address") + assert inst2.id == 3 - data = inst3.search_address_epc() - assert data is None + inst3 = Property(4, "AB12CD", "Test Address", data={"some": "data", "uprn": 123}) + assert inst3.data == {"some": "data", "uprn": 123} - def test_search_address_epc(self, property_instance): - # Call the method to test - property_instance.search_address_epc() - - # Verify that the correct data is being returned - assert property_instance.data == mock_epc_response["rows"][0] - - def test_search_address_epc_multiple_results(self, property_instance_dupe_data, mock_epc_client_dupe_data): - with pytest.raises(Exception, match="More than one result found for this address - investigate me"): - property_instance_dupe_data.search_address_epc() - - def test_get_components(self, property_instance, mock_cleaner, mock_epc_client): - property_instance.search_address_epc() - property_instance.get_components(mock_cleaner.cleaned) + def test_get_components( + self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds + ): + property_instance.get_components( + mock_cleaner.cleaned, + photo_supply_lookup=mock_photo_supply_lookup, + floor_area_decile_thresholds=mock_floor_area_decile_thresholds + ) # Verify that the components are set correctly - assert property_instance.roof == {"original_description": "Roof Description"} + assert property_instance.roof == { + 'original_description': 'pitched, no insulation', 'is_pitched': True, + 'is_flat': False, 'is_roof_room': False + } + assert property_instance.walls == { "original_description": "Walls Description", 
"is_cavity_wall": True, @@ -294,24 +319,15 @@ class TestProperty: # Verify that ValueError is raised when EpcClean doesn't contain cleaned data with pytest.raises(ValueError, match="Cleaner does not contain cleaned data"): - property_instance.get_components(mock_cleaner.cleaned) + property_instance.get_components(mock_cleaner.cleaned, pd.DataFrame(), pd.DataFrame()) - def test_get_components_no_data(self, property_instance, mock_cleaner): + def test_get_components_no_attributes( + self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds + ): # Modify the mock cleaner to have no attributes for a specific description mock_cleaner.cleaned = { "roof-description": [] } - - # Verify that ValueError is raised when no attributes are found - with pytest.raises(ValueError, match="Property does not contain data"): - property_instance.get_components(mock_cleaner.cleaned) - - def test_get_components_no_attributes(self, property_instance, mock_cleaner): - # Modify the mock cleaner to have no attributes for a specific description - mock_cleaner.cleaned = { - "roof-description": [] - } - property_instance.search_address_epc() property_instance.data["roof-description"] = "Pitched, no insulation" property_instance.walls = { "original_description": "Walls Description", @@ -332,14 +348,17 @@ class TestProperty: } # Assert backup cleaning has been applied - property_instance.get_components(mock_cleaner.cleaned) + property_instance.get_components( + mock_cleaner.cleaned, mock_photo_supply_lookup, mock_floor_area_decile_thresholds + ) assert property_instance.roof["clean_description"] == "Pitched, no insulation" assert property_instance.roof["is_pitched"] - def test_get_components_multiple_attributes(self, property_instance, mock_cleaner): + def test_get_components_multiple_attributes( + self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds + ): # This shouldn't happen - it would mean a cleaning error 
- property_instance.search_address_epc() property_instance.data["roof-description"] = "Roof Description" cleaned = { "roof-description": [ @@ -350,10 +369,10 @@ class TestProperty: # Verify that ValueError is raised when multiple attributes are found with pytest.raises(ValueError, match="Either No attributes or multiple found for roof-description"): - property_instance.get_components(cleaned) + property_instance.get_components(cleaned, mock_photo_supply_lookup, mock_floor_area_decile_thresholds) - def test_set_spatial(self, mock_epc_client): - prop = Property(1, "AB12CD", "Test Address", mock_epc_client) + def test_set_spatial(self): + prop = Property(1, postcode="AB12CD", address="Test Address") spatial1 = pd.DataFrame([{ 'X_COORDINATE': 411143.0, 'Y_COORDINATE': 281701.0, 'LATITUDE': 52.4331896, 'LONGITUDE': -1.8375238, @@ -367,7 +386,7 @@ class TestProperty: assert prop.is_heritage assert prop.restricted_measures - prop2 = Property(1, "AB12CD", "Test Address", mock_epc_client) + prop2 = Property(1, "AB12CD", "Test Address") spatial2 = pd.DataFrame([{ 'X_COORDINATE': 411143.0, 'Y_COORDINATE': 281701.0, 'LATITUDE': 52.4331896, 'LONGITUDE': -1.8375238, @@ -381,10 +400,10 @@ class TestProperty: assert not prop2.is_heritage assert not prop2.restricted_measures - def test_set_floor_level(self, mock_epc_client): + def test_set_floor_level(self): # In this case, we have a flat which looks looks it's on the first floor, but it's actually on the ground # floor, so we should set floor_level to 0 - prop = Property(1, "AB12CD", "Test Address", mock_epc_client) + prop = Property(1, postcode="AB12CD", address="Test Address") prop.data = {'floor-level': '01', 'property-type': 'Flat'} prop.floor = { 'original_description': 'Solid, no insulation (assumed)', 'clean_description': 'Solid, no insulation', @@ -400,7 +419,7 @@ class TestProperty: # This property is labelled as being on the ground floor but actually has another property below # so we set floor level to 1 - prop2 = 
Property(1, "AB12CD", "Test Address", mock_epc_client) + prop2 = Property(1, postcode="AB12CD", address="Test Address") prop2.data = {'floor-level': 'Ground', 'property-type': 'Flat'} prop2.floor = { 'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation', @@ -415,7 +434,7 @@ class TestProperty: assert prop2.floor_level == 1 # this property is correctly labelled as being on the 2nd floor - prop3 = Property(1, "AB12CD", "Test Address", mock_epc_client) + prop3 = Property(1, postcode="AB12CD", address="Test Address") prop3.data = {'floor-level': '02', 'property-type': 'Flat'} prop3.floor = { 'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation', @@ -430,7 +449,7 @@ class TestProperty: assert prop3.floor_level == 2 # Example of a house - prop4 = Property(1, "AB12CD", "Test Address", mock_epc_client) + prop4 = Property(1, postcode="AB12CD", address="Test Address") prop4.data = {'floor-level': '', 'property-type': 'House'} prop4.floor = { 'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation', From 6335be531f1f3c4bab85cd3f739ac7b482aad6c5 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 6 Jan 2024 18:22:37 +0000 Subject: [PATCH 14/26] partially fixed test_sap_model_prep --- backend/tests/test_sap_model_prep.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/backend/tests/test_sap_model_prep.py b/backend/tests/test_sap_model_prep.py index f20e4993..c1ff514e 100644 --- a/backend/tests/test_sap_model_prep.py +++ b/backend/tests/test_sap_model_prep.py @@ -2,7 +2,6 @@ from backend.Property import Property from etl.epc.DataProcessor import DataProcessor from backend.app.plan.utils import create_recommendation_scoring_data, get_cleaned from etl.epc.settings import COLUMNS_TO_MERGE_ON -from epc_api.client import EpcClient import pandas as pd import pytest import msgpack @@ -288,8 +287,7 @@ class 
TestSapModelPrep: home = Property( id=0, postcode=starting_epc["postcode"], - address1=starting_epc["address1"], - epc_client=EpcClient(auth_token="notoken"), + address=starting_epc["address1"], data=starting_epc ) home.get_components(cleaned) @@ -508,8 +506,7 @@ class TestSapModelPrep: home2 = Property( id=0, postcode=starting_epc2["postcode"], - address1=starting_epc2["address1"], - epc_client=EpcClient(auth_token="notoken"), + address=starting_epc2["address1"], data=starting_epc2 ) home2.get_components(cleaned) @@ -728,8 +725,7 @@ class TestSapModelPrep: home3 = Property( id=0, postcode=starting_epc3["postcode"], - address1=starting_epc3["address1"], - epc_client=EpcClient(auth_token="notoken"), + address=starting_epc3["address1"], data=starting_epc3 ) home3.get_components(cleaned) @@ -937,8 +933,7 @@ class TestSapModelPrep: home4 = Property( id=0, postcode=starting_epc4["postcode"], - address1=starting_epc4["address1"], - epc_client=EpcClient(auth_token="notoken"), + address=starting_epc4["address1"], data=starting_epc4 ) home4.get_components(cleaned) From 3eead1928fc54392a5fcd1dc24ba4240e6730f27 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 6 Jan 2024 18:24:01 +0000 Subject: [PATCH 15/26] fixed unit tests for fireplace recs --- recommendations/tests/test_fireplace_recommendations.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/recommendations/tests/test_fireplace_recommendations.py b/recommendations/tests/test_fireplace_recommendations.py index 570fbb5c..a91d6697 100644 --- a/recommendations/tests/test_fireplace_recommendations.py +++ b/recommendations/tests/test_fireplace_recommendations.py @@ -6,7 +6,7 @@ from recommendations.FireplaceRecommendations import FireplaceRecommendations class TestFirepaceRecommendations: def test_no_fireplaces(self): - property_instance = Property(id=0, address1="fake", postcode="fake", epc_client=Mock()) + property_instance = Property(id=0, address="fake", postcode="fake") 
property_instance.data = { "number-open-fireplaces": 0 } @@ -22,7 +22,7 @@ class TestFirepaceRecommendations: assert recommender.recommendation is None def test_one_fireplace(self): - property_instance = Property(id=0, address1="fake", postcode="fake", epc_client=Mock()) + property_instance = Property(id=0, address="fake", postcode="fake") property_instance.data = { "number-open-fireplaces": 1 } @@ -40,7 +40,7 @@ class TestFirepaceRecommendations: assert recommender.recommendation[0]["total"] == 300 def test_multiple_fireplaces(self): - property_instance = Property(id=0, address1="fake", postcode="fake", epc_client=Mock()) + property_instance = Property(id=0, address="fake", postcode="fake") property_instance.data = { "number-open-fireplaces": 3 } From dc6369fa6d04fc696fbc5a22226bf6f3d9d948fd Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 6 Jan 2024 18:26:16 +0000 Subject: [PATCH 16/26] removed unnecessary code from floor recs --- recommendations/tests/test_floor_recommendations.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/recommendations/tests/test_floor_recommendations.py b/recommendations/tests/test_floor_recommendations.py index 700d33d3..555f9a27 100644 --- a/recommendations/tests/test_floor_recommendations.py +++ b/recommendations/tests/test_floor_recommendations.py @@ -21,16 +21,6 @@ class TestFloorRecommendations: ) as f: return pickle.load(f) - @pytest.fixture - def mock_floor_rec_instance(self): - # Creating a mock instance of WallRecommendations with the necessary attributes - property_mock = Mock() - property_mock.full_sap_epc = {"lodgement-date": "2000-01-01"} - property_mock.data = {"county": "York"} - - mock_wall_rec_instance = FloorRecommendations(property_mock, materials) - return mock_wall_rec_instance - def test_init(self, input_properties): input_properties[0].insulation_floor_area = 50 input_properties[0].insulation_wall_area = 90 From d7e8cf889dad6e3c9df415672780d2bf660dbb21 Mon Sep 17 00:00:00 2001 From: 
Khalim Conn-Kowlessar Date: Sat, 6 Jan 2024 18:27:12 +0000 Subject: [PATCH 17/26] fixing lighting recommendations tests --- recommendations/tests/test_lighting_recommendations.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/recommendations/tests/test_lighting_recommendations.py b/recommendations/tests/test_lighting_recommendations.py index 5a4545eb..964f1da0 100644 --- a/recommendations/tests/test_lighting_recommendations.py +++ b/recommendations/tests/test_lighting_recommendations.py @@ -9,7 +9,7 @@ from recommendations.tests.test_data.materials import materials class TestLightingRecommendations: def test_init_invalid_materials(self): - input_property0 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock()) + input_property0 = Property(id=1, postcode="F4k3 6", address="623 fake street") input_property0.lighting = {"low_energy_proportion": 0} input_property0.data = {"county": "Greater London Authority"} # Test for invalid materials @@ -18,7 +18,7 @@ class TestLightingRecommendations: def test_recommend_no_action_needed(self): # Case where no recommendation is needed - input_property1 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock()) + input_property1 = Property(id=1, postcode="F4k3 6", address="623 fake street") input_property1.lighting = {"low_energy_proportion": 100} input_property1.data = {"county": "Greater London Authority"} @@ -28,7 +28,7 @@ class TestLightingRecommendations: def test_recommend_action_needed(self): # Case where recommendation is needed - input_property1 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock()) + input_property1 = Property(id=1, postcode="F4k3 6", address="623 fake street") input_property1.lighting = {"low_energy_proportion": 100} input_property1.data = {"county": "Greater London Authority"} input_property1.lighting = {"low_energy_proportion": 0.80} From b4d4c2128b5469474c93d9126848f27c57a219ed Mon Sep 17 00:00:00 
2001 From: Khalim Conn-Kowlessar Date: Sat, 6 Jan 2024 18:29:10 +0000 Subject: [PATCH 18/26] fix roof recs --- .../tests/test_roof_recommendations.py | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/recommendations/tests/test_roof_recommendations.py b/recommendations/tests/test_roof_recommendations.py index 903f598b..c1a7dfd9 100644 --- a/recommendations/tests/test_roof_recommendations.py +++ b/recommendations/tests/test_roof_recommendations.py @@ -1,5 +1,4 @@ from backend.Property import Property -from unittest.mock import Mock from recommendations.RoofRecommendations import RoofRecommendations from recommendations.tests.test_data.materials import materials @@ -7,7 +6,7 @@ from recommendations.tests.test_data.materials import materials class TestRoofRecommendations: def test_loft_insulation_recommendation_no_insulation(self): - property_instance = Property(id=0, address1="fake", postcode="fake", epc_client=Mock()) + property_instance = Property(id=0, address="fake", postcode="fake") property_instance.age_band = "F" property_instance.insulation_floor_area = 100 property_instance.roof = { @@ -32,7 +31,7 @@ class TestRoofRecommendations: assert len(roof_recommender.recommendations) def test_loft_insulation_recommendation_50mm_insulation(self): - property_instance2 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock()) + property_instance2 = Property(id=0, address="fake", postcode="fake") property_instance2.age_band = "F" property_instance2.insulation_floor_area = 100 property_instance2.roof = { @@ -58,7 +57,7 @@ class TestRoofRecommendations: assert roof_recommender2.recommendations[0]["new_u_value"] == 0.14 assert roof_recommender2.recommendations[0]["starting_u_value"] == 0.68 - property_instance3 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock()) + property_instance3 = Property(id=0, address="fake", postcode="fake") property_instance3.age_band = "F" property_instance3.insulation_floor_area = 
100 property_instance3.roof = { @@ -83,7 +82,7 @@ class TestRoofRecommendations: assert roof_recommender3.recommendations[0]["parts"][0]["depth"] == 270 def test_loft_insulation_recommendation_150mm_insulation(self): - property_instance4 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock()) + property_instance4 = Property(id=0, address="fake", postcode="fake") property_instance4.age_band = "F" property_instance4.insulation_floor_area = 100 property_instance4.roof = { @@ -110,7 +109,7 @@ class TestRoofRecommendations: assert roof_recommender4.recommendations[0]["starting_u_value"] == 0.3 assert roof_recommender4.recommendations[0]["parts"][0]["depth"] == 150 - property_instance5 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock()) + property_instance5 = Property(id=0, address="fake", postcode="fake") property_instance5.age_band = "F" property_instance5.insulation_floor_area = 100 property_instance5.roof = { @@ -137,7 +136,7 @@ class TestRoofRecommendations: def test_loft_insulation_recommendation_270mm_insulation(self): # We shouldn't recommend anything in this case - property_instance6 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock()) + property_instance6 = Property(id=0, address="fake", postcode="fake") property_instance6.age_band = "F" property_instance6.insulation_floor_area = 100 property_instance6.roof = { @@ -278,7 +277,7 @@ class TestRoofRecommendations: # "Insulate your room roof with 270mm of Example room roof insulation" def test_flat_no_insulation(self): - property_instance11 = Property(id=11, address1="fake", postcode="fake", epc_client=Mock()) + property_instance11 = Property(id=11, address="fake", postcode="fake") property_instance11.age_band = "D" property_instance11.insulation_floor_area = 33.5 property_instance11.perimeter = 24 @@ -307,7 +306,7 @@ class TestRoofRecommendations: "Insulate the home's flat roof with 150mm of Ecotherm Eco-Versal General Purpose Insulation Board" def 
test_flat_insulated(self): - property_instance12 = Property(id=12, address1="fake", postcode="fake", epc_client=Mock()) + property_instance12 = Property(id=12, address="fake", postcode="fake") property_instance12.age_band = "D" property_instance12.insulation_floor_area = 40 property_instance12.perimeter = 30 @@ -331,7 +330,7 @@ class TestRoofRecommendations: assert not roof_recommender12.recommendations def test_flat_limited_insulation(self): - property_instance13 = Property(id=12, address1="fake", postcode="fake", epc_client=Mock()) + property_instance13 = Property(id=12, address="fake", postcode="fake") property_instance13.age_band = "D" property_instance13.insulation_floor_area = 40 property_instance13.perimeter = 40 @@ -363,7 +362,7 @@ class TestRoofRecommendations: "Insulate the home's flat roof with 150mm of Ecotherm Eco-Versal General Purpose Insulation Board" def test_property_above(self): - property_instance14 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock()) + property_instance14 = Property(id=0, address="fake", postcode="fake") property_instance14.age_band = "F" property_instance14.insulation_floor_area = 100 property_instance14.roof = { From f78078384b238fcd618c94cfcde49f96631c50a8 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 9 Jan 2024 10:36:30 +0000 Subject: [PATCH 19/26] working on wft sales analysis --- backend/Property.py | 20 ++-- backend/SearchEpc.py | 37 +++++++- etl/eligibility/Eligibility.py | 10 +- etl/eligibility/ha_15_32/app.py | 8 +- etl/eligibility/ha_15_32/ha16_app.py | 94 +++++++++++++++---- etl/eligibility/ha_15_32/ha24_app.py | 93 +++++++++++++----- etl/eligibility/ha_15_32/ha7_app.py | 30 ++++-- etl/solar/SolarPhotoSupply.py | 15 ++- .../tests/test_ventilation_recommendations.py | 11 +-- .../tests/test_wall_recommendations.py | 14 +-- .../tests/test_window_recommendations.py | 28 ++---- 11 files changed, 261 insertions(+), 99 deletions(-) diff --git a/backend/Property.py b/backend/Property.py 
index 3dbcc2b8..5713c179 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -150,7 +150,7 @@ class Property(Definitions): """ solar_pv = self.data["photo-supply"] - if solar_pv == "": + if solar_pv in ["", None]: solar_pv = None else: solar_pv = float(solar_pv) @@ -170,6 +170,7 @@ class Property(Definitions): "Y": True, "N": False, "": None, + None: None, } self.solar_hot_water = { @@ -245,8 +246,8 @@ class Property(Definitions): # it self.data["built-form"] = BUILT_FORM_REMAP.get(self.data["built-form"], self.data["built-form"]) if self.data["built-form"] in self.DATA_ANOMALY_MATCHES: - if self.data["property-type"] == "Flat": - self.data["built-form"] = "Semi-Detached" + if self.data["property-type"] in ["Flat", "Maisonette"]: + self.data["built-form"] = "End-Terrace" self.set_year_built() self.set_energy() @@ -394,7 +395,8 @@ class Property(Definitions): map = { "no corridor": False, "unheated corridor": True, - "heated corridor": False + "heated corridor": False, + None: False } if self.data["heat-loss-corridor"] in self.DATA_ANOMALY_MATCHES: @@ -403,7 +405,7 @@ class Property(Definitions): has_heat_loss_corridor = map[self.data["heat-loss-corridor"]] length = self.data["unheated-corridor-length"] - if length == "": + if length in ["", None]: length = None else: length = float(length) @@ -579,7 +581,7 @@ class Property(Definitions): self.floor_area = float(self.data["total-floor-area"]) if not self.data["number-habitable-rooms"] or ( - self.data["floor-height"] == "" or self.data["floor-height"] in self.DATA_ANOMALY_MATCHES + self.data["floor-height"] in ["", None] or self.data["floor-height"] in self.DATA_ANOMALY_MATCHES ): if self.property_dimensions is None: property_dimensions = read_dataframe_from_s3_parquet( @@ -601,7 +603,7 @@ class Property(Definitions): else: raise NotImplementedError("Implement me") - if self.data["floor-height"] == "" or self.data["floor-height"] in self.DATA_ANOMALY_MATCHES: + if self.data["floor-height"] in [None, ""] or 
self.data["floor-height"] in self.DATA_ANOMALY_MATCHES: self.floor_height = float(self.property_dimensions["FLOOR_HEIGHT"].round(2)) else: self.floor_height = float(self.data["floor-height"]) @@ -626,7 +628,7 @@ class Property(Definitions): def set_floor_level(self): self.floor_level = ( FLOOR_LEVEL_MAP[self.data["floor-level"]] if - self.data["floor-level"] not in self.DATA_ANOMALY_MATCHES else None + self.data["floor-level"] not in list(self.DATA_ANOMALY_MATCHES) + [None] else None ) if self.floor_level is None: @@ -794,7 +796,7 @@ class Property(Definitions): :return: """ - if self.data["fixed-lighting-outlets-count"] == "": + if self.data["fixed-lighting-outlets-count"] in [None, ""]: # We check old EPCs and the full SAP EPC diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 2a2cdfba..d69d8d86 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -146,6 +146,7 @@ class SearchEpc: max_retries: int = None, uprn: [int, None] = None, size=None, + property_type=None, ): """ Address lines 1 and postcode are mandatory fields. The other address lines are optional @@ -157,6 +158,7 @@ class SearchEpc: :param uprn: int, optional, the uprn of the property :param size: int, optional, the number of results to return. 
If not provided, defaults to 25 which is the api's default + :param property_type: str, optional, the property type of the property, if known before hand """ self.address1 = address1 @@ -184,6 +186,8 @@ class SearchEpc: self.size = size if size is not None else 25 + self.property_type = property_type + @classmethod def get_house_number(cls, address: str) -> str | None: """ @@ -335,7 +339,7 @@ class SearchEpc: return address, postcode - def extract_epc_data(self, property_type=None, address=None): + def extract_epc_data(self, address=None): """ Given a successful search, this method will format the data and return it @@ -351,7 +355,7 @@ class SearchEpc: # Firstly, we should only have 1 urpn so if we have multiple, we'll need to filter down the # property further - rows = self.filter_rows(rows, property_type=property_type, address=None) + rows = self.filter_rows(rows, property_type=self.property_type, address=None) rows = self.filter_rows(rows, property_type=None, address=address) # We now check for a full sap epc: @@ -366,9 +370,19 @@ class SearchEpc: # Ge the uprn from the newest record for this home uprns = {r["uprn"] for r in rows if r["uprn"]} - if len(uprns) != 1: - raise ValueError("Multiple UPRNs found - investigate me") - uprn = uprns.pop() + # We can sometimes have no uprn for a property + if (len(uprns) == 0) and len(rows) > 0: + logger.warning("Found data but missing uprn") + elif len(uprns) != 1: + # There is a possibility that we have multiple UPRNs for a single property, which is an error + addresses = {r["address"] for r in rows} + if len(addresses) == 1: + # Take the uprn from the most recent + uprns = {newest_epc["uprn"]} + else: + raise ValueError("Multiple UPRNs found - investigate me") + + uprn = uprns.pop() if uprns else None return newest_epc, older_epcs, full_sap_epc, address_epc, postcode_epc, uprn @@ -670,6 +684,19 @@ class SearchEpc: # Step 2: If we don't have an EPC, we use the ordnance survey api to find the uprn if skip_os: + if 
self.ordnance_survey_client.property_type is not None: + # We can try and estimate + estimated_epc = self.estimate_epc( + property_type=self.ordnance_survey_client.property_type, + built_form=self.ordnance_survey_client.built_form + ) + self.newest_epc = estimated_epc + self.older_epcs = [] + self.full_sap_epc = {} + + # Finally, set a standardised address 1 and postcode + self.address_clean = self.ordnance_survey_client.address_os + self.postcode_clean = self.ordnance_survey_client.postcode_os return os_response = self.ordnance_survey_client.get_places_api() diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py index 364be3cc..c9d75606 100644 --- a/etl/eligibility/Eligibility.py +++ b/etl/eligibility/Eligibility.py @@ -333,7 +333,8 @@ class Eligibility: """ current_sap = int(self.epc["current-energy-efficiency"]) - if current_sap > 54: + + if current_sap >= 69: self.eco4_warmfront = { "eligible": False, "message": "sap too high" @@ -347,7 +348,12 @@ class Eligibility: is_eligible = self.cavity["suitability"] & self.loft["suitability"] if post_retrofit_sap is None: - message = "subject to post retrofit sap" if is_eligible else "not eligible" + + if current_sap >= 55: + message = "Possibly eligible but property currently EPC D" + else: + message = "subject to post retrofit sap" if is_eligible else "not eligible" + self.eco4_warmfront = { "eligible": is_eligible, "message": message diff --git a/etl/eligibility/ha_15_32/app.py b/etl/eligibility/ha_15_32/app.py index 9a563770..3a0caec6 100644 --- a/etl/eligibility/ha_15_32/app.py +++ b/etl/eligibility/ha_15_32/app.py @@ -246,6 +246,8 @@ def merge_ha_15(asset_list, identified_addresses): identified_addresses = identified_addresses.drop_duplicates("merge_key") + # We pull out raw counts for the survey lists + # Check asset list for dupes asset_list_dupes = asset_list["merge_key"].duplicated() if asset_list_dupes.sum(): @@ -336,7 +338,8 @@ def merge_ha_15(asset_list, identified_addresses): def 
prepare_model_data_row( - property_id, modelling_epc, cleaned, cleaning_data, created_at, old_data=None, full_sap_epc=None + property_id, modelling_epc, cleaned, cleaning_data, created_at, + photo_supply_lookup, floor_area_decile_thresholds, old_data=None, full_sap_epc=None, ): """ This function prepares the data for modelling, in the same fashion as the recommendation engine @@ -353,7 +356,8 @@ def prepare_model_data_row( full_sap_epc=full_sap_epc ) - p.get_components(cleaned, None, None) + p.get_components(cleaned, photo_supply_lookup=photo_supply_lookup, + floor_area_decile_thresholds=floor_area_decile_thresholds) # This is temp - this should happen after scoring cleaned_property_data = DataProcessor.apply_averages_cleaning( data_to_clean=pd.DataFrame([dict(**p.get_model_data(), LOCAL_AUTHORITY=p.data["local-authority"])]), diff --git a/etl/eligibility/ha_15_32/ha16_app.py b/etl/eligibility/ha_15_32/ha16_app.py index 7c1db158..bd19fe97 100644 --- a/etl/eligibility/ha_15_32/ha16_app.py +++ b/etl/eligibility/ha_15_32/ha16_app.py @@ -1,6 +1,6 @@ +import os import msgpack import openpyxl -from openpyxl.styles.colors import COLOR_INDEX from pathlib import Path from datetime import datetime import pandas as pd @@ -8,7 +8,7 @@ import numpy as np from utils.s3 import read_from_s3 from utils.logger import setup_logger from dotenv import load_dotenv -from backend.app.utils import read_parquet_from_s3 +from utils.s3 import read_dataframe_from_s3_parquet from tqdm import tqdm from backend.SearchEpc import SearchEpc from etl.eligibility.Eligibility import Eligibility @@ -16,10 +16,12 @@ from etl.eligibility.ha_15_32.app import prepare_model_data_row from etl.epc.DataProcessor import DataProcessor from etl.epc.settings import COLUMNS_TO_MERGE_ON from backend.ml_models.api import ModelApi +from etl.solar.SolarPhotoSupply import SolarPhotoSupply import re ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env" +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") 
logger = setup_logger() load_dotenv(ENV_FILE) @@ -250,24 +252,55 @@ def load_data(): return data, survey_list -def get_epc_data(data, cleaned, cleaning_data, created_at): +def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds): scoring_data = [] results = [] nodata = [] - for _, property_meta in tqdm(data.iterrows(), total=len(data)): + property_type_lookup = { + 'Semi Detached Bungalow': {"property-type": "Bungalow", "built-form": "Semi-Detached"}, + 'Mid Terraced House': {"property-type": "House", "built-form": "Mid-Terrace"}, + 'End Terraced House': {"property-type": "House", "built-form": "End-Terrace"}, + 'Low Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"}, + 'Semi-Detached House': {"property-type": "House", "built-form": "Semi-Detached"}, + 'Detached Bungalow': {"property-type": "Bungalow", "built-form": "Detached"}, + 'End Terraced Bungalow': {"property-type": "Bungalow", "built-form": "End-Terrace"}, + 'Mid Terraced Bungalow': {"property-type": "Bungalow", "built-form": "Mid-Terrace"}, + 'Medium Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"}, + 'Detached House': {"property-type": "House", "built-form": "Detached"}, + 'Cottage Flat': {"property-type": "Flat", "built-form": "Semi-Detached"}, + 'Maisonette Medium Rise': {"property-type": "Flat", "built-form": "Mid-Terrace"}, + 'Maisonette Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"}, + 'End Terraced Town House': {"property-type": "House", "built-form": "End-Terrace"}, + 'Flat Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"}, + 'Mid Terraced Town House': {"property-type": "House", "built-form": "Mid-Terrace"}, + } + + for index, property_meta in tqdm(data.iterrows(), total=len(data)): + searcher = SearchEpc( address1=property_meta["HouseNo"], postcode=property_meta["Postcode"], - size=1000 + auth_token=EPC_AUTH_TOKEN, + os_api_key=None, + full_address=property_meta["Address"] ) 
- searcher.search() + searcher.ordnance_survey_client.property_type = property_type_lookup[property_meta["Type"]]["property-type"] + searcher.ordnance_survey_client.built_form = property_type_lookup[property_meta["Type"]]["built-form"] + searcher.find_property(skip_os=True) - if searcher.data is None: + if searcher.newest_epc is None: nodata.append(property_meta) continue - newest_epc, older_epcs, full_sap_epc = searcher.retrieve(address=property_meta["Address"]) + if searcher.newest_epc.get("estimated"): + # We insert the row ID as our proxy for UPRN + proxy_uprn = int(property_meta["row_id"].split("_")[1]) + searcher.newest_epc["uprn"] = proxy_uprn + + newest_epc = searcher.newest_epc + older_epcs = searcher.older_epcs + full_sap_epc = searcher.full_sap_epc # We also want to get the penultimate epc penultimate_epc, _ = searcher.filter_newest_epc(older_epcs) if not penultimate_epc: @@ -277,16 +310,14 @@ def get_epc_data(data, cleaned, cleaning_data, created_at): eligibility.check_gbis_warmfront() eligibility.check_eco4_warmfront() - if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront) and ( - property_meta["warmfront_identified"] - ): + if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront): eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned) eligibility.check_gbis_warmfront() eligibility.check_eco4_warmfront() # If this is the case, we need to update the older epcs - older_epcs = [ - x for x in older_epcs if x["lmk-key"] not in [newest_epc["lmk-key"], penultimate_epc["lmk-key"]] - ] + # We don't update just to make data cleaning easier + if penultimate_epc.get("estimated") is None: + older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]] # Full checks eligibility.check_gbis() @@ -303,7 +334,9 @@ def get_epc_data(data, cleaned, cleaning_data, created_at): cleaning_data=cleaning_data, created_at=created_at, old_data=older_epcs, - 
full_sap_epc=full_sap_epc + full_sap_epc=full_sap_epc, + photo_supply_lookup=photo_supply_lookup, + floor_area_decile_thresholds=floor_area_decile_thresholds ) scoring_data.extend(scoring_dictionary) @@ -433,6 +466,18 @@ def analyse_results(results_df, data, survey_list): how="left", on="survey_key" ) + all_identified_eco = analysis_data[ + (analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin( + ["ECO4 A/W", "AFFORDABLE WARMTH"])) | + (analysis_data["eco4_eligible"]) + ] + + all_identified_gbis = analysis_data[ + (analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin( + ["ECO4 GBIS (ECO+)"])) | + (analysis_data["gbis_eligible"] & analysis_data["eco4_eligible"].isin([False, None])) + ] + warmfront_identified = analysis_data[analysis_data["warmfront_identified"]] # Of the ECO jobs, what proportion to we get right @@ -482,17 +527,22 @@ def app(): ) cleaned = msgpack.unpackb(cleaned, raw=False) - cleaning_data = read_parquet_from_s3( + cleaning_data = read_dataframe_from_s3_parquet( bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", ) created_at = datetime.now().isoformat() - results_df, scoring_data, nodata = get_epc_data(data, cleaned, cleaning_data, created_at) + photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev") + + results_df, scoring_data, nodata = get_epc_data( + data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds + ) # Store + # Old file was ha16.pickle # import pickle - # with open("ha16.pickle", "wb") as f: + # with open("ha16_8_jan_2.pickle", "wb") as f: # pickle.dump( # { # "scoring_data": scoring_data, @@ -500,3 +550,11 @@ def app(): # "nodata": nodata # }, f # ) + + # Read pickle + # import pickle + # with open("ha16.pickle", "rb") as f: + # saved = pickle.load(f) + # scoring_data = saved["scoring_data"] + # results_df = saved["results"] + # nodata = saved["nodata"] diff --git 
a/etl/eligibility/ha_15_32/ha24_app.py b/etl/eligibility/ha_15_32/ha24_app.py index 3edf8735..0f82f30a 100644 --- a/etl/eligibility/ha_15_32/ha24_app.py +++ b/etl/eligibility/ha_15_32/ha24_app.py @@ -1,14 +1,13 @@ +import os import msgpack import openpyxl -from openpyxl.styles.colors import COLOR_INDEX from pathlib import Path from datetime import datetime import pandas as pd import numpy as np -from utils.s3 import read_from_s3 +from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet from utils.logger import setup_logger from dotenv import load_dotenv -from backend.app.utils import read_parquet_from_s3 from tqdm import tqdm from backend.SearchEpc import SearchEpc from etl.eligibility.Eligibility import Eligibility @@ -16,9 +15,9 @@ from etl.eligibility.ha_15_32.app import prepare_model_data_row from etl.epc.DataProcessor import DataProcessor from etl.epc.settings import COLUMNS_TO_MERGE_ON from backend.ml_models.api import ModelApi +from etl.solar.SolarPhotoSupply import SolarPhotoSupply -import re - +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env" logger = setup_logger() @@ -170,24 +169,46 @@ def load_data(): return data, survey_list -def get_epc_data(data, cleaned, cleaning_data, created_at): +def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds): scoring_data = [] results = [] nodata = [] + property_type_lookup = { + "01 HOUSE": "House", + "02 FLAT": "Flat", + "03 BUNGALOW": "Bungalow", + "05 BEDSIT": "Flat", + "04 MAISONETTE": "Maisonette", + "01 HOUSE MID": "House", + "10 PBUNGALOW": "Bungalow", + "14 SFLAT": "Flat", + "12 SBEDSIT": "Flat", + "11 PFLAT": "Flat", + "13 SBUNGALOW": "Bungalow", + " 01 HOUSE MID": "House", + "09 PBEDSIT": "Flat" + } + for _, property_meta in tqdm(data.iterrows(), total=len(data)): + searcher = SearchEpc( address1=property_meta["HouseNo"], postcode=property_meta["Postcode"], - size=1000 + 
auth_token=EPC_AUTH_TOKEN, + os_api_key=None, + full_address=property_meta["Address"] ) - searcher.search() + searcher.ordnance_survey_client.property_type = property_type_lookup[property_meta["Property Type"]] + searcher.find_property(skip_os=True) - if searcher.data is None: + if searcher.newest_epc is None: nodata.append(property_meta) continue - newest_epc, older_epcs, full_sap_epc = searcher.retrieve(address=property_meta["Address"]) + newest_epc = searcher.newest_epc + older_epcs = searcher.older_epcs + full_sap_epc = searcher.full_sap_epc # We also want to get the penultimate epc penultimate_epc, _ = searcher.filter_newest_epc(older_epcs) if not penultimate_epc: @@ -197,23 +218,25 @@ def get_epc_data(data, cleaned, cleaning_data, created_at): eligibility.check_gbis_warmfront() eligibility.check_eco4_warmfront() - if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront) and ( - property_meta["warmfront_identified"] - ): + if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront): eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned) eligibility.check_gbis_warmfront() eligibility.check_eco4_warmfront() # If this is the case, we need to update the older epcs - older_epcs = [ - x for x in older_epcs if x["lmk-key"] not in [newest_epc["lmk-key"], penultimate_epc["lmk-key"]] - ] + # older_epcs = [ + # x for x in older_epcs if x["lmk-key"] not in [newest_epc["lmk-key"], penultimate_epc["lmk-key"]] + # ] + # If this is the case, we need to update the older epcs + # We don't update just to make data cleaning easier + if penultimate_epc.get("estimated") is None: + older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]] # Full checks eligibility.check_gbis() eligibility.check_eco4() if eligibility.eco4_warmfront["eligible"]: - if eligibility.epc["uprn"] == "": + if eligibility.epc["uprn"] in ["", None]: eligibility.epc["uprn"] = 
int(property_meta["row_id"].split("_")[1]) scoring_dictionary = prepare_model_data_row( @@ -223,7 +246,9 @@ def get_epc_data(data, cleaned, cleaning_data, created_at): cleaning_data=cleaning_data, created_at=created_at, old_data=older_epcs, - full_sap_epc=full_sap_epc + full_sap_epc=full_sap_epc, + photo_supply_lookup=photo_supply_lookup, + floor_area_decile_thresholds=floor_area_decile_thresholds ) scoring_data.extend(scoring_dictionary) @@ -277,7 +302,7 @@ def get_epc_data(data, cleaned, cleaning_data, created_at): scoring_df = DataProcessor.clean_efficiency_variables(scoring_df) scoring_df["UPRN"] = scoring_df["UPRN"].astype(int) - model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at) + model_api = ModelApi(portfolio_id="ha24-eligibility", timestamp=created_at) all_predictions = model_api.predict_all( df=scoring_df, bucket="retrofit-data-dev", @@ -353,6 +378,18 @@ def analyse_results(results_df, data, survey_list): how="left", on="survey_key" ) + all_identified_eco = analysis_data[ + (analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin( + ["ECO4 A/W"])) | + (analysis_data["eco4_eligible"]) + ] + + all_identified_gbis = analysis_data[ + (analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin( + ["ECO4 GBIS (ECO+)"])) | + (analysis_data["gbis_eligible"] & analysis_data["eco4_eligible"].isin([False, None])) + ] + warmfront_identified = analysis_data[analysis_data["warmfront_identified"]] # Of the ECO jobs, what proportion to we get right @@ -403,17 +440,21 @@ def app(): ) cleaned = msgpack.unpackb(cleaned, raw=False) - cleaning_data = read_parquet_from_s3( + cleaning_data = read_dataframe_from_s3_parquet( bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", ) created_at = datetime.now().isoformat() - results_df, scoring_data, nodata = get_epc_data(data, cleaned, cleaning_data, created_at) + photo_supply_lookup, floor_area_decile_thresholds = 
SolarPhotoSupply.load(bucket="retrofit-data-dev") + + results_df, scoring_data, nodata = get_epc_data( + data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds + ) # Pickle results just in case # import pickle - # with open("ha24.pickle", "wb") as f: + # with open("ha24_8_jan.pickle", "wb") as f: # pickle.dump( # { # "scoring_data": scoring_data, @@ -421,3 +462,11 @@ def app(): # "nodata": nodata # }, f # ) + + # Read in pickle + # import pickle + # with open("ha24_8_jan.pickle", "rb") as f: + # saved = pickle.load(f) + # scoring_data = saved["scoring_data"] + # results_df = saved["results"] + # nodata = saved["nodata"] diff --git a/etl/eligibility/ha_15_32/ha7_app.py b/etl/eligibility/ha_15_32/ha7_app.py index 0152ab91..62da5a52 100644 --- a/etl/eligibility/ha_15_32/ha7_app.py +++ b/etl/eligibility/ha_15_32/ha7_app.py @@ -16,6 +16,7 @@ from etl.eligibility.ha_15_32.app import prepare_model_data_row from etl.epc.DataProcessor import DataProcessor from etl.epc.settings import COLUMNS_TO_MERGE_ON from backend.ml_models.api import ModelApi +from etl.solar.SolarPhotoSupply import SolarPhotoSupply ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env" @@ -67,12 +68,16 @@ def load_data(): return df -def get_ha7_data(data, cleaned, cleaning_data, created_at): +def get_ha7_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds): property_type_lookup = { - "Mid Terrace": "Mid-Terrace", - "End Terrace": "End-Terrace", - "Semi Detached": "Semi-Detached", - "Detached": "Detached", + # "Mid Terrace": "Mid-Terrace", + # "End Terrace": "End-Terrace", + # "Semi Detached": "Semi-Detached", + # "Detached": "Detached", + "House": "House", + "Flat": "Flat", + "Bungalow": "Bungalow", + "Maisonette": "Maisonette", } scoring_data = [] @@ -80,7 +85,7 @@ def get_ha7_data(data, cleaned, cleaning_data, created_at): nodata = [] for _, house in tqdm(data.iterrows(), total=len(data)): - if 
house["Address"] is not None: + if house["Address"]: address = house["Address"] else: address = house["Address2"] @@ -89,7 +94,8 @@ def get_ha7_data(data, cleaned, cleaning_data, created_at): address1=address, postcode=house["Postcode"], auth_token=EPC_AUTH_TOKEN, - os_api_key=None + os_api_key=None, + property_type=property_type_lookup.get(house["Archetype"]), ) searcher.find_property(skip_os=True) @@ -118,7 +124,9 @@ def get_ha7_data(data, cleaned, cleaning_data, created_at): cleaning_data=cleaning_data, created_at=created_at, old_data=older_epcs, - full_sap_epc=full_sap_epc + full_sap_epc=full_sap_epc, + photo_supply_lookup=photo_supply_lookup, + floor_area_decile_thresholds=floor_area_decile_thresholds ) scoring_data.extend(scoring_dictionary) @@ -285,9 +293,13 @@ def app(): bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", ) + photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev") + created_at = datetime.now().isoformat() - results_df, scoring_data, nodata = get_ha7_data(data, cleaned, cleaning_data, created_at) + results_df, scoring_data, nodata = get_ha7_data( + data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds + ) # Pickle results # import pickle diff --git a/etl/solar/SolarPhotoSupply.py b/etl/solar/SolarPhotoSupply.py index 6a225b5a..1a80c37a 100644 --- a/etl/solar/SolarPhotoSupply.py +++ b/etl/solar/SolarPhotoSupply.py @@ -210,7 +210,20 @@ class SolarPhotoSupply: ] if photo_supply_matched.empty: - raise ValueError("No photo supply matched") + # There are a small number of cases where we don't get a full match so try again with a more aggregated + # average + photo_supply_matched = photo_supply_lookup[ + (photo_supply_lookup["tenure"] == tenure) & + (photo_supply_lookup["built_form"] == built_form) & + (photo_supply_lookup["property_type"] == property_type) + ] + if construction_age_band in 
photo_supply_matched["construction_age_band"].values: + photo_supply_matched = photo_supply_matched[ + photo_supply_matched["construction_age_band"] == construction_age_band + ] + + if photo_supply_matched.empty: + raise ValueError("No photo supply matches") floor_area_decile = cls.classify_floor_area( floor_area, floor_area_decile_thresholds["floor_area_decile_thresholds"].values diff --git a/recommendations/tests/test_ventilation_recommendations.py b/recommendations/tests/test_ventilation_recommendations.py index 893bb01a..3242b1d1 100644 --- a/recommendations/tests/test_ventilation_recommendations.py +++ b/recommendations/tests/test_ventilation_recommendations.py @@ -1,5 +1,4 @@ from backend.Property import Property -from unittest.mock import Mock from recommendations.VentilationRecommendations import VentilationRecommendations from recommendations.tests.test_data.materials import materials @@ -7,7 +6,7 @@ from recommendations.tests.test_data.materials import materials class TestVentilationRecommendations: def test_natural_ventilation(self): - input_property1 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock()) + input_property1 = Property(id=1, postcode="F4k3 6", address="623 fake street") input_property1.data = {"mechanical-ventilation": "natural"} recommender = VentilationRecommendations( @@ -28,7 +27,7 @@ class TestVentilationRecommendations: assert recommender.recommendation[0]["parts"][0]["quantity"] == 2 def test_missing_ventilation(self): - input_property2 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock()) + input_property2 = Property(id=1, postcode="F4k3 6", address="623 fake street") input_property2.data = {"mechanical-ventilation": None} recommender2 = VentilationRecommendations( @@ -49,7 +48,7 @@ class TestVentilationRecommendations: assert recommender2.recommendation[0]["parts"][0]["quantity"] == 2 def test_nodata_ventilation(self): - input_property3 = Property(id=1, postcode="F4k3 6", 
address1="623 fake street", epc_client=Mock()) + input_property3 = Property(id=1, postcode="F4k3 6", address="623 fake street") input_property3.data = {"mechanical-ventilation": "NO DATA!!"} recommender3 = VentilationRecommendations( @@ -70,7 +69,7 @@ class TestVentilationRecommendations: assert recommender3.recommendation[0]["parts"][0]["quantity"] == 2 def test_existing_ventilation_1(self): - input_property4 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock()) + input_property4 = Property(id=1, postcode="F4k3 6", address="623 fake street") input_property4.data = {"mechanical-ventilation": 'mechanical, extract only'} recommender4 = VentilationRecommendations( @@ -86,7 +85,7 @@ class TestVentilationRecommendations: assert recommender4.has_ventilaion def test_existing_ventilation_2(self): - input_property5 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock()) + input_property5 = Property(id=1, postcode="F4k3 6", address="623 fake street") input_property5.data = {"mechanical-ventilation": 'mechanical, supply and extract'} recommender5 = VentilationRecommendations( diff --git a/recommendations/tests/test_wall_recommendations.py b/recommendations/tests/test_wall_recommendations.py index 0258e592..2fbf3239 100644 --- a/recommendations/tests/test_wall_recommendations.py +++ b/recommendations/tests/test_wall_recommendations.py @@ -231,7 +231,7 @@ class TestWallRecommendationsBase: class TestCavityWallRecommensations: def test_fill_empty_cavity(self): - input_property = Property(id=1, postcode="F4k3", address1="123 fake street", epc_client=Mock()) + input_property = Property(id=1, postcode="F4k3", address="123 fake street") input_property.walls = { 'original_description': 'Cavity wall, as built, no insulation (assumed)', 'clean_description': 'Cavity wall, as built, no insulation', @@ -265,7 +265,7 @@ class TestCavityWallRecommensations: assert np.isclose(recommender.recommendations[1]["total"], 2004.6600000000003) 
def test_fill_partial_filled_cavity(self): - input_property = Property(id=1, postcode="F4k3", address1="123 fake street", epc_client=Mock()) + input_property = Property(id=1, postcode="F4k3", address="123 fake street") input_property.walls = { 'original_description': 'Cavity wall, as built, partial insulation (assumed)', 'clean_description': 'Cavity wall, as built, partial insulation', @@ -299,7 +299,7 @@ class TestCavityWallRecommensations: assert np.isclose(recommender.recommendations[1]["total"], 1999.9350000000002) def test_system_built_wall(self): - input_property2 = Property(id=1, postcode="F4k3 2", address1="223 fake street", epc_client=Mock()) + input_property2 = Property(id=1, postcode="F4k3 2", address="223 fake street") input_property2.walls = { 'original_description': 'System built, as built, no insulation (assumed)', 'clean_description': 'System built, as built, no insulation', @@ -346,7 +346,7 @@ class TestCavityWallRecommensations: assert recommender2.recommendations[6]["parts"][0]["depth"] == 52.5 def test_timber_frame_wall(self): - input_property3 = Property(id=1, postcode="F4k3 2", address1="223 fake street", epc_client=Mock()) + input_property3 = Property(id=1, postcode="F4k3 2", address="223 fake street") input_property3.walls = { 'original_description': 'Timber frame, as built, no insulation (assumed)', 'clean_description': 'Timber frame, as built, no insulation', @@ -388,7 +388,7 @@ class TestCavityWallRecommensations: assert recommender3.recommendations[1]["parts"][0]["depth"] == 150.0 def test_granite_or_whinstone_wall(self): - input_property4 = Property(id=1, postcode="F4k3 2", address1="223 fake street", epc_client=Mock()) + input_property4 = Property(id=1, postcode="F4k3 2", address="223 fake street") input_property4.walls = { 'original_description': 'Granite or whinstone, as built, no insulation (assumed)', 'clean_description': 'Granite or whinstone, as built, no insulation', @@ -430,7 +430,7 @@ class TestCavityWallRecommensations: 
assert recommender4.recommendations[1]["parts"][0]["depth"] == 150 def test_cob_wall(self): - input_property5 = Property(id=1, postcode="F4k3 2", address1="223 fake street", epc_client=Mock()) + input_property5 = Property(id=1, postcode="F4k3 2", address="223 fake street") input_property5.walls = { 'original_description': 'Cob, as built', 'clean_description': 'Cob, as built', @@ -472,7 +472,7 @@ class TestCavityWallRecommensations: assert recommender5.recommendations[3]["parts"][0]["depth"] == 100 def test_sandstone_or_limestone_wall(self): - input_property6 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock()) + input_property6 = Property(id=1, postcode="F4k3 6", address="623 fake street") input_property6.walls = { 'original_description': 'Sandstone or limestone, as built, no insulation (assumed)', 'clean_description': 'Sandstone or limestone, as built, no insulation', diff --git a/recommendations/tests/test_window_recommendations.py b/recommendations/tests/test_window_recommendations.py index ac461594..f103299d 100644 --- a/recommendations/tests/test_window_recommendations.py +++ b/recommendations/tests/test_window_recommendations.py @@ -1,6 +1,5 @@ from recommendations.WindowsRecommendations import WindowsRecommendations from backend.Property import Property -from unittest.mock import Mock from recommendations.tests.test_data.materials import materials @@ -15,8 +14,7 @@ class TestWindowRecommendations: property_1 = Property( id=1, postcode='1', - address1='1', - epc_client=Mock(), + address='1', data={ "county": "Wychavon", "multi-glaze-proportion": 0 @@ -52,8 +50,7 @@ class TestWindowRecommendations: property_2 = Property( id=1, postcode='1', - address1='1', - epc_client=Mock(), + address='1', data={ "county": "Wychavon", "multi-glaze-proportion": 33 @@ -86,8 +83,7 @@ class TestWindowRecommendations: property_3 = Property( id=1, postcode='1', - address1='1', - epc_client=Mock(), + address='1', data={ "county": "Wychavon", 
"multi-glaze-proportion": 80 @@ -110,8 +106,7 @@ class TestWindowRecommendations: property_4 = Property( id=1, postcode='1', - address1='1', - epc_client=Mock(), + address='1', data={ "county": "Wychavon", "multi-glaze-proportion": 100 @@ -134,8 +129,7 @@ class TestWindowRecommendations: property_5 = Property( id=1, postcode='1', - address1='1', - epc_client=Mock(), + address='1', data={ "county": "Wychavon", "multi-glaze-proportion": 50 @@ -164,8 +158,7 @@ class TestWindowRecommendations: property_6 = Property( id=1, postcode='1', - address1='1', - epc_client=Mock(), + address='1', data={ "county": "Wychavon", "multi-glaze-proportion": 0 @@ -199,8 +192,7 @@ class TestWindowRecommendations: property_7 = Property( id=1, postcode='1', - address1='1', - epc_client=Mock(), + address='1', data={ "county": "Wychavon", "multi-glaze-proportion": 100 @@ -227,11 +219,11 @@ class TestWindowRecommendations: property_8 = Property( id=1, postcode='1', - address1='1', - epc_client=Mock(), + address='1', data={ "county": "Wychavon", - "multi-glaze-proportion": 80 + "multi-glaze-proportion": 80, + "uprn": 1 } ) property_8.windows = {'original_description': 'Mostly triple glazing', 'has_glazing': True, From 04dba265de129ea891b15a0abf3994089096eef4 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 10 Jan 2024 12:25:29 +0000 Subject: [PATCH 20/26] added in cavity age estimation --- backend/ml_models/Valuation.py | 4 +- etl/eligibility/Eligibility.py | 44 +++- etl/eligibility/ha_15_32/ha16_app.py | 78 +++++- etl/eligibility/ha_15_32/ha25_app.py | 375 +++++++++++++++++++++++++-- etl/eligibility/ha_15_32/ha7_app.py | 19 +- 5 files changed, 490 insertions(+), 30 deletions(-) diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py index cdbbe698..018b4678 100644 --- a/backend/ml_models/Valuation.py +++ b/backend/ml_models/Valuation.py @@ -19,7 +19,9 @@ class PropertyValuation: 100070505235: 344000, # Based on Zoopla's estimation of 131 School road, which 
is also semi-detached 100070513306: 182000, # Based on Zoopla's estimation of 61 Simmons Drive 100071306896: 77000, # Based on Flat 2 of 44 Wedgewood Road on Zoopla - 100021192109: 650000 # Based on Zoopla + 100021192109: 650000, # Based on Zoopla + 766249482: 358000, # Based on Zoopla estimate for 19 Spring Lane, 3 bedroom semi-detached + 100120703802: 277000, # Based on Zoopla } # We base our valuation uplifts on a number of sources diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py index c9d75606..13966655 100644 --- a/etl/eligibility/Eligibility.py +++ b/etl/eligibility/Eligibility.py @@ -33,6 +33,7 @@ class Eligibility: # If the loft has less than 100mm of insulation, we classify the home has needing loft insulation LOFT_INSULATION_THRESHOLD = 100 + HIGH_LOFT_INSULATION_THRESHOLD = 269 # Because EPCS have different values for tenure, we need to remap them to a common set of values tenure_remap = { @@ -104,6 +105,8 @@ class Eligibility: self.LOFT_INSULATION_THRESHOLD if loft_thickness_threshold is None else loft_thickness_threshold ) + high_loft_thickness_threshold = self.HIGH_LOFT_INSULATION_THRESHOLD + # We firstly check if the roof is a loft is_loft = self.roof["is_pitched"] and (not self.roof["is_roof_room"]) @@ -122,7 +125,22 @@ class Eligibility: is_flat=self.roof["is_flat"] ) - if insulation_thickness > loft_thickness_threshold: + if insulation_thickness <= loft_thickness_threshold: + self.loft = { + "suitability": True, + "thickness": insulation_thickness, + "reason": None + } + + if insulation_thickness <= high_loft_thickness_threshold: + self.loft = { + "suitability": True, + "thickness": insulation_thickness, + "reason": "high loft thickness but below regulation" + } + return + + if insulation_thickness > high_loft_thickness_threshold: # Insulation is already thick enough self.loft = { "suitability": False, @@ -131,12 +149,6 @@ class Eligibility: } return - self.loft = { - "suitability": True, - "thickness": 
insulation_thickness, - "reason": None - } - def cavity_insulation(self): """ @@ -161,6 +173,17 @@ class Eligibility: is_partial_filled_cavity = is_cavity and is_partial_filled is_underperforming_cavity = is_cavity and is_underperforming + # Check if it has internal or external wall insulation + has_internal_wall_insulation = self.walls["internal_insulation"] + has_external_wall_insulation = self.walls["external_insulation"] + + if has_internal_wall_insulation or has_external_wall_insulation: + self.cavity = { + "suitability": False, + "type": "internal or external wall insulation" + } + return + if is_unfilled_cavity: self.cavity = { "suitability": True, @@ -354,6 +377,13 @@ class Eligibility: else: message = "subject to post retrofit sap" if is_eligible else "not eligible" + # Update the message to flag properties that failed just because of a full cavity. + # We need to double check that the wall is a cavity, that the loft is suitable and that the + # sap is within reason + # We can then estimate the age of the cavity fill + if not is_eligible and (current_sap < 69) and self.loft["suitability"] and self.walls["is_cavity_wall"]: + message = "Failed due to full cavity - check cavity age" + self.eco4_warmfront = { "eligible": is_eligible, "message": message diff --git a/etl/eligibility/ha_15_32/ha16_app.py b/etl/eligibility/ha_15_32/ha16_app.py index bd19fe97..446c35c9 100644 --- a/etl/eligibility/ha_15_32/ha16_app.py +++ b/etl/eligibility/ha_15_32/ha16_app.py @@ -252,6 +252,31 @@ def load_data(): return data, survey_list +def calculate_cavity_age(newest_epc, older_epcs, cleaned): + all_epcs = [newest_epc] + older_epcs + + df = [] + for x in all_epcs: + # Get the cleaned mapping + mapped = [y for y in cleaned["walls-description"] if y["original_description"] == x["walls-description"]] + if not mapped: + continue + df.append( + { + **mapped[0], + "inspection-date": x["lodgement-date"], + } + ) + + df = pd.DataFrame(df) + df = df[ + (df["is_cavity_wall"] == True) & 
(df["is_filled_cavity"] == True) + ] + + cavity_age = (datetime.now() - pd.to_datetime(df["inspection-date"].max())).days + return cavity_age + + def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds): scoring_data = [] results = [] @@ -319,6 +344,19 @@ def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, if penultimate_epc.get("estimated") is None: older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]] + # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity + + # Loft MUST be suitable + cavity_age = None + if ( + eligibility.walls["is_cavity_wall"] and + eligibility.walls["is_filled_cavity"] and + eligibility.loft["suitability"] and + eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age" + ): + # We check the age of the cavity and if it's particularly old, we flag it + cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned) + # Full checks eligibility.check_gbis() eligibility.check_eco4() @@ -362,6 +400,10 @@ def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, "heating": eligibility.epc["mainheat-description"], "tenure": eligibility.tenure, "date_epc": eligibility.epc["lodgement-date"], + "loft_thickness": eligibility.roof["insulation_thickness"], + "cavity_age": cavity_age, + **eligibility.walls, + **eligibility.roof, } ) @@ -472,12 +514,46 @@ def analyse_results(results_df, data, survey_list): (analysis_data["eco4_eligible"]) ] + eco_eligible = analysis_data[analysis_data["eco4_eligible"] == True] + eco_ineligible = analysis_data[analysis_data["eco4_eligible"] == False] + + eco_ineligible["eco4_message"].value_counts() + + # SAP too high: + sap_too_high = eco_ineligible[eco_ineligible["eco4_message"] == "sap too high"].copy() + further_possibilities = sap_too_high[ + sap_too_high["walls"].isin( + [ + "Cavity wall, as built, 
insulated", + "Cavity wall, as built, no insulation", + "Cavity wall, as built, partial insulation", + "Cavity wall, no insulation", + "Cavity wall, partial insulation" + ] + ) + ] + + filled_cavities = eco_ineligible[ + eco_ineligible["eco4_message"] == "sap too high" + ] + + warmfront_identified = analysis_data[analysis_data["warmfront_identified"]] + warmfront_identified["walls"].value_counts() + all_identified_gbis = analysis_data[ (analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin( ["ECO4 GBIS (ECO+)"])) | (analysis_data["gbis_eligible"] & analysis_data["eco4_eligible"].isin([False, None])) ] + empty_cavity_desriptions = [ + "Cavity wall, as built, no insulation", "Cavity wall, as built, partial insulation", + "Cavity wall, no insulation", "Cavity wall, partial insulation" + ] + + empty_cavities = analysis_data[analysis_data["walls"].isin(empty_cavity_desriptions)] + remaining_empty = empty_cavities[~empty_cavities["warmfront_identified"]] + warmfront_identified = analysis_data[analysis_data["warmfront_identified"]] # Of the ECO jobs, what proportion to we get right @@ -553,7 +629,7 @@ def app(): # Read pickle # import pickle - # with open("ha16.pickle", "rb") as f: + # with open("ha16_8_jan_2.pickle", "rb") as f: # saved = pickle.load(f) # scoring_data = saved["scoring_data"] # results_df = saved["results"] diff --git a/etl/eligibility/ha_15_32/ha25_app.py b/etl/eligibility/ha_15_32/ha25_app.py index 4d86a546..541f77d3 100644 --- a/etl/eligibility/ha_15_32/ha25_app.py +++ b/etl/eligibility/ha_15_32/ha25_app.py @@ -1,6 +1,6 @@ +import os import msgpack import openpyxl -from openpyxl.styles.colors import COLOR_INDEX from pathlib import Path from datetime import datetime import pandas as pd @@ -8,7 +8,7 @@ import numpy as np from utils.s3 import read_from_s3 from utils.logger import setup_logger from dotenv import load_dotenv -from backend.app.utils import read_parquet_from_s3 +from utils.s3 import read_dataframe_from_s3_parquet from 
tqdm import tqdm from backend.SearchEpc import SearchEpc from etl.eligibility.Eligibility import Eligibility @@ -16,9 +16,11 @@ from etl.eligibility.ha_15_32.app import prepare_model_data_row from etl.epc.DataProcessor import DataProcessor from etl.epc.settings import COLUMNS_TO_MERGE_ON from backend.ml_models.api import ModelApi +from etl.solar.SolarPhotoSupply import SolarPhotoSupply import re +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env" logger = setup_logger() @@ -272,27 +274,99 @@ def load_data(): ) data["warmfront_identified"] = data["warmfront_identified"].fillna(False) - return data, eco4_prospects_survey_list + lost_identified_properties = eco4_prospects_survey_list[ + ~eco4_prospects_survey_list["survey_key"].isin(matched["survey_key"]) + ] + + return data, eco4_prospects_survey_list, lost_identified_properties -def get_epc_data(data, cleaned, cleaning_data, created_at): +def map_year_to_age_band(year): + try: + year = int(year) + except ValueError: + return "Invalid Year" # Or any other way you want to handle invalid inputs + + if year < 1900: + return "England and Wales: before 1900" + elif 1900 <= year <= 1929: + return "England and Wales: 1900-1929" + elif 1930 <= year <= 1949: + return "England and Wales: 1930-1949" + elif 1950 <= year <= 1966: + return "England and Wales: 1950-1966" + elif 1967 <= year <= 1975: + return "England and Wales: 1967-1975" + elif 1976 <= year <= 1982: + return "England and Wales: 1976-1982" + elif 1983 <= year <= 1990: + return "England and Wales: 1983-1990" + elif 1991 <= year <= 1995: + return "England and Wales: 1991-1995" + elif 1996 <= year <= 2002: + return "England and Wales: 1996-2002" + elif 2003 <= year <= 2006: + return "England and Wales: 2003-2006" + elif 2007 <= year <= 2011: + return "England and Wales: 2007-2011" + else: # Assuming all remaining years are 2012 onwards + return "England and Wales: 2012 onwards" + + +def 
get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds): scoring_data = [] results = [] nodata = [] + property_type_lookup = { + "Flat": {"property-type": "Flat", "built-form": None}, + "Mid Terrace House": {"property-type": "House", "built-form": "Mid-Terrace"}, + "End Terrace House": {"property-type": "House", "built-form": "End-Terrace"}, + "Maisonnette": {"property-type": "Flat", "built-form": None}, + "Semi Detached House": {"property-type": "House", "built-form": "Semi-Detached"}, + "Detached House": {"property-type": "House", "built-form": "Detached"}, + "Coach House": {"property-type": "House", "built-form": "Detached"}, + "Bungalow": {"property-type": "Bungalow", "built-form": None}, + "Detached Bungalow": {"property-type": "Bungalow", "built-form": "Detached"}, + "House": {"property-type": "House", "built-form": None}, + "Semi Detached Bung": {"property-type": "Bungalow", "built-form": "Semi-Detached"}, + "Bedspace": {"property-type": None, "built-form": None}, + "Office Buildings": {"property-type": None, "built-form": None}, + "End Terrace Bungalow": {"property-type": "Bungalow", "built-form": "End-Terrace"}, + "Mid Terrace Bungalow": {"property-type": "Bungalow", "built-form": "Mid-Terrace"}, + "Bedsit": {"property-type": "Flat", "built-form": None}, + "Mid Terrace Housekeeping": {"property-type": "House", "built-form": "Mid-Terrace"}, + "Mid Terrace Housekeeping ": {"property-type": "House", "built-form": "Mid-Terrace"}, + "End Terrace Housex": {"property-type": "House", "built-form": "End-Terrace"}, + "Guest Room": {"property-type": None, "built-form": None} + } + for _, property_meta in tqdm(data.iterrows(), total=len(data)): + searcher = SearchEpc( address1=property_meta["HouseNo"], postcode=property_meta["postcode"], - size=1000 + auth_token=EPC_AUTH_TOKEN, + os_api_key=None, + full_address=property_meta["address"] ) - searcher.search() + searcher.ordnance_survey_client.property_type = 
property_type_lookup[property_meta["T1_AssetType"]][ + "property-type"] + searcher.ordnance_survey_client.built_form = property_type_lookup[property_meta["T1_AssetType"]]["built-form"] + searcher.find_property(skip_os=True) - if searcher.data is None: + if searcher.newest_epc is None: nodata.append(property_meta) continue - newest_epc, older_epcs, full_sap_epc = searcher.retrieve(address=property_meta["T1_Address"]) + if searcher.newest_epc.get("estimated"): + # We insert the row ID as our proxy for UPRN + proxy_uprn = int(property_meta["row_id"].split("_")[1]) + searcher.newest_epc["uprn"] = proxy_uprn + + newest_epc = searcher.newest_epc + older_epcs = searcher.older_epcs + full_sap_epc = searcher.full_sap_epc # We also want to get the penultimate epc penultimate_epc, _ = searcher.filter_newest_epc(older_epcs) if not penultimate_epc: @@ -302,25 +376,26 @@ def get_epc_data(data, cleaned, cleaning_data, created_at): eligibility.check_gbis_warmfront() eligibility.check_eco4_warmfront() - if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront) and ( - property_meta["warmfront_identified"] - ): + if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront): eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned) eligibility.check_gbis_warmfront() eligibility.check_eco4_warmfront() # If this is the case, we need to update the older epcs - older_epcs = [ - x for x in older_epcs if x["lmk-key"] not in [newest_epc["lmk-key"], penultimate_epc["lmk-key"]] - ] + # We don't update just to make data cleaning easier + if penultimate_epc.get("estimated") is None: + older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]] # Full checks eligibility.check_gbis() eligibility.check_eco4() if eligibility.eco4_warmfront["eligible"]: - if eligibility.epc["uprn"] == "": + if eligibility.epc["uprn"] in ["", None]: eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[1]) + if 
eligibility.epc["construction-age-band"] in ["", None]: + eligibility.epc["construction-age-band"] = map_year_to_age_band(property_meta["Build Yr"]) + scoring_dictionary = prepare_model_data_row( property_id=property_meta["row_id"], modelling_epc=eligibility.epc, @@ -328,7 +403,9 @@ def get_epc_data(data, cleaned, cleaning_data, created_at): cleaning_data=cleaning_data, created_at=created_at, old_data=older_epcs, - full_sap_epc=full_sap_epc + full_sap_epc=full_sap_epc, + photo_supply_lookup=photo_supply_lookup, + floor_area_decile_thresholds=floor_area_decile_thresholds, ) scoring_data.extend(scoring_dictionary) @@ -450,6 +527,232 @@ def get_epc_data(data, cleaned, cleaning_data, created_at): return results_df, scoring_data, nodata +def get_epc_data_for_lost_surveys( + lost_identified_properties, cleaned, cleaning_data, created_at, photo_supply_lookup, + floor_area_decile_thresholds +): + lost_identified_properties["row_id"] = [ + "lost_surveys_ha25_" + str(i) for i in range(0, len(lost_identified_properties)) + ] + + scoring_data = [] + results = [] + nodata = [] + + property_type_lookup = { + "MID-TERRACE": {"property-type": "House", "built-form": "Mid-Terrace"}, + "N/A": {"property-type": "House", "built-form": None}, + "END-TERRACE": {"property-type": "House", "built-form": "End-Terrace"}, + "GROUND-FLOOR": {"property-type": "House", "built-form": None}, + "TOP-FLOOR": {"property-type": "House", "built-form": None}, + "SEMI-DETACHED": {"property-type": "House", "built-form": "Semi-Detached"}, + "MID-FLOOR": {"property-type": "House", "built-form": None}, + "TOP-FLOOR FLAT": {"property-type": "House", "built-form": None}, + "DETACHED": {"property-type": "House", "built-form": "Detached"}, + "MID-FLOOR FLAT": {"property-type": "House", "built-form": None}, + "SEMI- DETACHED": {"property-type": "House", "built-form": "Semi-Detached"}, + "NO EPC ON GOV": {"property-type": "House", "built-form": None}, + "Top-floor flat": {"property-type": "House", "built-form": 
None}, + "GROUND-FLOOR FLAT": {"property-type": "House", "built-form": None}, + "NOT ON GOV SITE": {"property-type": "House", "built-form": None} + } + + for _, property_meta in tqdm(lost_identified_properties.iterrows(), total=len(lost_identified_properties)): + + if property_meta["POSTCODE"] is None: + continue + + full_address = ", ".join( + [str(x) for x in [ + property_meta["NO"], property_meta["ADDRESS 1"], property_meta["ADDRESS 2"], property_meta["ADDRESS 3"] + ] if x is not None] + ) + + searcher = SearchEpc( + address1=str(property_meta["NO"]), + postcode=property_meta["POSTCODE"], + auth_token=EPC_AUTH_TOKEN, + os_api_key=None, + full_address=full_address + ) + + property_type_key = property_meta["PROPERTY TYPE"] + if property_type_key is not None: + searcher.ordnance_survey_client.property_type = property_type_lookup[property_type_key.strip()][ + "property-type"] + searcher.ordnance_survey_client.built_form = property_type_lookup[property_type_key.strip()][ + "built-form"] + searcher.find_property(skip_os=True) + + if searcher.newest_epc is None: + nodata.append(property_meta) + continue + + if searcher.newest_epc.get("estimated"): + # We insert the row ID as our proxy for UPRN + proxy_uprn = int(property_meta["row_id"].split("_")[-1]) + searcher.newest_epc["uprn"] = proxy_uprn + + newest_epc = searcher.newest_epc + older_epcs = searcher.older_epcs + full_sap_epc = searcher.full_sap_epc + # We also want to get the penultimate epc + penultimate_epc, _ = searcher.filter_newest_epc(older_epcs) + if not penultimate_epc: + penultimate_epc = newest_epc + + eligibility = Eligibility(epc=newest_epc, cleaned=cleaned) + eligibility.check_gbis_warmfront() + eligibility.check_eco4_warmfront() + + if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront): + eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned) + eligibility.check_gbis_warmfront() + eligibility.check_eco4_warmfront() + # If this is the case, we need to update the 
older epcs + # We don't update just to make data cleaning easier + if penultimate_epc.get("estimated") is None: + older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]] + + # Full checks + eligibility.check_gbis() + eligibility.check_eco4() + + if eligibility.eco4_warmfront["eligible"] & (eligibility.epc["construction-age-band"] not in ["", None]): + if eligibility.epc["uprn"] in ["", None]: + eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[1]) + + scoring_dictionary = prepare_model_data_row( + property_id=property_meta["row_id"], + modelling_epc=eligibility.epc, + cleaned=cleaned, + cleaning_data=cleaning_data, + created_at=created_at, + old_data=older_epcs, + full_sap_epc=full_sap_epc, + photo_supply_lookup=photo_supply_lookup, + floor_area_decile_thresholds=floor_area_decile_thresholds, + ) + scoring_data.extend(scoring_dictionary) + + results.append( + { + "row_id": property_meta["row_id"], + "uprn": eligibility.epc["uprn"], + "Address": property_meta["ADDRESS 1"], + "Postcode": property_meta["POSTCODE"], + "property_type": eligibility.epc["property-type"], + "gbis_eligible": eligibility.gbis_warmfront, + "eco4_eligible": eligibility.eco4_warmfront["eligible"], + "eco4_message": eligibility.eco4_warmfront["message"], + "sap": float(eligibility.epc["current-energy-efficiency"]), + "gbis_eligible_future": eligibility.gbis["eligible"], + "gbis_eligible_future_message": eligibility.gbis["message"], + "eco4_eligible_future": eligibility.eco4["eligible"], + "eco4_eligible_future_message": eligibility.eco4["message"], + # Property components + "roof": eligibility.roof["clean_description"], + "walls": eligibility.walls["clean_description"], + "cavity_type": eligibility.cavity["type"], + "heating": eligibility.epc["mainheat-description"], + "tenure": eligibility.tenure, + "date_epc": eligibility.epc["lodgement-date"], + } + ) + + scoring_df = pd.DataFrame(scoring_data) + + # Perform the same cleaning as in the 
model - first clean number of room variables though + scoring_df = DataProcessor.apply_averages_cleaning( + data_to_clean=scoring_df, + cleaning_data=cleaning_data, + cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'], + colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"], + ) + + scoring_df = DataProcessor.apply_averages_cleaning( + data_to_clean=scoring_df, + cleaning_data=cleaning_data, + cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"], + ).drop(columns=["LOCAL_AUTHORITY"]) + + scoring_df = DataProcessor.clean_missings_after_description_process( + scoring_df, + ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or ( + "insulation_thickness" in c) or ("ENERGY_EFF" in c)] + ) + + scoring_df = DataProcessor.clean_efficiency_variables(scoring_df) + scoring_df["UPRN"] = scoring_df["UPRN"].astype(int) + + model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at) + all_predictions = model_api.predict_all( + df=scoring_df, + bucket="retrofit-data-dev", + prediction_buckets={ + "sap_change_predictions": "retrofit-sap-predictions-dev", + "heat_demand_predictions": "retrofit-heat-predictions-dev", + "carbon_change_predictions": "retrofit-carbon-predictions-dev" + } + ) + + predictions = all_predictions["sap_change_predictions"].copy() + + results_df = pd.DataFrame(results) + + predictions = predictions.rename(columns={"property_id": "row_id"}).merge( + results_df[["row_id", "sap"]], how="left", on="row_id" + ) + predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"] + predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index() + + results_df = results_df.merge( + predictions[["sap_uplift", "row_id"]], + how="left", + on="row_id" + ) + results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"] + + eligibility_assessment = [] + for _, row in results_df[results_df["eco4_eligible"] == True].iterrows(): + # The upgrade 
requirements are dependent on the current SAP + + # If the property is an F or G, it only needs to upgrade to an % + if row["sap"] <= 38: + if row["post_install_sap"] >= 57: + eligibility_classification = "highest confidence" + elif row["post_install_sap"] >= 55: + eligibility_classification = "high confidence" + elif row["post_install_sap"] >= 53: + eligibility_classification = "medium confidence" + else: + eligibility_classification = "unlikely" + else: + + if row["post_install_sap"] >= 71: + eligibility_classification = "highest confidence" + elif row["post_install_sap"] >= 69: + eligibility_classification = "high confidence" + elif row["post_install_sap"] >= 67: + eligibility_classification = "medium confidence" + else: + eligibility_classification = "unlikely" + + eligibility_assessment.append( + { + "row_id": row["row_id"], + "eligibility_classification": eligibility_classification + } + ) + + eligibility_assessment = pd.DataFrame(eligibility_assessment) + + results_df = results_df.merge( + eligibility_assessment, how="left", on="row_id" + ) + return results_df, scoring_data, nodata + + def analyse_results(results_df, data, eco4_prospects_survey_list): analysis_data = data[["row_id", "survey_key", "warmfront_identified"]].merge( results_df, how="left", on="row_id" @@ -457,6 +760,18 @@ def analyse_results(results_df, data, eco4_prospects_survey_list): warmfront_identified = analysis_data[analysis_data["warmfront_identified"]] + identified_eco = analysis_data[analysis_data["eco4_eligible"] == True] + identified_eco = identified_eco[identified_eco["eco4_message"] == "subject to post retrofit sap"] + + identified_gbis = analysis_data[ + (analysis_data["gbis_eligible"] == True) & (analysis_data["eco4_eligible"] == False) + ] + + # Take just unfilled cavities and remove filled potentials + identified_gbis["walls"].value_counts() + + identified_gbis["walls"].value_counts() + # Of the ECO jobs, what proportion to we get right success_rate = 
(warmfront_identified["eco4_eligible"] | warmfront_identified["gbis_eligible"]).sum() / \ @@ -490,8 +805,15 @@ def analyse_results(results_df, data, eco4_prospects_survey_list): ].shape[0] +def analyse_lost_surveys(results_df): + identified_eco = results_df[results_df["eco4_eligible"] == True] + # 59 for lost surveys + identified_gbis = results_df[results_df["gbis_eligible"] == True] + # 107 + + def app(): - data, eco4_prospects_survey_list = load_data() + data, eco4_prospects_survey_list, lost_identified_properties = load_data() data["row_id"] = ["ha25_" + str(i) for i in range(0, len(data))] @@ -501,16 +823,21 @@ def app(): ) cleaned = msgpack.unpackb(cleaned, raw=False) - cleaning_data = read_parquet_from_s3( + cleaning_data = read_dataframe_from_s3_parquet( bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", ) created_at = datetime.now().isoformat() - results_df, scoring_data, nodata = get_epc_data(data, cleaned, cleaning_data, created_at) + photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev") + + results_df, scoring_data, nodata = get_epc_data( + data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds + ) # Pickle the outputs + # Old data was ha25.pickle # import pickle - # with open("ha25.pickle", "wb") as f: + # with open("ha25_9_jan.pickle", "wb") as f: # pickle.dump( # { # "results_df": results_df, @@ -519,3 +846,11 @@ def app(): # }, # f # ) + + # Load in pickle + # import pickle + # with open("ha25_9_jan.pickle", "rb") as f: + # saved = pickle.load(f) + # results_df = saved["results_df"] + # scoring_data = saved["scoring_data"] + # nodata = saved["nodata"] diff --git a/etl/eligibility/ha_15_32/ha7_app.py b/etl/eligibility/ha_15_32/ha7_app.py index 62da5a52..544f614d 100644 --- a/etl/eligibility/ha_15_32/ha7_app.py +++ b/etl/eligibility/ha_15_32/ha7_app.py @@ -150,6 +150,7 @@ def get_ha7_data(data, cleaned, cleaning_data, created_at, 
photo_supply_lookup, "heating": eligibility.epc["mainheat-description"], "tenure": eligibility.tenure, "date_epc": eligibility.epc["lodgement-date"], + **newest_epc, } ) @@ -250,10 +251,18 @@ def get_ha7_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, def analyse_ha_7(results_df, data): df = results_df.merge( - data[["row_id", "row_code", "Property Type"]], how="left", on="row_id" + data[["row_id", "row_code", "Property Type", "Construction Year Band"]], how="left", on="row_id" ) warmfront_identification = df["row_code"].value_counts() warmfront_identified = df[df["row_code"] == "potential ECO4"] + warmfront_identified["walls"].value_counts(normalize=True) + + df["Construction Year Band"].value_counts(normalize=True) + + # Number of days from today + + days_to_today = (datetime.now() - pd.to_datetime(warmfront_identified["date_epc"])).dt.days + days_to_today.mean() property_types = df["Property Type"].value_counts() @@ -305,3 +314,11 @@ def app(): # import pickle # with open("ha7_results.pkl", "wb") as f: # pickle.dump({"results_df": results_df, "scoring_data": scoring_data, "nodata": nodata}, f) + + # Read in the old data + # import pickle + # with open("ha7_results.pkl", "rb") as f: + # old_data = pickle.load(f) + # results_df = old_data["results_df"] + # scoring_data = old_data["scoring_data"] + # nodata = old_data["nodata"] From 1bb188a8b8107b8adebd7e5163631c232a0e85c2 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 10 Jan 2024 16:14:01 +0000 Subject: [PATCH 21/26] working on eligibility pipeline --- backend/Property.py | 4 +- etl/eligibility/ha_15_32/ha16_app.py | 74 +++++++++++++++---------- etl/eligibility/ha_15_32/ha24_app.py | 17 +++++- recommendations/recommendation_utils.py | 24 ++++++++ 4 files changed, 87 insertions(+), 32 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 5713c179..03fc507e 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -336,7 +336,9 @@ class 
Property(Definitions): self.construction_age_band = 'England and Wales: 2012 onwards' if self.age_band is None: - raise ValueError("age_band is missing") + logger.info("Age band is missing - filling with national average") + self.age_band = "C" + self.construction_age_band = "England and Wales: 1930-1949" def set_spatial(self, spatial: pd.DataFrame): """ diff --git a/etl/eligibility/ha_15_32/ha16_app.py b/etl/eligibility/ha_15_32/ha16_app.py index 446c35c9..f2b80542 100644 --- a/etl/eligibility/ha_15_32/ha16_app.py +++ b/etl/eligibility/ha_15_32/ha16_app.py @@ -17,6 +17,7 @@ from etl.epc.DataProcessor import DataProcessor from etl.epc.settings import COLUMNS_TO_MERGE_ON from backend.ml_models.api import ModelApi from etl.solar.SolarPhotoSupply import SolarPhotoSupply +from recommendations.recommendation_utils import calculate_cavity_age import re @@ -252,31 +253,6 @@ def load_data(): return data, survey_list -def calculate_cavity_age(newest_epc, older_epcs, cleaned): - all_epcs = [newest_epc] + older_epcs - - df = [] - for x in all_epcs: - # Get the cleaned mapping - mapped = [y for y in cleaned["walls-description"] if y["original_description"] == x["walls-description"]] - if not mapped: - continue - df.append( - { - **mapped[0], - "inspection-date": x["lodgement-date"], - } - ) - - df = pd.DataFrame(df) - df = df[ - (df["is_cavity_wall"] == True) & (df["is_filled_cavity"] == True) - ] - - cavity_age = (datetime.now() - pd.to_datetime(df["inspection-date"].max())).days - return cavity_age - - def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds): scoring_data = [] results = [] @@ -508,10 +484,48 @@ def analyse_results(results_df, data, survey_list): how="left", on="survey_key" ) - all_identified_eco = analysis_data[ - (analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin( - ["ECO4 A/W", "AFFORDABLE WARMTH"])) | - (analysis_data["eco4_eligible"]) + from recommendation_utils import 
convert_thickness_to_numeric + + analysis_data["roof_insulation_thickness"] = analysis_data["roof_insulation_thickness"].fillna(None) + analysis_data["roof_insulation_thickness"] = np.where( + pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"] + ) + analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply( + lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True) + ) + + warmfront_sold_eco4 = analysis_data[ + (analysis_data["warmfront_identified"] == True) & ( + analysis_data["funding_scheme"].isin(["ECO4 A/W", "AFFORDABLE WARMTH"])) + ] + + warmfront_sold_gbis = analysis_data[ + (analysis_data["warmfront_identified"] == True) & ( + analysis_data["funding_scheme"].isin(["ECO4 GBIS (ECO+)"])) + ] + # 1407 + + ideal_eco4_warmfront_not_sold = analysis_data[ + (analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & ( + analysis_data["roof_insulation_thickness_numeric"] <= 100) + ] + + secondary_eco4_warmfront_not_sold = analysis_data[ + (analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & ( + analysis_data["roof_insulation_thickness_numeric"] > 100) + ] + + # underperforming cavities + underperforming_cavities = analysis_data[ + (analysis_data["eco4_message"] == "Failed due to full cavity - check cavity age") & ( + analysis_data["cavity_age"] > 10 * 365 + ) + ] + + identified_gbis_not_sold = analysis_data[ + (analysis_data["gbis_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & ( + analysis_data["eco4_eligible"] == False + ) ] eco_eligible = analysis_data[analysis_data["eco4_eligible"] == True] @@ -618,7 +632,7 @@ def app(): # Store # Old file was ha16.pickle # import pickle - # with open("ha16_8_jan_2.pickle", "wb") as f: + # with open("ha16_10_jan.pickle", "wb") as f: # pickle.dump( # { # "scoring_data": scoring_data, diff --git 
a/etl/eligibility/ha_15_32/ha24_app.py b/etl/eligibility/ha_15_32/ha24_app.py index 0f82f30a..49a5abb1 100644 --- a/etl/eligibility/ha_15_32/ha24_app.py +++ b/etl/eligibility/ha_15_32/ha24_app.py @@ -16,6 +16,7 @@ from etl.epc.DataProcessor import DataProcessor from etl.epc.settings import COLUMNS_TO_MERGE_ON from backend.ml_models.api import ModelApi from etl.solar.SolarPhotoSupply import SolarPhotoSupply +from recommendations.recommendation_utils import calculate_cavity_age EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env" @@ -231,6 +232,17 @@ def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, if penultimate_epc.get("estimated") is None: older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]] + # Loft MUST be suitable + cavity_age = None + if ( + eligibility.walls["is_cavity_wall"] and + eligibility.walls["is_filled_cavity"] and + eligibility.loft["suitability"] and + eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age" + ): + # We check the age of the cavity and if it's particularly old, we flag it + cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned) + # Full checks eligibility.check_gbis() eligibility.check_eco4() @@ -274,6 +286,9 @@ def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, "heating": eligibility.epc["mainheat-description"], "tenure": eligibility.tenure, "date_epc": eligibility.epc["lodgement-date"], + "cavity_age": cavity_age, + **eligibility.walls, + **eligibility.roof, } ) @@ -454,7 +469,7 @@ def app(): # Pickle results just in case # import pickle - # with open("ha24_8_jan.pickle", "wb") as f: + # with open("ha24_10_jan.pickle", "wb") as f: # pickle.dump( # { # "scoring_data": scoring_data, diff --git a/recommendations/recommendation_utils.py b/recommendations/recommendation_utils.py index 175eb641..64880aca 100644 --- 
a/recommendations/recommendation_utils.py +++ b/recommendations/recommendation_utils.py @@ -1,4 +1,5 @@ import math +from datetime import datetime from copy import deepcopy import numpy as np @@ -713,3 +714,26 @@ def estimate_windows( raise ValueError("Window count cannot be negative.") return window_count + + +def calculate_cavity_age(newest_epc, older_epcs, cleaned): + all_epcs = [newest_epc] + older_epcs + + df = [] + for x in all_epcs: + # Get the cleaned mapping + mapped = [y for y in cleaned["walls-description"] if y["original_description"] == x["walls-description"]] + if not mapped: + continue + df.append( + { + **mapped[0], + "inspection-date": x["lodgement-date"], + } + ) + + df = pd.DataFrame(df) + df = df[df["is_cavity_wall"] & df["is_filled_cavity"]] + + cavity_age = (datetime.now() - pd.to_datetime(df["inspection-date"].max())).days + return cavity_age From 7969f517337865353c03c87536d0a3aa58e1ad61 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 11 Jan 2024 11:57:44 +0000 Subject: [PATCH 22/26] set up load data function for cancellation app --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- etl/eligibility/ha_15_32/app.py | 9 +- etl/eligibility/ha_15_32/ha16_app.py | 11 +- etl/eligibility/ha_15_32/ha24_app.py | 39 ++++++- etl/eligibility/ha_15_32/ha25_app.py | 150 +++++++++++++++------------ etl/eligibility/ha_15_32/ha4_app.py | 39 +++++-- etl/eligibility/ha_15_32/ha7_app.py | 67 ++++++++++-- recommendations/Costs.py | 6 +- 9 files changed, 234 insertions(+), 91 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index b0f9c00d..4413bb06 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 1122b380..6f308057 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/etl/eligibility/ha_15_32/app.py b/etl/eligibility/ha_15_32/app.py index 3a0caec6..b7f44a43 100644 --- a/etl/eligibility/ha_15_32/app.py +++ b/etl/eligibility/ha_15_32/app.py @@ 
-358,9 +358,16 @@ def prepare_model_data_row( p.get_components(cleaned, photo_supply_lookup=photo_supply_lookup, floor_area_decile_thresholds=floor_area_decile_thresholds) + + # THIS IS TEMP AND SHOULDN'T BE HERE + data_to_clean = p.get_model_data() + if data_to_clean["NUMBER_HEATED_ROOMS"] in ['', None]: + data_to_clean["NUMBER_HEATED_ROOMS"] = data_to_clean["NUMBER_HABITABLE_ROOMS"] + p.data["number-heated-rooms"] = data_to_clean["NUMBER_HABITABLE_ROOMS"] + # This is temp - this should happen after scoring cleaned_property_data = DataProcessor.apply_averages_cleaning( - data_to_clean=pd.DataFrame([dict(**p.get_model_data(), LOCAL_AUTHORITY=p.data["local-authority"])]), + data_to_clean=pd.DataFrame([dict(**data_to_clean, LOCAL_AUTHORITY=p.data["local-authority"])]), cleaning_data=cleaning_data, cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'], ) diff --git a/etl/eligibility/ha_15_32/ha16_app.py b/etl/eligibility/ha_15_32/ha16_app.py index f2b80542..b7f076b1 100644 --- a/etl/eligibility/ha_15_32/ha16_app.py +++ b/etl/eligibility/ha_15_32/ha16_app.py @@ -18,6 +18,7 @@ from etl.epc.settings import COLUMNS_TO_MERGE_ON from backend.ml_models.api import ModelApi from etl.solar.SolarPhotoSupply import SolarPhotoSupply from recommendations.recommendation_utils import calculate_cavity_age +from recommendation_utils import convert_thickness_to_numeric import re @@ -484,9 +485,6 @@ def analyse_results(results_df, data, survey_list): how="left", on="survey_key" ) - from recommendation_utils import convert_thickness_to_numeric - - analysis_data["roof_insulation_thickness"] = analysis_data["roof_insulation_thickness"].fillna(None) analysis_data["roof_insulation_thickness"] = np.where( pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"] ) @@ -497,13 +495,12 @@ def analyse_results(results_df, data, survey_list): warmfront_sold_eco4 = analysis_data[ 
(analysis_data["warmfront_identified"] == True) & ( analysis_data["funding_scheme"].isin(["ECO4 A/W", "AFFORDABLE WARMTH"])) - ] + ] # 1407 warmfront_sold_gbis = analysis_data[ (analysis_data["warmfront_identified"] == True) & ( analysis_data["funding_scheme"].isin(["ECO4 GBIS (ECO+)"])) ] - # 1407 ideal_eco4_warmfront_not_sold = analysis_data[ (analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & ( @@ -519,7 +516,7 @@ def analyse_results(results_df, data, survey_list): underperforming_cavities = analysis_data[ (analysis_data["eco4_message"] == "Failed due to full cavity - check cavity age") & ( analysis_data["cavity_age"] > 10 * 365 - ) + ) & (analysis_data["roof_insulation_thickness_numeric"] <= 100) ] identified_gbis_not_sold = analysis_data[ @@ -643,7 +640,7 @@ def app(): # Read pickle # import pickle - # with open("ha16_8_jan_2.pickle", "rb") as f: + # with open("ha16_10_jan.pickle", "rb") as f: # saved = pickle.load(f) # scoring_data = saved["scoring_data"] # results_df = saved["results"] diff --git a/etl/eligibility/ha_15_32/ha24_app.py b/etl/eligibility/ha_15_32/ha24_app.py index 49a5abb1..dc4df018 100644 --- a/etl/eligibility/ha_15_32/ha24_app.py +++ b/etl/eligibility/ha_15_32/ha24_app.py @@ -17,6 +17,7 @@ from etl.epc.settings import COLUMNS_TO_MERGE_ON from backend.ml_models.api import ModelApi from etl.solar.SolarPhotoSupply import SolarPhotoSupply from recommendations.recommendation_utils import calculate_cavity_age +from recommendation_utils import convert_thickness_to_numeric EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env" @@ -393,6 +394,42 @@ def analyse_results(results_df, data, survey_list): how="left", on="survey_key" ) + # NEW + + analysis_data["roof_insulation_thickness"] = np.where( + pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"] + ) + 
analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply( + lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True) + ) + + warmfront_sold_eco4 = analysis_data[ + (analysis_data["warmfront_identified"] == True) & ( + analysis_data["funding_scheme"].isin(["ECO4 A/W", "AFFORDABLE WARMTH"])) + ] + + warmfront_sold_gbis = analysis_data[ + (analysis_data["warmfront_identified"] == True) & ( + analysis_data["funding_scheme"].isin(["ECO4 GBIS (ECO+)"])) + ] + # 1407 + + additional_eco4_warmfront_not_sold = analysis_data[ + (analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & ( + analysis_data["roof_insulation_thickness_numeric"] <= 100) + ] + + additional_gbis_warmfront_not_sold = analysis_data[ + (analysis_data["gbis_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & ( + ~analysis_data["row_id"].isin(additional_eco4_warmfront_not_sold["row_id"].values) + ) + ] + + additional_gbis_warmfront_not_sold["walls"].value_counts() + analysis_data["walls"].value_counts() + + # END NEW + all_identified_eco = analysis_data[ (analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin( ["ECO4 A/W"])) | @@ -480,7 +517,7 @@ def app(): # Read in pickle # import pickle - # with open("ha24_8_jan.pickle", "rb") as f: + # with open("ha24_10_jan.pickle", "rb") as f: # saved = pickle.load(f) # scoring_data = saved["scoring_data"] # results_df = saved["results"] diff --git a/etl/eligibility/ha_15_32/ha25_app.py b/etl/eligibility/ha_15_32/ha25_app.py index 541f77d3..c67c6b6b 100644 --- a/etl/eligibility/ha_15_32/ha25_app.py +++ b/etl/eligibility/ha_15_32/ha25_app.py @@ -17,6 +17,8 @@ from etl.epc.DataProcessor import DataProcessor from etl.epc.settings import COLUMNS_TO_MERGE_ON from backend.ml_models.api import ModelApi from etl.solar.SolarPhotoSupply import SolarPhotoSupply +from recommendations.recommendation_utils import calculate_cavity_age +from 
recommendation_utils import convert_thickness_to_numeric import re @@ -341,7 +343,7 @@ def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, "Guest Room": {"property-type": None, "built-form": None} } - for _, property_meta in tqdm(data.iterrows(), total=len(data)): + for _, property_meta in tqdm(data, total=len(data)): searcher = SearchEpc( address1=property_meta["HouseNo"], @@ -368,22 +370,35 @@ def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, older_epcs = searcher.older_epcs full_sap_epc = searcher.full_sap_epc # We also want to get the penultimate epc - penultimate_epc, _ = searcher.filter_newest_epc(older_epcs) - if not penultimate_epc: - penultimate_epc = newest_epc + # penultimate_epc, _ = searcher.filter_newest_epc(older_epcs) + # if not penultimate_epc: + # penultimate_epc = newest_epc eligibility = Eligibility(epc=newest_epc, cleaned=cleaned) eligibility.check_gbis_warmfront() eligibility.check_eco4_warmfront() - if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront): - eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned) - eligibility.check_gbis_warmfront() - eligibility.check_eco4_warmfront() - # If this is the case, we need to update the older epcs - # We don't update just to make data cleaning easier - if penultimate_epc.get("estimated") is None: - older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]] + # if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront): + # eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned) + # eligibility.check_gbis_warmfront() + # eligibility.check_eco4_warmfront() + # # If this is the case, we need to update the older epcs + # # We don't update just to make data cleaning easier + # if penultimate_epc.get("estimated") is None: + # older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]] + + # If the property is 
a cavity wall and it's filled, we produce an estimate for the age of the cavity + + # Loft MUST be suitable + cavity_age = None + if ( + eligibility.walls["is_cavity_wall"] and + eligibility.walls["is_filled_cavity"] and + eligibility.loft["suitability"] and + eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age" + ): + # We check the age of the cavity and if it's particularly old, we flag it + cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned) # Full checks eligibility.check_gbis() @@ -396,6 +411,15 @@ def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, if eligibility.epc["construction-age-band"] in ["", None]: eligibility.epc["construction-age-band"] = map_year_to_age_band(property_meta["Build Yr"]) + # This is not the right place to do this but this is temp + if eligibility.epc["extension-count"] in ["", None]: + eligibility.epc["extension-count"] = 0 + + # Not in the right place but temp + if eligibility.epc["built-form"] in ["", None]: + if not older_epcs: + eligibility.epc["built-form"] = "Mid-Terrace" + scoring_dictionary = prepare_model_data_row( property_id=property_meta["row_id"], modelling_epc=eligibility.epc, @@ -431,6 +455,9 @@ def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, "heating": eligibility.epc["mainheat-description"], "tenure": eligibility.tenure, "date_epc": eligibility.epc["lodgement-date"], + "cavity_age": cavity_age, + **eligibility.walls, + **eligibility.roof, } ) @@ -657,6 +684,8 @@ def get_epc_data_for_lost_surveys( "heating": eligibility.epc["mainheat-description"], "tenure": eligibility.tenure, "date_epc": eligibility.epc["lodgement-date"], + **eligibility.walls, + **eligibility.roof, } ) @@ -758,58 +787,51 @@ def analyse_results(results_df, data, eco4_prospects_survey_list): results_df, how="left", on="row_id" ) - warmfront_identified = analysis_data[analysis_data["warmfront_identified"]] + # NEW + 
analysis_data["roof_insulation_thickness"] = np.where( + pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"] + ) + analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply( + lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True) + ) - identified_eco = analysis_data[analysis_data["eco4_eligible"] == True] - identified_eco = identified_eco[identified_eco["eco4_message"] == "subject to post retrofit sap"] + warmfront_identified = analysis_data[ + (analysis_data["warmfront_identified"] == True) + ] # 2204 - identified_gbis = analysis_data[ - (analysis_data["gbis_eligible"] == True) & (analysis_data["eco4_eligible"] == False) + # Because we don't know which property is for which scheme, we'll just look at what we found + ideal_eco4 = analysis_data[ + (analysis_data["eco4_eligible"] == True) & + (analysis_data["roof_insulation_thickness_numeric"] <= 100) & + (analysis_data["sap"] <= 54) + ] # 335 + + gbis = analysis_data[ + (analysis_data["gbis_eligible"] == True) & + ~analysis_data["row_id"].isin(ideal_eco4["row_id"].values) ] - # Take just unfilled cavities and remove filled potentials - identified_gbis["walls"].value_counts() - - identified_gbis["walls"].value_counts() - - # Of the ECO jobs, what proportion to we get right - - success_rate = (warmfront_identified["eco4_eligible"] | warmfront_identified["gbis_eligible"]).sum() / \ - warmfront_identified.shape[ - 0] - - # No gbis for this - # gbis_success_rate = warmfront_identified_gbis["gbis_eligible"].sum() / warmfront_identified_gbis.shape[0] - - # Additional identified - additional_identified_eco = analysis_data[ - (analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False) - ] - - additional_identified_eco["eligibility_classification"].value_counts() - - additional_identified_gbis = analysis_data[ - (analysis_data["gbis_eligible"] == True) & 
(analysis_data["eco4_eligible"] == False) & ( - analysis_data["warmfront_identified"] == False - ) - ].shape[0] - - # Future - additional_identified_eco_future = analysis_data[ - (analysis_data["eco4_eligible_future"] == True) & (analysis_data["warmfront_identified"] == False) - ].shape[0] - additional_identified_gbis_future = analysis_data[ - (analysis_data["gbis_eligible_future"] == True) & (analysis_data["eco4_eligible_future"] == False) & ( - analysis_data["warmfront_identified"] == False - ) - ].shape[0] + ideal_eco4 = ideal_eco4[ideal_eco4["sap"] <= 54] def analyse_lost_surveys(results_df): - identified_eco = results_df[results_df["eco4_eligible"] == True] - # 59 for lost surveys - identified_gbis = results_df[results_df["gbis_eligible"] == True] - # 107 + results_df["roof_insulation_thickness"] = np.where( + pd.isnull(results_df["roof_insulation_thickness"]), None, results_df["roof_insulation_thickness"] + ) + results_df["roof_insulation_thickness_numeric"] = results_df["roof_insulation_thickness"].apply( + lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True) + ) + + ideal_eco4 = results_df[ + (results_df["eco4_eligible"] == True) & + (results_df["roof_insulation_thickness_numeric"] <= 100) & + (results_df["sap"] <= 54) + ] # 25 + + gbis = results_df[ + (results_df["gbis_eligible"] == True) & + ~results_df["row_id"].isin(ideal_eco4["row_id"].values) + ] # 82 def app(): @@ -837,7 +859,7 @@ def app(): # Pickle the outputs # Old data was ha25.pickle # import pickle - # with open("ha25_9_jan.pickle", "wb") as f: + # with open("ha25_10_jan.pickle", "wb") as f: # pickle.dump( # { # "results_df": results_df, @@ -848,9 +870,9 @@ def app(): # ) # Load in pickle - # import pickle - # with open("ha25_9_jan.pickle", "rb") as f: - # saved = pickle.load(f) - # results_df = saved["results_df"] - # scoring_data = saved["scoring_data"] - # nodata = saved["nodata"] + import pickle + with open("ha25_10_jan.pickle", "rb") as f: + saved = pickle.load(f) + 
results_df = saved["results_df"] + scoring_data = saved["scoring_data"] + nodata = saved["nodata"] diff --git a/etl/eligibility/ha_15_32/ha4_app.py b/etl/eligibility/ha_15_32/ha4_app.py index 92b03539..d2702dd8 100644 --- a/etl/eligibility/ha_15_32/ha4_app.py +++ b/etl/eligibility/ha_15_32/ha4_app.py @@ -1,3 +1,4 @@ +import os import msgpack from pathlib import Path from datetime import datetime @@ -6,7 +7,7 @@ import pandas as pd from utils.s3 import read_from_s3 from utils.logger import setup_logger from dotenv import load_dotenv -from backend.app.utils import read_parquet_from_s3 +from utils.s3 import read_dataframe_from_s3_parquet from tqdm import tqdm from backend.SearchEpc import SearchEpc from etl.eligibility.Eligibility import Eligibility @@ -14,9 +15,13 @@ from etl.eligibility.ha_15_32.app import prepare_model_data_row from etl.epc.DataProcessor import DataProcessor from etl.epc.settings import COLUMNS_TO_MERGE_ON from backend.ml_models.api import ModelApi +from etl.solar.SolarPhotoSupply import SolarPhotoSupply +from recommendations.recommendation_utils import calculate_cavity_age +from recommendation_utils import convert_thickness_to_numeric import re +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env" logger = setup_logger() @@ -52,7 +57,7 @@ def standardise_ha_4(data): return data -def get_ha_4_data(data, cleaned, cleaning_data, created_at): +def get_ha_4_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds): scoring_data = [] results = [] nodata = [] @@ -62,19 +67,33 @@ def get_ha_4_data(data, cleaned, cleaning_data, created_at): searcher = SearchEpc( address1=property_meta["Address Line 1"], postcode=property_meta["Post Code"], - size=1000 + auth_token=EPC_AUTH_TOKEN, + os_api_key=None, + property_type=property_type_lookup.get(house["Archetype"]), ) - searcher.search() + searcher.find_property(skip_os=True) - if searcher.data is None: 
+ if searcher.newest_epc is None: searcher = SearchEpc( address1=property_meta["Location Name"], postcode=property_meta["Post Code"], - size=1000 + auth_token=EPC_AUTH_TOKEN, + os_api_key=None, + property_type=property_type_lookup.get(house["Archetype"]), ) searcher.search() + if searcher.newest_epc is None: + nodata.append(house["row_id"]) + continue + + newest_epc = searcher.newest_epc + older_epcs = searcher.older_epcs + full_sap_epc = searcher.full_sap_epc + + searcher.search() + if searcher.data is None: nodata.append(property_meta.to_dict()) continue @@ -273,17 +292,21 @@ def app(): ) cleaned = msgpack.unpackb(cleaned, raw=False) - cleaning_data = read_parquet_from_s3( + cleaning_data = read_dataframe_from_s3_parquet( bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", ) created_at = datetime.now().isoformat() + photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev") + results_df, scoring_data, nodata = get_ha_4_data( data=data, cleaned=cleaned, cleaning_data=cleaning_data, - created_at=created_at + created_at=created_at, + photo_supply_lookup=photo_supply_lookup, + floor_area_decile_thresholds=floor_area_decile_thresholds ) # Store the data locally as a pickle diff --git a/etl/eligibility/ha_15_32/ha7_app.py b/etl/eligibility/ha_15_32/ha7_app.py index 544f614d..54d0dbb0 100644 --- a/etl/eligibility/ha_15_32/ha7_app.py +++ b/etl/eligibility/ha_15_32/ha7_app.py @@ -17,6 +17,8 @@ from etl.epc.DataProcessor import DataProcessor from etl.epc.settings import COLUMNS_TO_MERGE_ON from backend.ml_models.api import ModelApi from etl.solar.SolarPhotoSupply import SolarPhotoSupply +from recommendations.recommendation_utils import calculate_cavity_age +from recommendation_utils import convert_thickness_to_numeric ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env" @@ -112,6 +114,19 @@ def get_ha7_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, 
eligibility.check_gbis_warmfront() eligibility.check_eco4_warmfront() + # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity + + # Loft MUST be suitable + cavity_age = None + if ( + eligibility.walls["is_cavity_wall"] and + eligibility.walls["is_filled_cavity"] and + eligibility.loft["suitability"] and + eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age" + ): + # We check the age of the cavity and if it's particularly old, we flag it + cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned) + # If the house is not identified, we do a full gbis and eco4 check eligibility.check_gbis() eligibility.check_eco4() @@ -151,6 +166,9 @@ def get_ha7_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, "tenure": eligibility.tenure, "date_epc": eligibility.epc["lodgement-date"], **newest_epc, + "cavity_age": cavity_age, + **eligibility.walls, + **eligibility.roof, } ) @@ -250,21 +268,56 @@ def get_ha7_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, def analyse_ha_7(results_df, data): - df = results_df.merge( + analysis_data = results_df.merge( data[["row_id", "row_code", "Property Type", "Construction Year Band"]], how="left", on="row_id" ) - warmfront_identification = df["row_code"].value_counts() - warmfront_identified = df[df["row_code"] == "potential ECO4"] + + # NEW + + analysis_data["roof_insulation_thickness"] = np.where( + pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"] + ) + analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply( + lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True) + ) + + ideal_eco4 = analysis_data[ + (analysis_data["eco4_eligible"] == True) & ( + analysis_data["roof_insulation_thickness_numeric"] <= 100) + ] + + secondary_eco4_warmfront_not_sold = analysis_data[ + (analysis_data["eco4_eligible"] == 
True) & ( + analysis_data["roof_insulation_thickness_numeric"] > 100) + ] + + # underperforming cavities + underperforming_cavities = analysis_data[ + (analysis_data["eco4_message"] == "Failed due to full cavity - check cavity age") & ( + analysis_data["cavity_age"] > 9 * 365 + ) & (analysis_data["roof_insulation_thickness_numeric"] <= 100) + ] + + identified_gbis_not_sold = analysis_data[ + (analysis_data["gbis_eligible"] == True) & ( + analysis_data["eco4_eligible"] == False + ) + ] + + # END NEW + + warmfront_identification = analysis_data["row_code"].value_counts() + warmfront_identified = analysis_data[analysis_data["row_code"] == "potential ECO4"] warmfront_identified["walls"].value_counts(normalize=True) - df["Construction Year Band"].value_counts(normalize=True) + analysis_data["Construction Year Band"].value_counts(normalize=True) # Number of days from today days_to_today = (datetime.now() - pd.to_datetime(warmfront_identified["date_epc"])).dt.days days_to_today.mean() - property_types = df["Property Type"].value_counts() + property_types = analysis_data["Property Type"].value_counts() n_identified = (results_df["gbis_eligible"] | results_df["eco4_eligible"]).sum() @@ -312,12 +365,12 @@ def app(): # Pickle results # import pickle - # with open("ha7_results.pkl", "wb") as f: + # with open("ha7_results_jan_10.pkl", "wb") as f: # pickle.dump({"results_df": results_df, "scoring_data": scoring_data, "nodata": nodata}, f) # Read in the old data # import pickle - # with open("ha7_results.pkl", "rb") as f: + # with open("ha7_results_jan_10.pkl", "rb") as f: # old_data = pickle.load(f) # results_df = old_data["results_df"] # scoring_data = old_data["scoring_data"] diff --git a/recommendations/Costs.py b/recommendations/Costs.py index 8dbb9cc9..e2b26448 100644 --- a/recommendations/Costs.py +++ b/recommendations/Costs.py @@ -176,12 +176,16 @@ class Costs: """ material_cost_per_m2 = material["material_cost"] + # We inflate material costs due to recent price increases 
+ material_cost_per_m2 = material_cost_per_m2 * 1.5 + base_material_cost = material_cost_per_m2 * floor_area labour_cost = material["labour_cost"] * floor_area * self.labour_adjustment_factor subtotal_before_profit = base_material_cost + labour_cost - contingency_cost = subtotal_before_profit * self.CONTINGENCY + # We use high risk contingency because of the possibility of access issues and clearing existing insulation + contingency_cost = subtotal_before_profit * self.HIGH_RISK_CONTINGENCY preliminaries_cost = subtotal_before_profit * self.PRELIMINARIES profit_cost = subtotal_before_profit * self.PROFIT_MARGIN From 5becd8d11bee83cc39b6134fdc67786bb7cbbd8a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 12 Jan 2024 11:21:13 +0000 Subject: [PATCH 23/26] fixed existing unit tests --- backend/tests/test_sap_model_prep.py | 31 +++++++++++++------ etl/solar/SolarPhotoSupply.py | 7 +++++ recommendations/tests/test_costs.py | 6 ++-- .../tests/test_roof_recommendations.py | 4 +-- .../tests/test_window_recommendations.py | 21 ++++++++----- 5 files changed, 48 insertions(+), 21 deletions(-) diff --git a/backend/tests/test_sap_model_prep.py b/backend/tests/test_sap_model_prep.py index c1ff514e..89c436ce 100644 --- a/backend/tests/test_sap_model_prep.py +++ b/backend/tests/test_sap_model_prep.py @@ -7,7 +7,6 @@ import pytest import msgpack from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3 -from tqdm import tqdm # Handy code for selecting testing data @@ -121,7 +120,21 @@ class TestSapModelPrep: cleaned = msgpack.unpackb(cleaned, raw=False) return cleaned - def test_fill_cavity_wall(self, cleaned, cleaning_data): + @pytest.fixture + def photo_supply_lookup(self): + photo_supply_lookup = read_dataframe_from_s3_parquet( + bucket_name="retrofit-data-dev", file_key="solar_pv_supply/photo_supply_lookup.parquet", + ) + return photo_supply_lookup + + @pytest.fixture + def floor_area_decile_thresholds(self): + floor_area_decile_thresholds = 
read_dataframe_from_s3_parquet( + bucket_name="retrofit-data-dev", file_key="solar_pv_supply/floor_area_decile_thresholds.parquet", + ) + return floor_area_decile_thresholds + + def test_fill_cavity_wall(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds): """ We ensure that the process that prepares the data in the engine code results in the same data as the model is trained on @@ -290,7 +303,7 @@ class TestSapModelPrep: address=starting_epc["address1"], data=starting_epc ) - home.get_components(cleaned) + home.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds) data_processor = DataProcessor(None, newdata=True) data_processor.insert_data(pd.DataFrame([home.get_model_data()])) @@ -354,7 +367,7 @@ class TestSapModelPrep: assert test_record[c].values[0] == row[c] - def test_internal_wall_insulation(self, cleaned, cleaning_data): + def test_internal_wall_insulation(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds): starting_epc2 = { 'low-energy-fixed-light-count': '2', 'address': 'FLAT 12, WAREHOUSE W, 3 WESTERN GATEWAY', @@ -509,7 +522,7 @@ class TestSapModelPrep: address=starting_epc2["address1"], data=starting_epc2 ) - home2.get_components(cleaned) + home2.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds) home2.set_number_lighting_outlets(None) data_processor2 = DataProcessor(None, newdata=True) @@ -575,7 +588,7 @@ class TestSapModelPrep: assert test_record2[c].values[0] == row2[c] - def test_ventilation(self, cleaned, cleaning_data): + def test_ventilation(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds): starting_epc3 = { 'low-energy-fixed-light-count': '', 'address': '45 Shepperson Road', 'uprn-source': 'Energy Assessor', @@ -728,7 +741,7 @@ class TestSapModelPrep: address=starting_epc3["address1"], data=starting_epc3 ) - home3.get_components(cleaned) + home3.get_components(cleaned, photo_supply_lookup, 
floor_area_decile_thresholds) home3.set_number_lighting_outlets(None) data_processor3 = DataProcessor(None, newdata=True) @@ -778,7 +791,7 @@ class TestSapModelPrep: assert test_record3[c].values[0] == row3[c] - def test_fireplaces(self, cleaned, cleaning_data): + def test_fireplaces(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds): starting_epc4 = { 'low-energy-fixed-light-count': '', 'address': '9 Glebe Road, Asfordby Hill', @@ -936,7 +949,7 @@ class TestSapModelPrep: address=starting_epc4["address1"], data=starting_epc4 ) - home4.get_components(cleaned) + home4.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds) home4.set_number_lighting_outlets(None) data_processor4 = DataProcessor(None, newdata=True) diff --git a/etl/solar/SolarPhotoSupply.py b/etl/solar/SolarPhotoSupply.py index 1a80c37a..180cd6f5 100644 --- a/etl/solar/SolarPhotoSupply.py +++ b/etl/solar/SolarPhotoSupply.py @@ -199,6 +199,13 @@ class SolarPhotoSupply: # Convert the tenure to lower case, as is done in the creation of the dataset tenure = tenure.lower() + # We remap the "not defined" + tenure = { + "not defined - use in the case of a new dwelling for which the intended tenure in not known. it is not to " + "be used for an existing dwelling": + "not defined - use in the case of a new dwelling for which the intended tenure in not known. 
it is no" + }.get(tenure, tenure) + photo_supply_matched = photo_supply_lookup[ (photo_supply_lookup["tenure"] == tenure) & (photo_supply_lookup["built_form"] == built_form) & diff --git a/recommendations/tests/test_costs.py b/recommendations/tests/test_costs.py index 1d519b91..ab822322 100644 --- a/recommendations/tests/test_costs.py +++ b/recommendations/tests/test_costs.py @@ -58,9 +58,9 @@ class TestCosts: ) assert loft_results == { - 'total': 430.21445040000003, 'subtotal': 358.512042, 'vat': 71.70240840000001, - 'contingency': 25.608003000000004, 'preliminaries': 25.608003000000004, 'material': 198.29923000000002, - 'profit': 51.21600600000001, 'labour_hours': 3.685, 'labour_cost': 57.7808, 'labour_days': 0.460625 + 'total': 639.4133610000001, 'subtotal': 532.8444675000001, 'vat': 106.56889350000002, + 'contingency': 71.045929, 'preliminaries': 35.5229645, 'material': 297.448845, 'profit': 71.045929, + 'labour_hours': 3.685, 'labour_cost': 57.7808, 'labour_days': 0.460625 } def test_internal_wall_insulation(self): diff --git a/recommendations/tests/test_roof_recommendations.py b/recommendations/tests/test_roof_recommendations.py index c1a7dfd9..75b7ddb2 100644 --- a/recommendations/tests/test_roof_recommendations.py +++ b/recommendations/tests/test_roof_recommendations.py @@ -53,7 +53,7 @@ class TestRoofRecommendations: assert len(roof_recommender2.recommendations) == 1 - assert roof_recommender2.recommendations[0]["total"] == 1310.56464 + assert roof_recommender2.recommendations[0]["total"] == 1936.9206000000004 assert roof_recommender2.recommendations[0]["new_u_value"] == 0.14 assert roof_recommender2.recommendations[0]["starting_u_value"] == 0.68 @@ -104,7 +104,7 @@ class TestRoofRecommendations: assert len(roof_recommender4.recommendations) == 4 - assert roof_recommender4.recommendations[0]["total"] == 788.0544 + assert roof_recommender4.recommendations[0]["total"] == 1128.744 assert roof_recommender4.recommendations[0]["new_u_value"] == 0.15 assert 
roof_recommender4.recommendations[0]["starting_u_value"] == 0.3 assert roof_recommender4.recommendations[0]["parts"][0]["depth"] == 150 diff --git a/recommendations/tests/test_window_recommendations.py b/recommendations/tests/test_window_recommendations.py index f103299d..664a1e39 100644 --- a/recommendations/tests/test_window_recommendations.py +++ b/recommendations/tests/test_window_recommendations.py @@ -17,7 +17,8 @@ class TestWindowRecommendations: address='1', data={ "county": "Wychavon", - "multi-glaze-proportion": 0 + "multi-glaze-proportion": 0, + "uprn": 0 } ) property_1.windows = { @@ -53,7 +54,8 @@ class TestWindowRecommendations: address='1', data={ "county": "Wychavon", - "multi-glaze-proportion": 33 + "multi-glaze-proportion": 33, + "uprn": 0 } ) property_2.windows = {'original_description': 'Mostly double glazing', 'has_glazing': True, @@ -86,7 +88,8 @@ class TestWindowRecommendations: address='1', data={ "county": "Wychavon", - "multi-glaze-proportion": 80 + "multi-glaze-proportion": 80, + "uprn": 0 } ) property_3.windows = {'original_description': 'Fully double glazed', 'has_glazing': True, @@ -109,7 +112,8 @@ class TestWindowRecommendations: address='1', data={ "county": "Wychavon", - "multi-glaze-proportion": 100 + "multi-glaze-proportion": 100, + "uprn": 0 } ) property_4.windows = {'original_description': 'Full secondary glazing', 'has_glazing': True, @@ -132,7 +136,8 @@ class TestWindowRecommendations: address='1', data={ "county": "Wychavon", - "multi-glaze-proportion": 50 + "multi-glaze-proportion": 50, + "uprn": 0 } ) property_5.windows = {'original_description': 'Partial secondary glazing', 'has_glazing': True, @@ -161,7 +166,8 @@ class TestWindowRecommendations: address='1', data={ "county": "Wychavon", - "multi-glaze-proportion": 0 + "multi-glaze-proportion": 0, + "uprn": 0 } ) property_6.windows = {'original_description': 'Single glazed', 'has_glazing': False, 'glazing_coverage': None, @@ -195,7 +201,8 @@ class 
TestWindowRecommendations: address='1', data={ "county": "Wychavon", - "multi-glaze-proportion": 100 + "multi-glaze-proportion": 100, + "uprn": 0 } ) property_7.windows = {'original_description': 'Fully triple glazed', 'has_glazing': True, From 0c76e4b9d12a6f61ad735d44d42615311a853b45 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 16 Jan 2024 11:10:56 +0000 Subject: [PATCH 24/26] Added basic solar recommendations --- etl/eligibility/ha_15_32/app.py | 7 ++ etl/eligibility/ha_15_32/ha16_app.py | 2 +- etl/eligibility/ha_15_32/ha25_app.py | 5 ++ etl/eligibility/ha_15_32/ha7_app.py | 6 ++ recommendations/SolarPvRecommendations.py | 3 +- .../tests/test_solar_pv_recommendations.py | 79 +++++++++++++++++++ 6 files changed, 100 insertions(+), 2 deletions(-) diff --git a/etl/eligibility/ha_15_32/app.py b/etl/eligibility/ha_15_32/app.py index b7f44a43..76aadcc4 100644 --- a/etl/eligibility/ha_15_32/app.py +++ b/etl/eligibility/ha_15_32/app.py @@ -980,6 +980,8 @@ def analyse_ha_15_results(results_df, ha15, no_house_numbers): results_df["warmfront_identified"] ] + warmfront_identified = warmfront_identified + n_identified = (warmfront_identified["gbis_eligible"] | warmfront_identified["eco4_eligible"]).sum() success_rate = n_identified / warmfront_identified.shape[0] @@ -1030,6 +1032,11 @@ def analyse_ha_15_results(results_df, ha15, no_house_numbers): (results_df["eco4_eligible"] == True) ].copy() + new_possibilities_gbis = results_df[ + (~results_df["warmfront_identified"]) & + (results_df["eco4_eligible"] == False) & (results_df["gbis_eligible"] == True) + ].copy() + # These are future possibilityies future_possibilities_eco = results_df[ (~results_df["warmfront_identified"]) & diff --git a/etl/eligibility/ha_15_32/ha16_app.py b/etl/eligibility/ha_15_32/ha16_app.py index b7f076b1..0d67e0b4 100644 --- a/etl/eligibility/ha_15_32/ha16_app.py +++ b/etl/eligibility/ha_15_32/ha16_app.py @@ -478,7 +478,7 @@ def get_epc_data(data, cleaned, cleaning_data, created_at, 
photo_supply_lookup, def analyse_results(results_df, data, survey_list): - analysis_data = data[["row_id", "survey_key", "warmfront_identified"]].merge( + analysis_data = data[["row_id", "survey_key", "warmfront_identified", "row_colour_name"]].merge( results_df, how="left", on="row_id" ).merge( survey_list[["survey_key", survey_list.columns[0]]].rename(columns={survey_list.columns[0]: "funding_scheme"}), diff --git a/etl/eligibility/ha_15_32/ha25_app.py b/etl/eligibility/ha_15_32/ha25_app.py index c67c6b6b..7dd36726 100644 --- a/etl/eligibility/ha_15_32/ha25_app.py +++ b/etl/eligibility/ha_15_32/ha25_app.py @@ -787,6 +787,11 @@ def analyse_results(results_df, data, eco4_prospects_survey_list): results_df, how="left", on="row_id" ) + analysis_data = analysis_data.merge( + eco4_prospects_survey_list[["survey_key", "ADDRESS 1", "NO", "POSTCODE"]], + how="left", on="survey_key" + ) + # NEW analysis_data["roof_insulation_thickness"] = np.where( pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"] diff --git a/etl/eligibility/ha_15_32/ha7_app.py b/etl/eligibility/ha_15_32/ha7_app.py index 54d0dbb0..c6486159 100644 --- a/etl/eligibility/ha_15_32/ha7_app.py +++ b/etl/eligibility/ha_15_32/ha7_app.py @@ -272,6 +272,8 @@ def analyse_ha_7(results_df, data): data[["row_id", "row_code", "Property Type", "Construction Year Band"]], how="left", on="row_id" ) + analysis_data["row_code"].value_counts() + # NEW analysis_data["roof_insulation_thickness"] = np.where( @@ -304,6 +306,10 @@ def analyse_ha_7(results_df, data): ) ] + wf_identified = analysis_data[ + (analysis_data["row_code"] == "potential ECO4") + ] + # END NEW warmfront_identification = analysis_data["row_code"].value_counts() diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py index 5163c1cb..01cd4f17 100644 --- a/recommendations/SolarPvRecommendations.py +++ b/recommendations/SolarPvRecommendations.py @@ -31,7 +31,8 @@ 
class SolarPvRecommendations: is_valid_roof_type = ( self.property.roof["is_flat"] or self.property.roof["is_pitched"] or self.property.roof["is_roof_room"] ) - has_no_existing_solar_pv = not self.property.data["photo-supply"] in [ + # If there is no existing solar PV, the photo-supply field will be None or a missing value + has_no_existing_solar_pv = self.property.data["photo-supply"] in [ None, 0, self.property.DATA_ANOMALY_MATCHES ] diff --git a/recommendations/tests/test_solar_pv_recommendations.py b/recommendations/tests/test_solar_pv_recommendations.py index e69de29b..f2436cb1 100644 --- a/recommendations/tests/test_solar_pv_recommendations.py +++ b/recommendations/tests/test_solar_pv_recommendations.py @@ -0,0 +1,79 @@ +import pytest +from recommendations.SolarPvRecommendations import SolarPvRecommendations +from backend.Property import Property + + +class TestSolarPvRecommendations: + @pytest.fixture + def property_instance_invalid_type(self): + # Setup the property_instance with an invalid property type + property_instance_invalid_type = Property(id=1, address="", postcode="") + property_instance_invalid_type.data = { + "property-type": "InvalidType", "county": "Broxbourne", "photo-supply": None + } + property_instance_invalid_type.roof = {"is_flat": False, "is_pitched": False, "is_roof_room": False} + return property_instance_invalid_type + + @pytest.fixture + def property_instance_invalid_roof(self): + # Setup the property_instance with invalid roof type + property_instance_invalid_roof = Property(id=1, address="", postcode="") + property_instance_invalid_roof.data = { + "county": "Huntingdonshire", "property-type": "House", "photo-supply": None + } + property_instance_invalid_roof.roof = {"is_flat": False, "is_pitched": False, "is_roof_room": False} + return property_instance_invalid_roof + + @pytest.fixture + def property_instance_has_solar_pv(self): + # Setup the property_instance with existing solar pv + property_instance_has_solar_pv = 
Property(id=1, address="", postcode="") + property_instance_has_solar_pv.data = {"photo-supply": "40", "county": "Huntingdonshire", + "property-type": "House"} + property_instance_has_solar_pv.roof = {"is_flat": True} + return property_instance_has_solar_pv + + @pytest.fixture + def property_instance_valid_all(self): + # Setup a valid property_instance that passes all conditions + property_instance_valid_all = Property(id=1, address="", postcode="") + property_instance_valid_all.solar_pv_roof_area = 20 + property_instance_valid_all.solar_pv_percentage = 40 + property_instance_valid_all.data = {"property-type": "House", "photo-supply": None, "county": "Huntingdonshire"} + property_instance_valid_all.roof = {"is_flat": True} + return property_instance_valid_all + + def test_invalid_property_type(self, property_instance_invalid_type): + solar_pv = SolarPvRecommendations(property_instance_invalid_type) + solar_pv.recommend() + assert not solar_pv.recommendation + + def test_invalid_roof_type(self, property_instance_invalid_roof): + solar_pv = SolarPvRecommendations(property_instance_invalid_roof) + solar_pv.recommend() + assert not solar_pv.recommendation + + def test_existing_solar_pv(self, property_instance_has_solar_pv): + solar_pv = SolarPvRecommendations(property_instance_has_solar_pv) + solar_pv.recommend() + assert not solar_pv.recommendation + + def test_valid_all_conditions(self, property_instance_valid_all): + solar_pv = SolarPvRecommendations(property_instance_valid_all) + solar_pv.recommend() + assert solar_pv.recommendation == [ + { + 'parts': [], + 'type': 'solar_pv', + 'description': 'Install a 4 kilowatt-peak (kWp) solar photovoltaic (PV) panel system on the roof', + 'starting_u_value': None, + 'new_u_value': None, + 'sap_points': None, + 'total': 8527.0752, + 'subtotal': 7105.896, + 'vat': 1421.1791999999996, + 'labour_hours': 72, + 'labour_days': 2, + 'photo_supply': 4000 + } + ] From dbe704f6f6d1202e431baa37e03d1f610493bf14 Mon Sep 17 00:00:00 2001 
From: Khalim Conn-Kowlessar Date: Tue, 16 Jan 2024 11:34:47 +0000 Subject: [PATCH 25/26] Added solar costs unit tests --- recommendations/tests/test_costs.py | 46 +++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/recommendations/tests/test_costs.py b/recommendations/tests/test_costs.py index ab822322..1f5b300c 100644 --- a/recommendations/tests/test_costs.py +++ b/recommendations/tests/test_costs.py @@ -1,6 +1,7 @@ from recommendations.Costs import Costs from unittest.mock import Mock import datetime +import pytest class TestCosts: @@ -499,3 +500,48 @@ class TestCosts: 'labour_hours': 24.79, 'labour_days': 1.549375, 'labour_cost': 186.9032} assert costs.labour_adjustment_factor == 0.88 + + # Mock property instance for regional tests + @pytest.fixture(params=[ + ("Northamptonshire", "East Midlands", 7927.44), + ("Greater London Authority", "Inner London", 10475.0), + ("Adur", "South East England", 8333.32), + ("Bournemouth", "South West England", 8452), + ("Basildon", "East of England", 7895.44), + ("Birmingham", "West Midlands", 7706.2), + ("County Durham", "North East England", 8113.96), + ("Allerdale", "North West England", 6481.68), + ("York", "Yorkshire and the Humber", 8243.6), + ("Cardiff", "Wales", 7595.32), + ("Glasgow City", "Scotland", 7871.88), + ("Belfast", "Northern Ireland", 8504.36) + ]) + def mock_property_with_region(self, request): + county, region, expected_cost = request.param + mock_property = Mock() + mock_property.data = {"county": county} + return mock_property, region, expected_cost + + # Test for different wattages + @pytest.mark.parametrize("wattage, expected_cost", [ + (3000, 5945.58), + (4000, 7927.44), + (5000, 9909.3), + (6000, 11891.16), + ]) + def test_solar_pv_different_wattages(self, wattage, expected_cost): + mock_property = Mock() + mock_property.data = {"county": "Mansfield"} + costs = Costs(mock_property) + result = costs.solar_pv(wattage) + assert result['total'] == pytest.approx(expected_cost, 
rel=0.01) + + def test_solar_pv_regional_variation(self, mock_property_with_region): + # Test for regional cost variations + property_instance, expected_region, expected_cost = mock_property_with_region + costs = Costs(property_instance) + + assert costs.region == expected_region + + result = costs.solar_pv(4000) # Testing with a fixed wattage of 4000 + assert result['total'] == pytest.approx(expected_cost, rel=0.01) From b03b9d49083a22f78ac6e7969423696fa32b4293 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 16 Jan 2024 11:45:19 +0000 Subject: [PATCH 26/26] made contingency updates to costs --- recommendations/Costs.py | 6 ++--- recommendations/tests/test_costs.py | 12 ++++------ .../tests/test_wall_recommendations.py | 24 +++++++++---------- 3 files changed, 20 insertions(+), 22 deletions(-) diff --git a/recommendations/Costs.py b/recommendations/Costs.py index e2b26448..106f4453 100644 --- a/recommendations/Costs.py +++ b/recommendations/Costs.py @@ -61,7 +61,7 @@ class Costs: # We use a higher contingency rate for internal wall insulation because of the potential for issues with moving # fittings and trimming doors, as well as scope for damage to the existing wall during preparation. - IWI_CONTINGENCY = 0.15 + IWI_CONTINGENCY = 0.2 # Where there is more uncertainty, a higher contingency rate is used HIGH_RISK_CONTINGENCY = 0.2 @@ -77,8 +77,8 @@ class Costs: # have a preliminaries of 12-14% so we use 12% as the median for the preliminaries rate. # For External wall insulation (EWI), we use 15% as the preliminaries rate if we think the property might # need scaffolding, otherwise we use 12%. 
This is to account for any site preparation that might be required - EWI_NO_SCAFFOLDING_PRELIMINARIES = 0.15 - EWI_SCAFFOLDING_PRELIMINARIES = 0.20 + EWI_NO_SCAFFOLDING_PRELIMINARIES = 0.2 + EWI_SCAFFOLDING_PRELIMINARIES = 0.25 VAT_RATE = 0.2 PROFIT_MARGIN = 0.2 diff --git a/recommendations/tests/test_costs.py b/recommendations/tests/test_costs.py index 1f5b300c..402e38eb 100644 --- a/recommendations/tests/test_costs.py +++ b/recommendations/tests/test_costs.py @@ -177,11 +177,9 @@ class TestCosts: ) assert iwi_results == { - 'total': 6650.889456921851, 'subtotal': 5542.407880768209, 'vat': 1108.4815761536418, - 'contingency': 573.3525393898148, 'preliminaries': 382.2350262598765, - 'material': 1747.488000615996, - 'profit': 764.470052519753, 'labour_hours': 88.23759388401297, - 'labour_days': 2.757424808875405, + 'total': 6880.2304726777775, 'subtotal': 5733.525393898148, 'vat': 1146.7050787796295, + 'contingency': 764.470052519753, 'preliminaries': 382.2350262598765, 'material': 1747.488000615996, + 'profit': 764.470052519753, 'labour_hours': 88.23759388401297, 'labour_days': 2.757424808875405, 'labour_cost': 1927.1602026551818 } @@ -415,8 +413,8 @@ class TestCosts: ) assert ewi_results == { - 'total': 14561.688989159393, 'subtotal': 12134.740824299493, 'vat': 2426.948164859899, - 'contingency': 808.9827216199662, 'preliminaries': 1617.9654432399325, 'material': 4020.565147410677, + 'total': 15047.078622131372, 'subtotal': 12539.232185109477, 'vat': 2507.8464370218953, + 'contingency': 808.9827216199662, 'preliminaries': 2022.4568040499155, 'material': 4020.565147410677, 'profit': 1617.9654432399325, 'labour_hours': 187.02533486285358, 'labour_days': 5.8445417144641745, 'labour_cost': 3921.5600094613983 } diff --git a/recommendations/tests/test_wall_recommendations.py b/recommendations/tests/test_wall_recommendations.py index 2fbf3239..bfc681f5 100644 --- a/recommendations/tests/test_wall_recommendations.py +++ b/recommendations/tests/test_wall_recommendations.py 
@@ -331,17 +331,17 @@ class TestCavityWallRecommensations: assert len(recommender2.recommendations) == 9 assert recommender2.estimated_u_value == 1 assert np.isclose(recommender2.recommendations[0]["new_u_value"], 0.19) - assert np.isclose(recommender2.recommendations[0]["total"], 15899.9616) + assert np.isclose(recommender2.recommendations[0]["total"], 16429.960320000002) assert recommender2.recommendations[0]["parts"][0]["type"] == "external_wall_insulation" assert recommender2.recommendations[0]["parts"][0]["depth"] == 100 assert np.isclose(recommender2.recommendations[8]["new_u_value"], 0.23) - assert np.isclose(recommender2.recommendations[8]["total"], 10916.3424) + assert np.isclose(recommender2.recommendations[8]["total"], 11292.768) assert recommender2.recommendations[8]["parts"][0]["type"] == "internal_wall_insulation" assert recommender2.recommendations[8]["parts"][0]["depth"] == 72.5 assert np.isclose(recommender2.recommendations[6]["new_u_value"], 0.29) - assert np.isclose(recommender2.recommendations[6]["total"], 10621.934399999998) + assert np.isclose(recommender2.recommendations[6]["total"], 10988.208) assert recommender2.recommendations[6]["parts"][0]["type"] == "internal_wall_insulation" assert recommender2.recommendations[6]["parts"][0]["depth"] == 52.5 @@ -378,12 +378,12 @@ class TestCavityWallRecommensations: assert len(recommender3.recommendations) == 6 assert recommender3.estimated_u_value == 1.9 assert np.isclose(recommender3.recommendations[0]["new_u_value"], 0.2) - assert np.isclose(recommender3.recommendations[0]["total"], 13117.46832) + assert np.isclose(recommender3.recommendations[0]["total"], 13554.717263999999) assert recommender3.recommendations[0]["parts"][0]["type"] == "external_wall_insulation" assert recommender3.recommendations[0]["parts"][0]["depth"] == 100.0 assert np.isclose(recommender3.recommendations[1]["new_u_value"], 0.23) - assert np.isclose(recommender3.recommendations[1]["total"], 34070.50944) + assert 
np.isclose(recommender3.recommendations[1]["total"], 35206.19308800001) assert recommender3.recommendations[1]["parts"][0]["type"] == "external_wall_insulation" assert recommender3.recommendations[1]["parts"][0]["depth"] == 150.0 @@ -420,12 +420,12 @@ class TestCavityWallRecommensations: assert len(recommender4.recommendations) == 6 assert recommender4.estimated_u_value == 2.3 assert np.isclose(recommender4.recommendations[0]["new_u_value"], 0.21) - assert np.isclose(recommender4.recommendations[0]["total"], 28562.514352) + assert np.isclose(recommender4.recommendations[0]["total"], 29547.42864) assert recommender4.recommendations[0]["parts"][0]["type"] == "external_wall_insulation" assert recommender4.recommendations[0]["parts"][0]["depth"] == 100 assert np.isclose(recommender4.recommendations[1]["new_u_value"], 0.23) - assert np.isclose(recommender4.recommendations[1]["total"], 74186.52678400002) + assert np.isclose(recommender4.recommendations[1]["total"], 76744.68288000001) assert recommender4.recommendations[1]["parts"][0]["type"] == "external_wall_insulation" assert recommender4.recommendations[1]["parts"][0]["depth"] == 150 @@ -462,12 +462,12 @@ class TestCavityWallRecommensations: assert len(recommender5.recommendations) == 5 assert recommender5.estimated_u_value == 0.8 assert np.isclose(recommender5.recommendations[0]["new_u_value"], 0.29) - assert np.isclose(recommender5.recommendations[0]["total"], 8665.040384000002) + assert np.isclose(recommender5.recommendations[0]["total"], 8963.834880000002) assert recommender5.recommendations[0]["parts"][0]["type"] == "external_wall_insulation" assert recommender5.recommendations[0]["parts"][0]["depth"] == 50 assert np.isclose(recommender5.recommendations[3]["new_u_value"], 0.26) - assert np.isclose(recommender5.recommendations[3]["total"], 20078.742992) + assert np.isclose(recommender5.recommendations[3]["total"], 20771.11344) assert recommender5.recommendations[3]["parts"][0]["type"] == "internal_wall_insulation" 
assert recommender5.recommendations[3]["parts"][0]["depth"] == 100 @@ -504,16 +504,16 @@ class TestCavityWallRecommensations: assert len(recommender6.recommendations) == 9 assert recommender6.estimated_u_value == 1 assert np.isclose(recommender6.recommendations[0]["new_u_value"], 0.19) - assert np.isclose(recommender6.recommendations[0]["total"], 44829.0584) + assert np.isclose(recommender6.recommendations[0]["total"], 46374.888000000006) assert recommender6.recommendations[0]["parts"][0]["type"] == "external_wall_insulation" assert recommender6.recommendations[0]["parts"][0]["depth"] == 100 assert np.isclose(recommender6.recommendations[2]["new_u_value"], 0.21) - assert np.isclose(recommender6.recommendations[2]["total"], 116436.25280000002) + assert np.isclose(recommender6.recommendations[2]["total"], 120451.29600000002) assert recommender6.recommendations[2]["parts"][0]["type"] == "external_wall_insulation" assert recommender6.recommendations[2]["parts"][0]["depth"] == 150 assert np.isclose(recommender6.recommendations[4]["new_u_value"], 0.28) - assert np.isclose(recommender6.recommendations[4]["total"], 91267.0136) + assert np.isclose(recommender6.recommendations[4]["total"], 94414.15199999999) assert recommender6.recommendations[4]["parts"][0]["type"] == "internal_wall_insulation" assert recommender6.recommendations[4]["parts"][0]["depth"] == 100