From b41fa37072f1ca5e1f3b71ec4700a9f017997b7b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Mon, 11 Dec 2023 10:20:46 +0000
Subject: [PATCH] eligibiltiy pipeline wip

---
 backend/ml_models/Valuation.py        |   2 +-
 etl/eligibility/Eligibility.py        | 232 ++++++++++++++++++++++++++
 etl/eligibility/MeasureSuitibility.py |  83 ---------
 etl/eligibility/ha_15_32/app.py       | 160 ++++++++++++++----
 4 files changed, 357 insertions(+), 120 deletions(-)
 create mode 100644 etl/eligibility/Eligibility.py
 delete mode 100644 etl/eligibility/MeasureSuitibility.py

diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py
index 9e409b9f..522a7067 100644
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@@ -15,7 +15,7 @@ class PropertyValuation:
         100071264896: 128000,
         # Based on next door neighbour: https://themovemarket.com/tools/propertyprices/flat-2-queens-wood-house-219
         # -brandwood-road-birmingham-b14-6pu
-        100070533688: 218000,  # Based on Zoopla's estimation of 95 Tenby Road, which is also end terrace
+        100070533688: 218000,  # Based on Zoopla's estimation of 95 Tenby Road, which is also mid terrace
         100070505235: 344000,  # Based on Zoopla's estimation of 131 School road, which is also semi-detached
         100070513306: 182000,  # Based on Zoopla's estimation of 61 Simmons Drive
         100071306896: 77000,  # Based on Flat 2 of 44 Wedgewood Road on Zoopla
diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py
new file mode 100644
index 00000000..18b4ecd1
--- /dev/null
+++ b/etl/eligibility/Eligibility.py
@@ -0,0 +1,232 @@
+from recommendations.recommendation_utils import convert_thickness_to_numeric
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
+
+
+class Eligibility:
+    """
+    Given the epc data about a property, this class holds the logic for determining if the home
+    is eligible for a specific retrofit measure.
+
+    For example, this could be whether the loft has insulation below a standardised threshold, or
+    if it has an empty cavity
+
+    Further to this, this class is responsible for determining if the property is suitable for specific funding
+    schemes
+    """
+
+    loft = None
+    cavity = None
+
+    # schemes
+    gbis = None
+    eco4 = None
+
+    # If the loft has 100mm of insulation or less, we classify the home as needing loft insulation
+    LOFT_INSULATION_THRESHOLD = 100
+
+    def __init__(self, epc, cleaned):
+        self.epc = epc
+        self.cleaned = cleaned
+
+        self.walls = self.parse_fabric("walls-description")
+        self.roof = self.parse_fabric("roof-description")
+
+        self.loft_insulation()
+        self.cavity_insulation()
+
+    def parse_fabric(self, key):
+
+        if "thermal transmittance" in self.epc[key]:
+            if key == "walls-description":
+                return WallAttributes(self.epc["walls-description"]).process()
+
+            if key == "roof-description":
+                return RoofAttributes(self.epc["roof-description"]).process()
+
+            raise ValueError("Invalid Key")
+
+        # Get the cleaned version of the description
+        return [
+            data for data in self.cleaned[key] if
+            data["original_description"] == self.epc[key]
+        ][0]
+
+    def loft_insulation(self, loft_thickness_threshold: int = None):
+        """
+        Given the description of roof, this function determines whether or not the property is suitable for loft
+        insulation. A loft whose existing insulation has a thickness at or below loft_thickness_threshold is deemed to
+        be suitable for loft insulation
+        :param loft_thickness_threshold: Integer, Optional. If provided, any loft found with insulation no thicker than
+                                         this thickness is deemed to be suitable for loft insulation. If this
+                                         parameter is not provided, this method will default to the variable specified
+                                         in LOFT_INSULATION_THRESHOLD
+        """
+
+        loft_thickness_threshold = (
+            self.LOFT_INSULATION_THRESHOLD if loft_thickness_threshold is None else loft_thickness_threshold
+        )
+
+        # We firstly check if the roof is a loft
+        is_loft = self.roof["is_pitched"] and (not self.roof["is_roof_room"])
+
+        if not is_loft:
+            self.loft = {
+                "suitablility": False,
+                "thickness": None,
+                "reason": "roof not loft"
+            }
+            return
+
+        # If it is a loft, we'll convert the textual thickness to a numerical value we can easily use
+        insulation_thickness = convert_thickness_to_numeric(
+            string_thickness=self.roof["insulation_thickness"],
+            is_pitched=self.roof["is_pitched"],
+            is_flat=self.roof["is_flat"]
+        )
+
+        if insulation_thickness > loft_thickness_threshold:
+            # Insulation is already thick enough
+            self.loft = {
+                "suitablility": False,
+                "thickness": insulation_thickness,
+                "reason": "existing insulation"
+            }
+            return
+
+        self.loft = {
+            "suitablility": True,
+            "thickness": insulation_thickness,
+            "reason": None
+        }
+
+    def cavity_insulation(self):
+
+        """
+        Given the description of the walls, this function determines if the property is suitable for cavity wall
+        insulation
+        :return: None. The result is stored on self.cavity under the same "suitablility" key loft_insulation uses
+        """
+
+        is_cavity = self.walls["is_cavity_wall"]
+        is_empty = (not self.walls["is_filled_cavity"]) or (
+            self.walls["is_as_built"] and self.walls["insulation_thickness"] not in ["average", "above average"]
+        )
+        is_partial_filled = (
+            self.walls["is_as_built"] and self.walls["insulation_thickness"] not in ["below average"]
+        )
+
+        is_unfilled_cavity = is_cavity and is_empty
+        is_partial_filled_cavity = is_cavity and is_partial_filled
+
+        if is_unfilled_cavity:
+            self.cavity = {
+                "suitablility": True,
+                "type": "empty",
+            }
+            return
+
+        if is_partial_filled_cavity:
+            self.cavity = {
+                "suitablility": True,
+                "type": "partial"
+            }
+            return
+
+        self.cavity = {
+            "suitablility": False,
+            "type": "full"
+        }
+
+    def check_gbis(self):
+        """
+        The Eligibility criteria for the Great British Insulation Scheme (GBIS) can be found here:
+        https://www.ofgem.gov.uk/environmental-and-social-schemes/great-british-insulation-scheme/homeowners-and-tenants
+
+        At a high level, the criteria is the following:
+        - The home must be within council tax bands A-D in England, A-E in Scotland, A-E in Wales
+        - It must have an EPC rating of D or below
+
+        For the moment, we won't check whether a property is in the correct council tax band. There is likely
+        to be public data for this since there is a government website which allows you to search for properties:
+        https://www.gov.uk/council-tax-bands
+        This data is possibly contained on the council tax valuation list but it remains to be seen (seems unlikely)
+        whether or not the data is openly accessible
+        https://www.gov.uk/government/statistics/quality-assurance-of-administrative-data-in-the-uk-house-price-index
+        /valuation-office-agency-council-tax-valuation-lists
+
+        Currently, we tailor this module to the Warmfront Team and their delivery capabilities (both practically and
+        commercially). Therefore, we will check:
+        1) Whether the property is an EPC D or below
+        2) Whether the property is suitable for cavity wall insulation or loft insulation
+
+        However, GBIS applies to many insulation measures, which can be seen in the ofgem document
+
+        GBIS does not have any minimum upgrade requirement so we don't need to simulate the post retrofit sap score
+        using the machine learning model
+        """
+
+        # Check if the property is suitable for cavity wall or loft insulation
+        self.cavity_insulation()
+        self.loft_insulation()
+
+        self.gbis = (self.cavity["suitablility"] or self.loft["suitablility"]) and (
+            int(self.epc["current-energy-efficiency"]) <= 68
+        )
+
+    def check_eco4(self, post_retrofit_sap=None):
+        """
+        This function will check if the property is eligible for funding under the ECO4 scheme
+
+        For the moment, this function will consider just measures that can be implemented by the
+        Warmfront team, therefore we will only check if a property has an uninsulated loft AND uninsulated
+        cavity
+
+        We use Ofgem's V1.1 ECO 4 guidance document for the conditions under which a property is eligible
+        This document can be found here:
+        https://www.ofgem.gov.uk/sites/default/files/2023-02/ECO4%20Delivery%20Guidance%20v1.1%20%281%29.pdf
+
+        The conditions (to be reviewed) to be eligible for retrofit, under ECO4, are the following:
+        1) The property is a social home (This is assumed prior to this function as this code will often
+            be run on property lists provided by a HA)
+        2) The property is an EPC E or below
+        3) The property has an unfilled cavity and uninsulated loft
+        4) After retrofit, the property will hit an EPC C
+
+        Note: This criteria will likely be adjusted depending on the properties that can be served right now
+
+        If the post_retrofit_sap is provided, then if this value is 69 or higher, the property will be deemed
+        to be eligible for ECO4 funding. If the post_retrofit_sap is not provided, the property will be
+        deemed to be eligible, conditional to the post_retrofit_sap score check
+        :param post_retrofit_sap: Optional. The SAP score expected after retrofit; 69 or higher passes condition 4
+        :return: None. The result is stored on self.eco4 as a dict with the keys "eligible" and "message"
+        """
+
+        current_sap = int(self.epc["current-energy-efficiency"])
+        if current_sap > 54:
+            self.eco4 = {
+                "eligible": False,
+                "message": "sap too high"
+            }
+            return
+
+        self.cavity_insulation()
+        self.loft_insulation()
+
+        # make sure conditions 2 and 3 are true
+        is_eligible = self.cavity["suitablility"] & self.loft["suitablility"]
+
+        if post_retrofit_sap is None:
+            self.eco4 = {
+                "eligible": is_eligible,
+                "message": "subject to post retrofit sap"
+            }
+            return
+
+        is_eligible = is_eligible & (post_retrofit_sap >= 69)
+
+        self.eco4 = {
+            "eligible": is_eligible,
+            "message": None
+        }
+        return
diff --git a/etl/eligibility/MeasureSuitibility.py b/etl/eligibility/MeasureSuitibility.py
deleted file mode 100644
index 54deec4d..00000000
--- a/etl/eligibility/MeasureSuitibility.py
+++ /dev/null
@@ -1,83 +0,0 @@
-from recommendations.recommendation_utils import convert_thickness_to_numeric
-from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
-from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
-
-
-class MeasureSuitibility:
-    """
-    Given the epc data about a property, this class holds the logic for determining if the home
-    is eligible for a specific retrofit measure.
-
-    For example, this could be whether the loft has insulation below a standardised threshold, or
-    if it has an empty cavity
-    """
-
-    # If the loft has less than 100mm of insulation, we classify the home has needing loft insulation
-    LOFT_INSULATION_THRESHOLD = 100
-
-    def __init__(self, epc, cleaned):
-        self.epc = epc
-        self.cleaned = cleaned
-
-        self.walls = self.parse_fabric("walls-description")
-        self.roof = self.parse_fabric("roof-description")
-
-    def parse_fabric(self, key):
-
-        if "thermal transmittance" in self.epc[key]:
-            if key == "walls-description":
-                return WallAttributes(self.epc["walls-description"]).process()
-
-            if key == "roof-description":
-                return RoofAttributes(self.epc["roof-description"]).process()
-
-            raise ValueError("Invalid Key")
-
-        # Get the cleaned version of the description
-        return [
-            data for data in self.cleaned[key] if
-            data["original_description"] == self.epc[key]
-        ][0]
-
-    def loft_insulation(self, loft_thickness_threshold: int = None):
-        """
-        Given the description of roof, this function determines whether or not the property is suitable for loft
-        insulation. A loft existing insulation with a thickness below loft_thickness_threshold, is deemed to
-        be suitable for loft insulation
-        :param loft_thickness_threshold: Integer, Optional. If provided, any loft found with insulation lower than
-                                         this thickness is deemed to be suitable for loft insulation. If this
-                                         parameter is not provided, this method will default to the variable specified
-                                         in LOFT_INSULATION_THRESHOLD
-        """
-
-        loft_thickness_threshold = (
-            self.LOFT_INSULATION_THRESHOLD if loft_thickness_threshold is None else loft_thickness_threshold
-        )
-
-        # We firstly check if the roof is a loft
-        is_loft = self.roof["is_pitched"] and (not self.roof["is_roof_room"])
-
-        if not is_loft:
-            return {
-                "suitablility": False,
-                "thickness": None
-            }
-
-        # If it is a loft, we'll convert the textual thickenss to a numerical value we can easily use
-        insulation_thickness = convert_thickness_to_numeric(
-            string_thickness=self.roof["insulation_thickness"],
-            is_pitched=self.roof["is_pitched"],
-            is_flat=self.roof["is_flat"]
-        )
-
-        if insulation_thickness > loft_thickness_threshold:
-            # Insulation is already thick enough
-            return {
-                "suitablility": False,
-                "thickness": insulation_thickness
-            }
-
-        return {
-            "suitablility": True,
-            "thickness": insulation_thickness
-        }
diff --git a/etl/eligibility/ha_15_32/app.py b/etl/eligibility/ha_15_32/app.py
index 47426d5d..2cdee129 100644
--- a/etl/eligibility/ha_15_32/app.py
+++ b/etl/eligibility/ha_15_32/app.py
@@ -8,14 +8,16 @@ from tqdm import tqdm
 import pandas as pd
 import numpy as np
 import msgpack
+from datetime import datetime
 from utils.logger import setup_logger
 from utils.s3 import read_from_s3
 from dotenv import load_dotenv
 from backend.SearchEpc import SearchEpc
 from backend.Property import Property
-from etl.eligibility.MeasureSuitibility import MeasureSuitibility
+from etl.eligibility.Eligibility import Eligibility
 from etl.epc.DataProcessor import DataProcessor
 from backend.app.utils import read_parquet_from_s3
+from backend.app.plan.utils import create_recommendation_scoring_data
 
 ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
 
@@ -323,6 +325,79 @@ def merge_ha_15(asset_list, identified_addresses):
     return merged_data, dropped_identified_merge_keys
 
 
+def prepare_model_data_row(property_id, modelling_epc, cleaned, cleaning_data, created_at):
+    """
+    This function prepares the data for modelling, in the same fashion as the recommendation engine
+    With up-coming refactoring, this will change
+    :param modelling_epc: The EPC record used to build the Property that is modelled
+    :return: The scoring data returned by create_recommendation_scoring_data for the final simulated measure
+    """
+    p = Property(
+        id=property_id,
+        postcode=modelling_epc["postcode"],
+        address1=modelling_epc["address1"],
+        epc_client=None,
+        data=modelling_epc
+    )
+
+    p.get_components(cleaned)
+    # This is temp - this should happen after scoring
+    cleaned_property_data = DataProcessor.apply_averages_cleaning(
+        data_to_clean=pd.DataFrame([dict(**p.get_model_data(), LOCAL_AUTHORITY=p.data["local-authority"])]),
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
+    )
+    p.set_number_lighting_outlets(cleaned_property_data)
+
+    data_processor = DataProcessor(None, newdata=True)
+    data_processor.insert_data(pd.DataFrame([p.get_model_data()]))
+
+    data_processor.pre_process()
+
+    starting_epc_data = data_processor.get_component_features(suffix="_STARTING")
+    ending_epc_data = data_processor.get_component_features(suffix="_ENDING")
+    fixed_data = data_processor.get_fixed_features()
+
+    # We update the ending record with the recommended updates and we set lodgement date to today
+    ending_epc_data["DAYS_TO_ENDING"] = data_processor.calculate_days_to(created_at)
+
+    # We simulate the impact of the retrofit using expected performance of the wall and roof,
+    # after retrofit. We use the minimal u-values required to meet building regulations part L
+    # TODO: Check the performance of the materials warmfront's installers use, particularly for
+    #   cavity
+    simulation_recommendations = [
+        {
+            "recommendation_id": "-".join([property_id, "cavity"]),
+            "type": "cavity_wall_insulation",
+            "new_u_value": 0.55,
+            "parts": [{}]
+        },
+        {
+            "recommendation_id": "-".join([property_id, "loft"]),
+            "type": "loft_insulation",
+            "new_u_value": 0.16,
+            "parts": [{"depth": 270}]
+        }
+    ]
+
+    scoring_dict = {}
+    for recommendation in simulation_recommendations:
+        scoring_dict = create_recommendation_scoring_data(
+            property=p,
+            recommendation=recommendation,
+            starting_epc_data=starting_epc_data,
+            ending_epc_data=ending_epc_data,
+            fixed_data=fixed_data,
+        )
+        # At each iteration, we want to update the ending_epc_data, so in the end, ending_epc_data contains
+        # all of the updates
+        for k in scoring_dict.keys():
+            if k in ending_epc_data.columns:
+                ending_epc_data[k] = scoring_dict[k]
+
+    return scoring_dict
+
+
 def app():
     ha32_asset_list, ha15_asset_list, ha32_identified_addresses, ha15_identified_addresses = load_data()
 
@@ -342,14 +417,19 @@ def app():
         bucket_name="retrofit-data-dev",
         file_key="sap_change_model/cleaning_dataset.parquet",
     )
+    created_at = datetime.now().isoformat()
 
+    # We want to retrieve EPCs for every single property
+    # NOTE: HA32 is MOSTLY cavity via GBIS
     ha_data = ha32
     house_number_key = "Dwelling num"
     address_key = "Street"
     postcode_key = "Postcode"
 
     def get_data(ha_data, house_number_key, address_key, postcode_key):
-        ha_scoring_data = []
+
+        scoring_data = []
+        results = []
         for _, house in tqdm(ha_data.iterrows(), total=len(ha_data)):
             searcher = SearchEpc(
                 address1=" ".join([house[house_number_key], house[address_key]]),
@@ -364,42 +444,50 @@ def app():
             if not penultimate_epc:
                 penultimate_epc = newest_epc
 
-            suitability = MeasureSuitibility(
-                epc=newest_epc, cleaned=cleaned
-            )
-            suitable = suitability.loft_insulation()
+            eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
+            eligibility.check_gbis()
+            eligibility.check_eco4()
 
-            modelling_epc = newest_epc.copy()
-            if not suitable["suitablility"]:
-                # if unsuccessful with newest EPC, try penultimate
-                suitability = MeasureSuitibility(
-                    epc=penultimate_epc, cleaned=cleaned
+            # If there is no eligibility, we need to check the penultimate epc
+            if (not eligibility.eco4["eligible"]) and (not eligibility.gbis):
+                eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
+                eligibility.check_gbis()
+                eligibility.check_eco4()
+
+            if eligibility.eco4["eligible"]:
+                # TODO: Check me
+                scoring_dictionary = prepare_model_data_row(
+                    property_id=house["row_id"],
+                    modelling_epc=eligibility.epc,
+                    cleaned=cleaned,
+                    cleaning_data=cleaning_data,
+                    created_at=created_at
                 )
-                suitable = suitability.loft_insulation()
-                modelling_epc = penultimate_epc.copy()
+                scoring_data.append(scoring_dictionary)
+                results.append(
+                    {
+                        "row_id": house["row_id"],
+                        "warmfront_identified": house["identified"],
+                        "gbis_eligible": eligibility.gbis,
+                        "eco4_eligible": eligibility.eco4["eligible"],
+                        "date_epc": eligibility.epc["lodgement-date"],
+                        "eco4_note": "conditional on post sap"
+                    }
+                )
+                continue
 
-            if not suitable["suitablility"]:
-                raise ValueError("DO SOMETHING")
+            if (house["identified"] and not eligibility.gbis) and (
+                    house["identified"] and not eligibility.eco4["eligible"]):
+                raise NotImplementedError("Investigate ms")
 
-            p = Property(
-                id=house["row_id"],
-                postcode=modelling_epc["postcode"],
-                address1=modelling_epc["address1"],
-                epc_client=None,
-                data=modelling_epc
+            # If nothing is eligible or gbis is eligible, then we make a record of this
+            results.append(
+                {
+                    "row_id": house["row_id"],
+                    "warmfront_identified": house["identified"],
+                    "gbis_eligible": eligibility.gbis,
+                    "eco4_eligible": eligibility.eco4["eligible"],
+                    "date_epc": eligibility.epc["lodgement-date"],
+                    "eco4_note": None
+                }
             )
-            ################################################################################
-            # Prepare the data for modelling, in the same fasion as the engine
-            ################################################################################
-
-            p.get_components(cleaned)
-            # This is temp - this should happen after scoring
-            cleaned_property_data = DataProcessor.apply_averages_cleaning(
-                data_to_clean=pd.DataFrame([dict(**p.get_model_data(), LOCAL_AUTHORITY=p.data["local-authority"])]),
-                cleaning_data=cleaning_data,
-                cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
-            )
-            p.set_number_lighting_outlets(cleaned_property_data)
-
-            from pprint import pprint
-            len(searcher.data["rows"])