From 1bb188a8b8107b8adebd7e5163631c232a0e85c2 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 10 Jan 2024 16:14:01 +0000 Subject: [PATCH] working on eligibility pipeline --- backend/Property.py | 4 +- etl/eligibility/ha_15_32/ha16_app.py | 74 +++++++++++++++---------- etl/eligibility/ha_15_32/ha24_app.py | 17 +++++- recommendations/recommendation_utils.py | 24 ++++++++ 4 files changed, 87 insertions(+), 32 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 5713c179..03fc507e 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -336,7 +336,9 @@ class Property(Definitions): self.construction_age_band = 'England and Wales: 2012 onwards' if self.age_band is None: - raise ValueError("age_band is missing") + logger.info("Age band is missing - filling with national average") + self.age_band = "C" + self.construction_age_band = "England and Wales: 1930-1949" def set_spatial(self, spatial: pd.DataFrame): """ diff --git a/etl/eligibility/ha_15_32/ha16_app.py b/etl/eligibility/ha_15_32/ha16_app.py index 446c35c9..f2b80542 100644 --- a/etl/eligibility/ha_15_32/ha16_app.py +++ b/etl/eligibility/ha_15_32/ha16_app.py @@ -17,6 +17,7 @@ from etl.epc.DataProcessor import DataProcessor from etl.epc.settings import COLUMNS_TO_MERGE_ON from backend.ml_models.api import ModelApi from etl.solar.SolarPhotoSupply import SolarPhotoSupply +from recommendations.recommendation_utils import calculate_cavity_age import re @@ -252,31 +253,6 @@ def load_data(): return data, survey_list -def calculate_cavity_age(newest_epc, older_epcs, cleaned): - all_epcs = [newest_epc] + older_epcs - - df = [] - for x in all_epcs: - # Get the cleaned mapping - mapped = [y for y in cleaned["walls-description"] if y["original_description"] == x["walls-description"]] - if not mapped: - continue - df.append( - { - **mapped[0], - "inspection-date": x["lodgement-date"], - } - ) - - df = pd.DataFrame(df) - df = df[ - (df["is_cavity_wall"] == True) & (df["is_filled_cavity"] 
== True) - ] - - cavity_age = (datetime.now() - pd.to_datetime(df["inspection-date"].max())).days - return cavity_age - - def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds): scoring_data = [] results = [] @@ -508,10 +484,48 @@ def analyse_results(results_df, data, survey_list): how="left", on="survey_key" ) - all_identified_eco = analysis_data[ - (analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin( - ["ECO4 A/W", "AFFORDABLE WARMTH"])) | - (analysis_data["eco4_eligible"]) + from recommendation_utils import convert_thickness_to_numeric + + analysis_data["roof_insulation_thickness"] = analysis_data["roof_insulation_thickness"].fillna(None) + analysis_data["roof_insulation_thickness"] = np.where( + pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"] + ) + analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply( + lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True) + ) + + warmfront_sold_eco4 = analysis_data[ + (analysis_data["warmfront_identified"] == True) & ( + analysis_data["funding_scheme"].isin(["ECO4 A/W", "AFFORDABLE WARMTH"])) + ] + + warmfront_sold_gbis = analysis_data[ + (analysis_data["warmfront_identified"] == True) & ( + analysis_data["funding_scheme"].isin(["ECO4 GBIS (ECO+)"])) + ] + # 1407 + + ideal_eco4_warmfront_not_sold = analysis_data[ + (analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & ( + analysis_data["roof_insulation_thickness_numeric"] <= 100) + ] + + secondary_eco4_warmfront_not_sold = analysis_data[ + (analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & ( + analysis_data["roof_insulation_thickness_numeric"] > 100) + ] + + # underperforming cavities + underperforming_cavities = analysis_data[ + (analysis_data["eco4_message"] == "Failed due to full cavity - 
check cavity age") & ( + analysis_data["cavity_age"] > 10 * 365 + ) + ] + + identified_gbis_not_sold = analysis_data[ + (analysis_data["gbis_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & ( + analysis_data["eco4_eligible"] == False + ) ] eco_eligible = analysis_data[analysis_data["eco4_eligible"] == True] @@ -618,7 +632,7 @@ def app(): # Store # Old file was ha16.pickle # import pickle - # with open("ha16_8_jan_2.pickle", "wb") as f: + # with open("ha16_10_jan.pickle", "wb") as f: # pickle.dump( # { # "scoring_data": scoring_data, diff --git a/etl/eligibility/ha_15_32/ha24_app.py b/etl/eligibility/ha_15_32/ha24_app.py index 0f82f30a..49a5abb1 100644 --- a/etl/eligibility/ha_15_32/ha24_app.py +++ b/etl/eligibility/ha_15_32/ha24_app.py @@ -16,6 +16,7 @@ from etl.epc.DataProcessor import DataProcessor from etl.epc.settings import COLUMNS_TO_MERGE_ON from backend.ml_models.api import ModelApi from etl.solar.SolarPhotoSupply import SolarPhotoSupply +from recommendations.recommendation_utils import calculate_cavity_age EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env" @@ -231,6 +232,17 @@ def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, if penultimate_epc.get("estimated") is None: older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]] + # Loft MUST be suitable + cavity_age = None + if ( + eligibility.walls["is_cavity_wall"] and + eligibility.walls["is_filled_cavity"] and + eligibility.loft["suitability"] and + eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age" + ): + # We check the age of the cavity and if it's particularly old, we flag it + cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned) + # Full checks eligibility.check_gbis() eligibility.check_eco4() @@ -274,6 +286,9 @@ def get_epc_data(data, cleaned, cleaning_data, created_at, 
def _flag_is_set(value):
    """Return True only for a real, truthy flag (missing/NaN values count as False)."""
    # NaN is truthy in Python, so a plain truthiness test would treat a
    # missing flag as "set"; rule missing values out first.
    return not pd.isna(value) and bool(value)


def calculate_cavity_age(newest_epc, older_epcs, cleaned):
    """
    Estimate, in days, how long ago a filled cavity wall was last recorded.

    Each EPC's "walls-description" is looked up in the cleaned wall mappings
    (``cleaned["walls-description"]``, matched on "original_description").
    EPCs whose mapping is flagged as both a cavity wall and a filled cavity
    contribute their "lodgement-date"; the age is measured from the most
    recent of those dates to now.

    :param newest_epc: EPC record (dict) with "walls-description" and "lodgement-date"
    :param older_epcs: iterable of earlier EPC records with the same keys
    :param cleaned: dict holding the cleaned description mappings; only the
        "walls-description" list is read here
    :return: whole days since the latest filled-cavity EPC, or NaN when no EPC
        maps to a filled cavity wall or no usable date is available
    """
    wall_mappings = cleaned["walls-description"]

    filled_cavity_dates = []
    for epc in [newest_epc, *older_epcs]:
        # The first cleaned entry matching this EPC's raw description wins
        mapping = next(
            (m for m in wall_mappings if m["original_description"] == epc["walls-description"]),
            None,
        )
        if mapping is None:
            continue
        if _flag_is_set(mapping.get("is_cavity_wall")) and _flag_is_set(mapping.get("is_filled_cavity")):
            filled_cavity_dates.append(epc["lodgement-date"])

    # Nothing matched: report "unknown" as NaN rather than failing on an
    # empty frame, so every no-data path yields the same value.
    if not filled_cavity_dates:
        return float("nan")

    # Parse before taking the maximum so the comparison is chronological, not
    # lexicographic; unparseable dates are coerced to NaT and ignored by max().
    # NOTE(review): the latest filled-cavity EPC is used, which gives the
    # smallest possible age - confirm the earliest date is not what is wanted.
    latest = pd.to_datetime(pd.Series(filled_cavity_dates), errors="coerce").max()
    if pd.isna(latest):
        return float("nan")

    return (datetime.now() - latest).days