working on eligibility pipeline

This commit is contained in:
Khalim Conn-Kowlessar 2024-01-10 16:14:01 +00:00
parent 04dba265de
commit 1bb188a8b8
4 changed files with 87 additions and 32 deletions

View file

@ -336,7 +336,9 @@ class Property(Definitions):
self.construction_age_band = 'England and Wales: 2012 onwards'
if self.age_band is None:
raise ValueError("age_band is missing")
logger.info("Age band is missing - filling with national average")
self.age_band = "C"
self.construction_age_band = "England and Wales: 1930-1949"
def set_spatial(self, spatial: pd.DataFrame):
"""

View file

@ -17,6 +17,7 @@ from etl.epc.DataProcessor import DataProcessor
from etl.epc.settings import COLUMNS_TO_MERGE_ON
from backend.ml_models.api import ModelApi
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from recommendations.recommendation_utils import calculate_cavity_age
import re
@ -252,31 +253,6 @@ def load_data():
return data, survey_list
def calculate_cavity_age(newest_epc, older_epcs, cleaned):
    """
    Return the number of days since the most recent EPC that recorded a filled cavity wall.

    Each EPC's "walls-description" is looked up in cleaned["walls-description"]
    (entries keyed by "original_description"); EPCs with no cleaned mapping are
    ignored. Only EPCs whose mapping has both "is_cavity_wall" and
    "is_filled_cavity" set are considered.

    :param newest_epc: EPC record (dict) with "walls-description" and "lodgement-date"
    :param older_epcs: list of earlier EPC records with the same keys
    :param cleaned: dict whose "walls-description" entry is a list of cleaned mappings
    :return: age in days as an int, or None when no EPC evidences a filled cavity
    """
    wall_mappings = cleaned["walls-description"]

    records = []
    for epc in [newest_epc] + older_epcs:
        # Only the first matching cleaned mapping is used, so stop at the first hit
        mapped = next(
            (m for m in wall_mappings if m["original_description"] == epc["walls-description"]),
            None,
        )
        if mapped is None:
            continue
        records.append({**mapped, "inspection-date": epc["lodgement-date"]})

    # Without this guard an empty frame has no columns and the filter below raises KeyError
    if not records:
        return None

    df = pd.DataFrame(records)
    # .eq(True) treats missing (None/NaN) flags as "not filled" rather than erroring
    filled = df[df["is_cavity_wall"].eq(True) & df["is_filled_cavity"].eq(True)]
    if filled.empty:
        return None

    # NOTE(review): this takes the latest lodgement date showing a filled cavity, i.e. time since
    # the cavity was last recorded as filled, not since it was first filled - confirm this is intended
    latest_inspection = pd.to_datetime(filled["inspection-date"].max())
    if pd.isnull(latest_inspection):
        return None

    return (datetime.now() - latest_inspection).days
def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
scoring_data = []
results = []
@ -508,10 +484,48 @@ def analyse_results(results_df, data, survey_list):
how="left", on="survey_key"
)
all_identified_eco = analysis_data[
(analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin(
["ECO4 A/W", "AFFORDABLE WARMTH"])) |
(analysis_data["eco4_eligible"])
from recommendation_utils import convert_thickness_to_numeric
analysis_data["roof_insulation_thickness"] = analysis_data["roof_insulation_thickness"].fillna(None)
analysis_data["roof_insulation_thickness"] = np.where(
pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"]
)
analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True)
)
warmfront_sold_eco4 = analysis_data[
(analysis_data["warmfront_identified"] == True) & (
analysis_data["funding_scheme"].isin(["ECO4 A/W", "AFFORDABLE WARMTH"]))
]
warmfront_sold_gbis = analysis_data[
(analysis_data["warmfront_identified"] == True) & (
analysis_data["funding_scheme"].isin(["ECO4 GBIS (ECO+)"]))
]
# 1407
ideal_eco4_warmfront_not_sold = analysis_data[
(analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & (
analysis_data["roof_insulation_thickness_numeric"] <= 100)
]
secondary_eco4_warmfront_not_sold = analysis_data[
(analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & (
analysis_data["roof_insulation_thickness_numeric"] > 100)
]
# underperforming cavities
underperforming_cavities = analysis_data[
(analysis_data["eco4_message"] == "Failed due to full cavity - check cavity age") & (
analysis_data["cavity_age"] > 10 * 365
)
]
identified_gbis_not_sold = analysis_data[
(analysis_data["gbis_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & (
analysis_data["eco4_eligible"] == False
)
]
eco_eligible = analysis_data[analysis_data["eco4_eligible"] == True]
@ -618,7 +632,7 @@ def app():
# Store
# Old file was ha16.pickle
# import pickle
# with open("ha16_8_jan_2.pickle", "wb") as f:
# with open("ha16_10_jan.pickle", "wb") as f:
# pickle.dump(
# {
# "scoring_data": scoring_data,

View file

@ -16,6 +16,7 @@ from etl.epc.DataProcessor import DataProcessor
from etl.epc.settings import COLUMNS_TO_MERGE_ON
from backend.ml_models.api import ModelApi
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from recommendations.recommendation_utils import calculate_cavity_age
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
@ -231,6 +232,17 @@ def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup,
if penultimate_epc.get("estimated") is None:
older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
# Loft MUST be suitable
cavity_age = None
if (
eligibility.walls["is_cavity_wall"] and
eligibility.walls["is_filled_cavity"] and
eligibility.loft["suitability"] and
eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
):
# We check the age of the cavity and if it's particularly old, we flag it
cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)
# Full checks
eligibility.check_gbis()
eligibility.check_eco4()
@ -274,6 +286,9 @@ def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup,
"heating": eligibility.epc["mainheat-description"],
"tenure": eligibility.tenure,
"date_epc": eligibility.epc["lodgement-date"],
"cavity_age": cavity_age,
**eligibility.walls,
**eligibility.roof,
}
)
@ -454,7 +469,7 @@ def app():
# Pickle results just in case
# import pickle
# with open("ha24_8_jan.pickle", "wb") as f:
# with open("ha24_10_jan.pickle", "wb") as f:
# pickle.dump(
# {
# "scoring_data": scoring_data,

View file

@ -1,4 +1,5 @@
import math
from datetime import datetime
from copy import deepcopy
import numpy as np
@ -713,3 +714,26 @@ def estimate_windows(
raise ValueError("Window count cannot be negative.")
return window_count
def calculate_cavity_age(newest_epc, older_epcs, cleaned):
    """
    Return the number of days since the most recent EPC that recorded a filled cavity wall.

    Each EPC's "walls-description" is looked up in cleaned["walls-description"]
    (entries keyed by "original_description"); EPCs with no cleaned mapping are
    ignored. Only EPCs whose mapping has both "is_cavity_wall" and
    "is_filled_cavity" set are considered.

    :param newest_epc: EPC record (dict) with "walls-description" and "lodgement-date"
    :param older_epcs: list of earlier EPC records with the same keys
    :param cleaned: dict whose "walls-description" entry is a list of cleaned mappings
    :return: age in days as an int, or None when no EPC evidences a filled cavity
    """
    wall_mappings = cleaned["walls-description"]

    records = []
    for epc in [newest_epc] + older_epcs:
        # Only the first matching cleaned mapping is used, so stop at the first hit
        mapped = next(
            (m for m in wall_mappings if m["original_description"] == epc["walls-description"]),
            None,
        )
        if mapped is None:
            continue
        records.append({**mapped, "inspection-date": epc["lodgement-date"]})

    # Without this guard an empty frame has no columns and the filter below raises KeyError
    if not records:
        return None

    df = pd.DataFrame(records)
    # .eq(True) treats missing (None/NaN) flags as "not filled" rather than erroring
    filled = df[df["is_cavity_wall"].eq(True) & df["is_filled_cavity"].eq(True)]
    if filled.empty:
        return None

    # NOTE(review): this takes the latest lodgement date showing a filled cavity, i.e. time since
    # the cavity was last recorded as filled, not since it was first filled - confirm this is intended
    latest_inspection = pd.to_datetime(filled["inspection-date"].max())
    if pd.isnull(latest_inspection):
        return None

    return (datetime.now() - latest_inspection).days