mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
eligibility pipeline wip
This commit is contained in:
parent
e1c066ea8d
commit
b41fa37072
4 changed files with 357 additions and 120 deletions
|
|
@ -15,7 +15,7 @@ class PropertyValuation:
|
|||
100071264896: 128000,
|
||||
# Based on next door neighbour: https://themovemarket.com/tools/propertyprices/flat-2-queens-wood-house-219
|
||||
# -brandwood-road-birmingham-b14-6pu
|
||||
100070533688: 218000, # Based on Zoopla's estimation of 95 Tenby Road, which is also end terrace
|
||||
100070533688: 218000, # Based on Zoopla's estimation of 95 Tenby Road, which is also mid terrace
|
||||
100070505235: 344000, # Based on Zoopla's estimation of 131 School road, which is also semi-detached
|
||||
100070513306: 182000, # Based on Zoopla's estimation of 61 Simmons Drive
|
||||
100071306896: 77000, # Based on Flat 2 of 44 Wedgewood Road on Zoopla
|
||||
|
|
|
|||
232
etl/eligibility/Eligibility.py
Normal file
232
etl/eligibility/Eligibility.py
Normal file
|
|
@ -0,0 +1,232 @@
|
|||
from recommendations.recommendation_utils import convert_thickness_to_numeric
|
||||
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
|
||||
from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
|
||||
|
||||
|
||||
class Eligibility:
    """
    Given the epc data about a property, this class holds the logic for determining if the home
    is eligible for a specific retrofit measure.

    For example, this could be whether the loft has insulation at or below a standardised
    threshold, or if it has an empty cavity.

    Further to this, this class is responsible for determining if the property is suitable for
    specific funding schemes (currently GBIS and ECO4).

    The measure verdicts are stored on ``self.loft`` and ``self.cavity`` as dictionaries; the
    scheme verdicts are stored on ``self.gbis`` (truthy/falsy flag) and ``self.eco4``
    (dictionary with "eligible" and "message" keys).
    """

    # Measure verdicts, populated by loft_insulation() and cavity_insulation()
    loft = None
    cavity = None

    # Scheme verdicts, populated by check_gbis() and check_eco4()
    gbis = None
    eco4 = None

    # If the loft has no more than 100mm of insulation, we classify the home as needing loft insulation
    LOFT_INSULATION_THRESHOLD = 100

    # SAP score boundaries used by the scheme checks below
    GBIS_MAX_SAP = 68  # check_gbis treats a score at or below this as "EPC D or below"
    ECO4_MAX_SAP = 54  # check_eco4 treats a score at or below this as "EPC E or below"
    ECO4_MIN_POST_RETROFIT_SAP = 69  # check_eco4 treats a score at or above this as reaching EPC C

    # The suitability flag used to be written and read under several different spellings, which
    # raised KeyError as soon as a verdict written under one spelling was read under another.
    # Every verdict now carries the flag under both the historic misspelling (for any existing
    # consumer) and the correct spelling; this class reads it through SUITABILITY_KEY only.
    SUITABILITY_KEY = "suitability"
    LEGACY_SUITABILITY_KEY = "suitablility"

    def __init__(self, epc, cleaned):
        """
        :param epc: Mapping holding the EPC record. The keys read by this class are
                    "walls-description", "roof-description" and "current-energy-efficiency"
        :param cleaned: Mapping from a description key to a list of cleaned records, each
                        carrying an "original_description" entry (see parse_fabric)
        """
        self.epc = epc
        self.cleaned = cleaned

        self.walls = self.parse_fabric("walls-description")
        self.roof = self.parse_fabric("roof-description")

        self.loft_insulation()
        self.cavity_insulation()

    @classmethod
    def _verdict(cls, suitable, **details):
        """
        Build a measure verdict dictionary, storing the suitability flag under both the legacy
        and the correctly spelled key so every reader sees the same value.
        """
        flag = bool(suitable)
        return {cls.LEGACY_SUITABILITY_KEY: flag, cls.SUITABILITY_KEY: flag, **details}

    def parse_fabric(self, key):
        """
        Return the parsed attributes for the wall or roof description of the EPC.

        Descriptions containing "thermal transmittance" are processed directly with
        WallAttributes / RoofAttributes. Any other description is looked up in ``self.cleaned``
        by matching its "original_description".

        :param key: "walls-description" or "roof-description"
        :raises ValueError: if a thermal transmittance description is given for any other key
        :raises IndexError: if no cleaned record matches the description
        """
        description = self.epc[key]

        if "thermal transmittance" in description:
            if key == "walls-description":
                return WallAttributes(description).process()

            if key == "roof-description":
                return RoofAttributes(description).process()

            raise ValueError("Invalid Key")

        # Get the cleaned version of the description (first match wins)
        match = next(
            (data for data in self.cleaned[key] if data["original_description"] == description),
            None,
        )
        if match is None:
            # Same exception type as indexing an empty list of matches, but with a useful message
            raise IndexError(f"No cleaned {key} record matches description {description!r}")

        return match

    def loft_insulation(self, loft_thickness_threshold: int = None):
        """
        Given the description of roof, this function determines whether or not the property is suitable for loft
        insulation. A loft whose existing insulation thickness does not exceed loft_thickness_threshold is
        deemed to be suitable for loft insulation. The verdict is stored on ``self.loft``.

        :param loft_thickness_threshold: Integer, Optional. If provided, any loft found with insulation no thicker
                                         than this is deemed to be suitable for loft insulation. If this
                                         parameter is not provided, this method will default to the variable
                                         specified in LOFT_INSULATION_THRESHOLD
        """

        loft_thickness_threshold = (
            self.LOFT_INSULATION_THRESHOLD if loft_thickness_threshold is None else loft_thickness_threshold
        )

        # We firstly check if the roof is a loft
        is_loft = self.roof["is_pitched"] and (not self.roof["is_roof_room"])

        if not is_loft:
            self.loft = self._verdict(False, thickness=None, reason="roof not loft")
            return

        # If it is a loft, we'll convert the textual thickness to a numerical value we can easily use
        insulation_thickness = convert_thickness_to_numeric(
            string_thickness=self.roof["insulation_thickness"],
            is_pitched=self.roof["is_pitched"],
            is_flat=self.roof["is_flat"]
        )

        if insulation_thickness > loft_thickness_threshold:
            # Insulation is already thick enough
            self.loft = self._verdict(False, thickness=insulation_thickness, reason="existing insulation")
            return

        self.loft = self._verdict(True, thickness=insulation_thickness, reason=None)

    def cavity_insulation(self):
        """
        Given the description of the walls, this function determines if the property is suitable for cavity wall
        insulation. The verdict is stored on ``self.cavity`` with a "type" of "empty", "partial" or "full";
        only "full" is unsuitable.
        """

        is_cavity = self.walls["is_cavity_wall"]
        is_empty = (not self.walls["is_filled_cavity"]) or (
            self.walls["is_as_built"] and self.walls["insulation_thickness"] not in ["average", "above average"]
        )
        # NOTE(review): the "not in" below is kept as originally written - confirm that an as-built
        # wall whose thickness is anything other than "below average" should count as partially filled
        is_partial_filled = (
            self.walls["is_as_built"] and self.walls["insulation_thickness"] not in ["below average"]
        )

        # The empty check takes precedence over the partial check
        if is_cavity and is_empty:
            self.cavity = self._verdict(True, type="empty")
            return

        if is_cavity and is_partial_filled:
            self.cavity = self._verdict(True, type="partial")
            return

        self.cavity = self._verdict(False, type="full")

    def check_gbis(self):
        """
        The Eligibility criteria for the Great British Insulation Scheme (GBIS) can be found here:
        https://www.ofgem.gov.uk/environmental-and-social-schemes/great-british-insulation-scheme/homeowners-and-tenants

        At a high level, the criteria is the following:
        - The home must be within council tax bands A-D in England, A-E in Scotland, A-E in Wales
        - It must have an EPC rating of D or below

        For the moment, we won't check whether a property is in the correct council tax band. There is likely
        to be public data for this since there is a government website which allows you to search for properties:
        https://www.gov.uk/council-tax-bands
        This data is possibly contained on the council tax valuation list but it remains to be seen (seems
        unlikely) whether or not the data is openly accessible
        https://www.gov.uk/government/statistics/quality-assurance-of-administrative-data-in-the-uk-house-price-index
        /valuation-office-agency-council-tax-valuation-lists

        Currently, we tailor this module to the Warmfront Team and their delivery capabilities (both practically and
        commercially). Therefore, we will check:
        1) Whether the property is an EPC D or below
        2) Whether the property is suitable for cavity wall insulation or loft insulation

        However, GBIS applies to many insulation measures, which can be seen in the ofgem document

        GBIS does not have any minimum upgrade requirement so we don't need to simulate the post retrofit sap score
        using the machine learning model

        The result is stored on ``self.gbis``.
        """

        # Refresh the measure verdicts before reading them
        self.cavity_insulation()
        self.loft_insulation()

        has_suitable_measure = self.cavity[self.SUITABILITY_KEY] or self.loft[self.SUITABILITY_KEY]

        self.gbis = has_suitable_measure and (
            int(self.epc["current-energy-efficiency"]) <= self.GBIS_MAX_SAP
        )

    def check_eco4(self, post_retrofit_sap=None):
        """
        This function will check if the property is eligible for funding under the ECO4 scheme

        For the moment, this function will consider just measures that can be implemented by the
        Warmfront team, therefore we will only check if a property has an uninsulated loft AND uninsulated
        cavity

        We use Ofgem's V1.1 ECO 4 guidance document for the conditions under which a property is eligible
        This document can be found here:
        https://www.ofgem.gov.uk/sites/default/files/2023-02/ECO4%20Delivery%20Guidance%20v1.1%20%281%29.pdf

        The conditions (to be reviewed) to be eligible for retrofit, under ECO4, are the following:
        1) The property is a social home (This is assumed prior to this function as this code will often
            be run on property lists provided by a HA)
        2) The property is an EPC E or below
        3) The property has an unfilled cavity and uninsulated loft
        4) After retrofit, the property will hit an EPC C

        Note: This criteria will likely be adjusted depending on the properties that can be served right now

        If the post_retrofit_sap is provided, then if this value is 69 or higher, the property will be deemed
        to be eligible for ECO4 funding. If the post_retrofit_sap is not provided, the property will be
        deemed to be eligible, conditional to the post_retrofit_sap score check

        The result is stored on ``self.eco4`` as a dictionary with "eligible" and "message" keys.

        :param post_retrofit_sap: Optional. The simulated SAP score of the property after retrofit
        :return: None
        """

        # Condition 2: the current rating must be an EPC E or below
        current_sap = int(self.epc["current-energy-efficiency"])
        if current_sap > self.ECO4_MAX_SAP:
            self.eco4 = {
                "eligible": False,
                "message": "sap too high"
            }
            return

        # Refresh the measure verdicts before reading them
        self.cavity_insulation()
        self.loft_insulation()

        # Condition 3: both the cavity and the loft must be suitable for insulation
        is_eligible = self.cavity[self.SUITABILITY_KEY] and self.loft[self.SUITABILITY_KEY]

        if post_retrofit_sap is None:
            self.eco4 = {
                "eligible": is_eligible,
                "message": "subject to post retrofit sap"
            }
            return

        # Condition 4: the simulated post retrofit score must reach an EPC C
        is_eligible = is_eligible and (post_retrofit_sap >= self.ECO4_MIN_POST_RETROFIT_SAP)

        self.eco4 = {
            "eligible": is_eligible,
            "message": None
        }
        return
|
||||
|
|
@ -1,83 +0,0 @@
|
|||
from recommendations.recommendation_utils import convert_thickness_to_numeric
|
||||
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
|
||||
from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
|
||||
|
||||
|
||||
class MeasureSuitibility:
    """
    Holds the logic that decides, from the epc data of a property, whether the home is a
    candidate for a specific retrofit measure.

    At the moment the only measure covered is loft insulation: a loft whose existing
    insulation does not exceed a standard thickness is flagged as suitable.
    """

    # Lofts with insulation no thicker than this (in mm) are classed as needing loft insulation
    LOFT_INSULATION_THRESHOLD = 100

    def __init__(self, epc, cleaned):
        """
        :param epc: Mapping holding the EPC record ("walls-description" and "roof-description"
                    are read here)
        :param cleaned: Mapping from a description key to a list of cleaned records
        """
        self.epc = epc
        self.cleaned = cleaned

        self.walls = self.parse_fabric("walls-description")
        self.roof = self.parse_fabric("roof-description")

    def parse_fabric(self, key):
        """
        Return the parsed attributes for the description stored under ``key`` in the EPC.

        :param key: "walls-description" or "roof-description"
        :raises ValueError: if a thermal transmittance description is given for any other key
        :raises IndexError: if no cleaned record matches the description
        """
        if "thermal transmittance" not in self.epc[key]:
            # Ordinary description: pick the first cleaned record that matches it
            matching_records = [
                record
                for record in self.cleaned[key]
                if record["original_description"] == self.epc[key]
            ]
            return matching_records[0]

        # Thermal transmittance descriptions are processed directly
        if key == "walls-description":
            wall_attributes = WallAttributes(self.epc["walls-description"])
            return wall_attributes.process()

        if key == "roof-description":
            roof_attributes = RoofAttributes(self.epc["roof-description"])
            return roof_attributes.process()

        raise ValueError("Invalid Key")

    def loft_insulation(self, loft_thickness_threshold: int = None):
        """
        Decide from the roof description whether the property is suitable for loft insulation.

        :param loft_thickness_threshold: Integer, Optional. A loft whose insulation is no thicker
                                         than this value is deemed suitable. Defaults to
                                         LOFT_INSULATION_THRESHOLD when not provided
        :return: Dictionary with the suitability flag and the numeric insulation thickness
                 (None when the roof is not a loft)
        """
        threshold = loft_thickness_threshold
        if threshold is None:
            threshold = self.LOFT_INSULATION_THRESHOLD

        roof = self.roof

        # Anything that is not pitched, or that is a roof room, is not a loft
        if (not roof["is_pitched"]) or roof["is_roof_room"]:
            return {"suitablility": False, "thickness": None}

        # Turn the textual thickness into a number we can compare against the threshold
        thickness = convert_thickness_to_numeric(
            string_thickness=roof["insulation_thickness"],
            is_pitched=roof["is_pitched"],
            is_flat=roof["is_flat"],
        )

        # Only insulation strictly thicker than the threshold counts as thick enough already
        needs_insulation = False if thickness > threshold else True

        return {"suitablility": needs_insulation, "thickness": thickness}
|
||||
|
|
@ -8,14 +8,16 @@ from tqdm import tqdm
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
import msgpack
|
||||
from datetime import datetime
|
||||
from utils.logger import setup_logger
|
||||
from utils.s3 import read_from_s3
|
||||
from dotenv import load_dotenv
|
||||
from backend.SearchEpc import SearchEpc
|
||||
from backend.Property import Property
|
||||
from etl.eligibility.MeasureSuitibility import MeasureSuitibility
|
||||
from etl.eligibility.Eligibility import Eligibility
|
||||
from etl.epc.DataProcessor import DataProcessor
|
||||
from backend.app.utils import read_parquet_from_s3
|
||||
from backend.app.plan.utils import create_recommendation_scoring_data
|
||||
|
||||
ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
|
||||
|
||||
|
|
@ -323,6 +325,79 @@ def merge_ha_15(asset_list, identified_addresses):
|
|||
return merged_data, dropped_identified_merge_keys
|
||||
|
||||
|
||||
def prepare_model_data_row(property_id, modelling_epc, cleaned, cleaning_data, created_at):
    """
    This function prepares the data for modelling, in the same fashion as the recommendation engine
    With up-coming refactoring, this will change

    The property is simulated with two retrofit measures applied one after the other (cavity wall
    insulation, then loft insulation). After each measure the ending EPC features are updated with
    the scored values, so the final scoring data reflects both measures.

    :param property_id: Identifier of the property. It is used as the Property id and as the prefix of
                        the simulated recommendation ids; non-string identifiers are accepted
    :param modelling_epc: Mapping holding the EPC record to model. "postcode" and "address1" are read
                          here; "local-authority" is read through the Property's data
    :param cleaned: Cleaned description data, passed to Property.get_components
    :param cleaning_data: Averages dataset, passed to DataProcessor.apply_averages_cleaning
    :param created_at: Timestamp passed to DataProcessor.calculate_days_to for the ending record
    :return: The scoring data returned by create_recommendation_scoring_data for the last measure
    """
    p = Property(
        id=property_id,
        postcode=modelling_epc["postcode"],
        address1=modelling_epc["address1"],
        epc_client=None,
        data=modelling_epc
    )

    p.get_components(cleaned)
    # This is temp - this should happen after scoring
    cleaned_property_data = DataProcessor.apply_averages_cleaning(
        data_to_clean=pd.DataFrame([dict(**p.get_model_data(), LOCAL_AUTHORITY=p.data["local-authority"])]),
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
    )
    p.set_number_lighting_outlets(cleaned_property_data)

    data_processor = DataProcessor(None, newdata=True)
    data_processor.insert_data(pd.DataFrame([p.get_model_data()]))

    data_processor.pre_process()

    starting_epc_data = data_processor.get_component_features(suffix="_STARTING")
    ending_epc_data = data_processor.get_component_features(suffix="_ENDING")
    fixed_data = data_processor.get_fixed_features()

    # We update the ending record with the recommended updates and we set lodgement date to created_at
    ending_epc_data["DAYS_TO_ENDING"] = data_processor.calculate_days_to(created_at)

    # We simulate the impact of the retrofit using expected performance of the wall and roof,
    # after retrofit. We use the minimal u-values required to meet building regulations part L
    # TODO: Check the performance of the materials warmfront's installers use, particularly for
    #  cavity
    # The ids are built with f-strings so that a non-string property_id (e.g. an integer row id)
    # does not raise a TypeError, which str.join would do
    simulation_recommendations = [
        {
            "recommendation_id": f"{property_id}-cavity",
            "type": "cavity_wall_insulation",
            "new_u_value": 0.55,
            "parts": [{}]
        },
        {
            "recommendation_id": f"{property_id}-loft",
            "type": "loft_insulation",
            "new_u_value": 0.16,
            "parts": [{"depth": 270}]
        }
    ]

    scoring_dict = {}
    for recommendation in simulation_recommendations:
        scoring_dict = create_recommendation_scoring_data(
            property=p,
            recommendation=recommendation,
            starting_epc_data=starting_epc_data,
            ending_epc_data=ending_epc_data,
            fixed_data=fixed_data,
        )
        # At each iteration, we want to update the ending_epc_data, so in the end, ending_epc_data contains
        # all of the updates
        for k, value in scoring_dict.items():
            if k in ending_epc_data.columns:
                ending_epc_data[k] = value

    return scoring_dict
|
||||
|
||||
|
||||
def app():
|
||||
ha32_asset_list, ha15_asset_list, ha32_identified_addresses, ha15_identified_addresses = load_data()
|
||||
|
||||
|
|
@ -342,14 +417,19 @@ def app():
|
|||
bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
|
||||
)
|
||||
|
||||
created_at = datetime.now().isoformat()
|
||||
|
||||
# We want to retrieve EPCs for every single property
|
||||
# NOTE: HA32 is MOSTLY cavity via GBIS
|
||||
ha_data = ha32
|
||||
house_number_key = "Dwelling num"
|
||||
address_key = "Street"
|
||||
postcode_key = "Postcode"
|
||||
|
||||
def get_data(ha_data, house_number_key, address_key, postcode_key):
|
||||
ha_scoring_data = []
|
||||
|
||||
scoring_data = []
|
||||
results = []
|
||||
for _, house in tqdm(ha_data.iterrows(), total=len(ha_data)):
|
||||
searcher = SearchEpc(
|
||||
address1=" ".join([house[house_number_key], house[address_key]]),
|
||||
|
|
@ -364,42 +444,50 @@ def app():
|
|||
if not penultimate_epc:
|
||||
penultimate_epc = newest_epc
|
||||
|
||||
suitability = MeasureSuitibility(
|
||||
epc=newest_epc, cleaned=cleaned
|
||||
)
|
||||
suitable = suitability.loft_insulation()
|
||||
eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
|
||||
eligibility.check_gbis()
|
||||
eligibility.check_eco4()
|
||||
|
||||
modelling_epc = newest_epc.copy()
|
||||
if not suitable["suitablility"]:
|
||||
# if unsuccessful with newest EPC, try penultimate
|
||||
suitability = MeasureSuitibility(
|
||||
epc=penultimate_epc, cleaned=cleaned
|
||||
# If there is no eligibility, we need to check the penultimate epc
|
||||
if (not eligibility.eco4["eligible"]) and (not eligibility.gbis):
|
||||
eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
|
||||
eligibility.check_gbis()
|
||||
eligibility.check_eco4()
|
||||
|
||||
if eligibility.eco4["eligible"]:
|
||||
# TODO: Check me
|
||||
scoring_dictionary = prepare_model_data_row(
|
||||
property_id=house["row_id"],
|
||||
modelling_epc=eligibility.epc,
|
||||
cleaned=cleaned,
|
||||
cleaning_data=cleaning_data,
|
||||
created_at=created_at
|
||||
)
|
||||
suitable = suitability.loft_insulation()
|
||||
modelling_epc = penultimate_epc.copy()
|
||||
scoring_data.append(scoring_dictionary)
|
||||
results.append(
|
||||
{
|
||||
"row_id": house["row_id"],
|
||||
"warmfront_identified": house["identified"],
|
||||
"gbis_eligible": eligibility.gbis,
|
||||
"eco4_eligible": eligibility.eco4["eligible"],
|
||||
"date_epc": eligibility.epc["lodgement-date"],
|
||||
"eco4_note": "conditional on post sap"
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
if not suitable["suitablility"]:
|
||||
raise ValueError("DO SOMETHING")
|
||||
if (house["identified"] and not eligibility.gbis) and (
|
||||
house["identified"] and not eligibility.eco4["eligible"]):
|
||||
raise NotImplementedError("Investigate ms")
|
||||
|
||||
p = Property(
|
||||
id=house["row_id"],
|
||||
postcode=modelling_epc["postcode"],
|
||||
address1=modelling_epc["address1"],
|
||||
epc_client=None,
|
||||
data=modelling_epc
|
||||
# If nothing is eligible or gbis is eligible, then we make a record of this
|
||||
results.append(
|
||||
{
|
||||
"row_id": house["row_id"],
|
||||
"warmfront_identified": house["identified"],
|
||||
"gbis_eligible": eligibility.gbis,
|
||||
"eco4_eligible": eligibility.eco4["eligible"],
|
||||
"date_epc": eligibility.epc["lodgement-date"],
|
||||
"eco4_note": None
|
||||
}
|
||||
)
|
||||
################################################################################
|
||||
# Prepare the data for modelling, in the same fashion as the engine
|
||||
################################################################################
|
||||
|
||||
p.get_components(cleaned)
|
||||
# This is temp - this should happen after scoring
|
||||
cleaned_property_data = DataProcessor.apply_averages_cleaning(
|
||||
data_to_clean=pd.DataFrame([dict(**p.get_model_data(), LOCAL_AUTHORITY=p.data["local-authority"])]),
|
||||
cleaning_data=cleaning_data,
|
||||
cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
|
||||
)
|
||||
p.set_number_lighting_outlets(cleaned_property_data)
|
||||
|
||||
from pprint import pprint
|
||||
len(searcher.data["rows"])
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue