eligibility pipeline wip

This commit is contained in:
Khalim Conn-Kowlessar 2023-12-11 10:20:46 +00:00
parent e1c066ea8d
commit b41fa37072
4 changed files with 357 additions and 120 deletions

View file

@ -15,7 +15,7 @@ class PropertyValuation:
100071264896: 128000, 100071264896: 128000,
# Based on next door neighbour: https://themovemarket.com/tools/propertyprices/flat-2-queens-wood-house-219 # Based on next door neighbour: https://themovemarket.com/tools/propertyprices/flat-2-queens-wood-house-219
# -brandwood-road-birmingham-b14-6pu # -brandwood-road-birmingham-b14-6pu
100070533688: 218000, # Based on Zoopla's estimation of 95 Tenby Road, which is also end terrace 100070533688: 218000, # Based on Zoopla's estimation of 95 Tenby Road, which is also mid terrace
100070505235: 344000, # Based on Zoopla's estimation of 131 School road, which is also semi-detached 100070505235: 344000, # Based on Zoopla's estimation of 131 School road, which is also semi-detached
100070513306: 182000, # Based on Zoopla's estimation of 61 Simmons Drive 100070513306: 182000, # Based on Zoopla's estimation of 61 Simmons Drive
100071306896: 77000, # Based on Flat 2 of 44 Wedgewood Road on Zoopla 100071306896: 77000, # Based on Flat 2 of 44 Wedgewood Road on Zoopla

View file

@ -0,0 +1,232 @@
from recommendations.recommendation_utils import convert_thickness_to_numeric
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
class Eligibility:
    """
    Given the epc data about a property, this class holds the logic for determining if the home
    is eligible for a specific retrofit measure.
    For example, this could be whether the loft has insulation below a standardised threshold, or
    if it has an empty cavity.

    Further to this, this class is responsible for determining if the property is suitable for
    specific funding schemes (GBIS and ECO4).

    Results are stored on the instance rather than returned:
      - ``loft``:   dict with keys "suitablility", "thickness", "reason"
      - ``cavity``: dict with keys "suitablility", "type"
      - ``gbis``:   bool
      - ``eco4``:   dict with keys "eligible", "message"
    """

    # Key under which every measure result stores its boolean verdict. The spelling is historical:
    # it is the key the loft results and both scheme checks already used, so it is kept as the
    # single source of truth instead of the three different spellings that previously caused
    # KeyErrors when reading cavity/loft results.
    SUITABILITY_KEY = "suitablility"

    loft = None
    cavity = None

    # schemes
    gbis = None
    eco4 = None

    # Threshold (mm) used to decide whether a loft needs insulation.
    # NOTE(review): the original intent was described as "less than 100mm", but the comparison in
    # loft_insulation treats a loft at exactly the threshold as still needing insulation - confirm.
    LOFT_INSULATION_THRESHOLD = 100

    # SAP score boundaries used by the scheme checks
    GBIS_MAX_SAP = 68  # EPC D or below
    ECO4_MAX_SAP = 54  # EPC E or below
    ECO4_TARGET_SAP = 69  # EPC C or above after retrofit

    def __init__(self, epc, cleaned):
        """
        :param epc: Mapping of raw EPC fields for the property. The fields read by this class are
                    "walls-description", "roof-description" and "current-energy-efficiency"
        :param cleaned: Mapping from a description field name to a list of cleaned records, each
                        holding an "original_description" plus the parsed attributes
        """
        self.epc = epc
        self.cleaned = cleaned

        self.walls = self.parse_fabric("walls-description")
        self.roof = self.parse_fabric("roof-description")

        self.loft_insulation()
        self.cavity_insulation()

    def parse_fabric(self, key):
        """
        Get the parsed attributes for the wall or roof description of this property.

        Descriptions containing "thermal transmittance" are parsed on the fly with the matching
        attributes class; every other description is looked up in the cleaned data.

        :param key: "walls-description" or "roof-description"
        :return: The parsed attributes for the description
        :raises ValueError: If the description is a thermal transmittance one and the key is
                            neither the walls nor the roof description
        :raises IndexError: If no cleaned record matches the description
        """
        description = self.epc[key]

        if "thermal transmittance" in description:
            if key == "walls-description":
                return WallAttributes(description).process()
            if key == "roof-description":
                return RoofAttributes(description).process()
            raise ValueError("Invalid Key")

        # Get the cleaned version of the description
        for data in self.cleaned[key]:
            if data["original_description"] == description:
                return data

        # IndexError is what the previous `[...][0]` lookup raised, so the type is kept for callers
        raise IndexError(f"No cleaned {key} record matches description {description!r}")

    def loft_insulation(self, loft_thickness_threshold: int = None):
        """
        Given the description of roof, this function determines whether or not the property is
        suitable for loft insulation. A loft whose existing insulation is not thicker than
        loft_thickness_threshold is deemed to be suitable for loft insulation. The result is
        stored in self.loft.

        :param loft_thickness_threshold: Integer, Optional. If provided, any loft found with
                                         insulation no thicker than this is deemed to be suitable
                                         for loft insulation. If this parameter is not provided,
                                         this method will default to LOFT_INSULATION_THRESHOLD
        """
        if loft_thickness_threshold is None:
            loft_thickness_threshold = self.LOFT_INSULATION_THRESHOLD

        # We firstly check if the roof is a loft
        is_loft = self.roof["is_pitched"] and (not self.roof["is_roof_room"])
        if not is_loft:
            self.loft = {
                self.SUITABILITY_KEY: False,
                "thickness": None,
                "reason": "roof not loft",
            }
            return

        # If it is a loft, we'll convert the textual thickness to a numerical value we can easily use
        insulation_thickness = convert_thickness_to_numeric(
            string_thickness=self.roof["insulation_thickness"],
            is_pitched=self.roof["is_pitched"],
            is_flat=self.roof["is_flat"],
        )

        if insulation_thickness > loft_thickness_threshold:
            # Insulation is already thick enough
            self.loft = {
                self.SUITABILITY_KEY: False,
                "thickness": insulation_thickness,
                "reason": "existing insulation",
            }
            return

        self.loft = {
            self.SUITABILITY_KEY: True,
            "thickness": insulation_thickness,
            "reason": None,
        }

    def cavity_insulation(self):
        """
        Given the description of the walls, this function determines if the property is suitable
        for cavity wall insulation. The result is stored in self.cavity, with "type" being one of
        "empty", "partial" or "full".
        """
        walls = self.walls
        is_cavity = walls["is_cavity_wall"]

        # A cavity counts as empty when it is not flagged as filled, or when it is as built
        # without an "average"/"above average" insulation rating
        is_empty = (not walls["is_filled_cavity"]) or (
            walls["is_as_built"] and walls["insulation_thickness"] not in ["average", "above average"]
        )
        # NOTE(review): because the empty case is checked first, "partial" is only reached for as
        # built walls rated "average"/"above average" - confirm this is the intended rule
        is_partial_filled = (
            walls["is_as_built"] and walls["insulation_thickness"] not in ["below average"]
        )

        if is_cavity and is_empty:
            self.cavity = {self.SUITABILITY_KEY: True, "type": "empty"}
            return

        if is_cavity and is_partial_filled:
            self.cavity = {self.SUITABILITY_KEY: True, "type": "partial"}
            return

        self.cavity = {self.SUITABILITY_KEY: False, "type": "full"}

    def check_gbis(self):
        """
        The Eligibility criteria for the Great British Insulation Scheme (GBIS) can be found here:
        https://www.ofgem.gov.uk/environmental-and-social-schemes/great-british-insulation-scheme/homeowners-and-tenants

        At a high level, the criteria is the following:
            - The home must be within council tax bands A-D in England, A-E in Scotland, A-E in Wales
            - It must have an EPC rating of D or below

        For the moment, we won't check whether a property is in the correct council tax band. There
        is likely to be public data for this since there is a government website which allows you
        to search for properties:
        https://www.gov.uk/council-tax-bands
        This data is possibly contained on the council tax valuation list but it remains to be seen
        (seems unlikely) whether or not the data is openly accessible
        https://www.gov.uk/government/statistics/quality-assurance-of-administrative-data-in-the-uk-house-price-index
        /valuation-office-agency-council-tax-valuation-lists

        Currently, we tailor this module to the Warmfront Team and their delivery capabilities (both
        practically and commercially). Therefore, we will check:
            1) Whether the property is an EPC D or below
            2) Whether the property is suitable for cavity wall insulation or loft insulation

        However, GBIS applies to many insulation measures, which can be seen in the ofgem document

        GBIS does not have any minimum upgrade requirement so we don't need to simulate the post
        retrofit sap score using the machine learning model

        The result (a bool) is stored in self.gbis.
        """
        # Refresh the measure results before combining them
        self.cavity_insulation()
        self.loft_insulation()

        has_suitable_measure = self.cavity[self.SUITABILITY_KEY] or self.loft[self.SUITABILITY_KEY]
        current_sap = int(self.epc["current-energy-efficiency"])

        self.gbis = bool(has_suitable_measure and current_sap <= self.GBIS_MAX_SAP)

    def check_eco4(self, post_retrofit_sap=None):
        """
        This function will check if the property is eligible for funding under the ECO4 scheme

        For the moment, this function will consider just measures that can be implemented by the
        Warmfront team, therefore we will only check that the property is suitable for both cavity
        wall insulation AND loft insulation

        We use Ofgem's V1.1 ECO 4 guidance document for the conditions under which a property is
        eligible. This document can be found here:
        https://www.ofgem.gov.uk/sites/default/files/2023-02/ECO4%20Delivery%20Guidance%20v1.1%20%281%29.pdf

        The conditions (to be reviewed) to be eligible for retrofit, under ECO4, are the following:
            1) The property is a social home (This is assumed prior to this function as this code
               will often be run on property lists provided by a HA)
            2) The property is an EPC E or below
            3) The property is suitable for cavity wall insulation and loft insulation
            4) After retrofit, the property will hit an EPC C

        Note: This criteria will likely be adjusted depending on the properties that can be served
        right now

        If the post_retrofit_sap is provided, then if this value is 69 or higher, the property will
        be deemed to be eligible for ECO4 funding. If the post_retrofit_sap is not provided, the
        result is conditional on the post_retrofit_sap score check and the message says so.

        The result is stored in self.eco4 as {"eligible": bool, "message": str or None}.

        :param post_retrofit_sap: Optional. The simulated SAP score of the property after retrofit
        """
        current_sap = int(self.epc["current-energy-efficiency"])
        if current_sap > self.ECO4_MAX_SAP:
            self.eco4 = {
                "eligible": False,
                "message": "sap too high",
            }
            return

        self.cavity_insulation()
        self.loft_insulation()

        # make sure condition 3 is true (condition 2 was checked above)
        is_eligible = bool(self.cavity[self.SUITABILITY_KEY] and self.loft[self.SUITABILITY_KEY])

        if post_retrofit_sap is None:
            self.eco4 = {
                "eligible": is_eligible,
                "message": "subject to post retrofit sap",
            }
            return

        self.eco4 = {
            "eligible": bool(is_eligible and post_retrofit_sap >= self.ECO4_TARGET_SAP),
            "message": None,
        }

View file

@ -1,83 +0,0 @@
from recommendations.recommendation_utils import convert_thickness_to_numeric
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
class MeasureSuitibility:
    """
    Holds the logic that decides, from the epc data of a property, whether the home is suitable
    for a specific retrofit measure - e.g. a loft whose insulation is not thicker than a
    standardised threshold.
    """

    # Threshold (mm) used when no explicit threshold is passed to loft_insulation
    LOFT_INSULATION_THRESHOLD = 100

    def __init__(self, epc, cleaned):
        """
        :param epc: Mapping of raw EPC fields; "walls-description" and "roof-description" are read
        :param cleaned: Mapping from a description field name to a list of cleaned records, each
                        carrying an "original_description"
        """
        self.epc = epc
        self.cleaned = cleaned

        self.walls = self.parse_fabric("walls-description")
        self.roof = self.parse_fabric("roof-description")

    def parse_fabric(self, key):
        """
        Return the parsed attributes for the description stored under `key`.

        Thermal transmittance descriptions are parsed directly with the matching attributes class;
        anything else is resolved against the cleaned records.

        :param key: "walls-description" or "roof-description"
        :raises ValueError: For a thermal transmittance description under any other key
        :raises IndexError: When no cleaned record matches the description
        """
        description = self.epc[key]

        if "thermal transmittance" not in description:
            # Resolve against the cleaned version of the description
            candidates = [
                entry for entry in self.cleaned[key]
                if entry["original_description"] == description
            ]
            return candidates[0]

        attribute_parsers = {
            "walls-description": WallAttributes,
            "roof-description": RoofAttributes,
        }
        if key not in attribute_parsers:
            raise ValueError("Invalid Key")
        return attribute_parsers[key](description).process()

    def loft_insulation(self, loft_thickness_threshold: int = None):
        """
        Decide whether the property is suitable for loft insulation, based on the roof attributes.

        :param loft_thickness_threshold: Integer, Optional. Lofts whose insulation is not thicker
                                         than this are deemed suitable. Falls back to
                                         LOFT_INSULATION_THRESHOLD when omitted
        :return: dict with the keys "suitablility" (bool) and "thickness" (None when the roof is
                 not a loft)
        """
        if loft_thickness_threshold is None:
            loft_thickness_threshold = self.LOFT_INSULATION_THRESHOLD

        roof = self.roof

        # Only a pitched roof that is not a roof room counts as a loft
        if not roof["is_pitched"] or roof["is_roof_room"]:
            return {"suitablility": False, "thickness": None}

        # Turn the textual thickness into a number we can compare against the threshold
        thickness = convert_thickness_to_numeric(
            string_thickness=roof["insulation_thickness"],
            is_pitched=roof["is_pitched"],
            is_flat=roof["is_flat"],
        )

        # Anything thicker than the threshold is already insulated well enough
        needs_insulation = not (thickness > loft_thickness_threshold)
        return {"suitablility": needs_insulation, "thickness": thickness}

View file

@ -8,14 +8,16 @@ from tqdm import tqdm
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import msgpack import msgpack
from datetime import datetime
from utils.logger import setup_logger from utils.logger import setup_logger
from utils.s3 import read_from_s3 from utils.s3 import read_from_s3
from dotenv import load_dotenv from dotenv import load_dotenv
from backend.SearchEpc import SearchEpc from backend.SearchEpc import SearchEpc
from backend.Property import Property from backend.Property import Property
from etl.eligibility.MeasureSuitibility import MeasureSuitibility from etl.eligibility.Eligibility import Eligibility
from etl.epc.DataProcessor import DataProcessor from etl.epc.DataProcessor import DataProcessor
from backend.app.utils import read_parquet_from_s3 from backend.app.utils import read_parquet_from_s3
from backend.app.plan.utils import create_recommendation_scoring_data
ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env" ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
@ -323,6 +325,79 @@ def merge_ha_15(asset_list, identified_addresses):
return merged_data, dropped_identified_merge_keys return merged_data, dropped_identified_merge_keys
def prepare_model_data_row(property_id, modelling_epc, cleaned, cleaning_data, created_at):
    """
    This function prepares the data for modelling, in the same fashion as the recommendation engine.
    With up-coming refactoring, this will change.

    A cavity wall insulation and a loft insulation recommendation are simulated one after the
    other; after each one, the matching columns of the ending EPC record are overwritten so that
    the next recommendation is scored on top of the previous one.

    :param property_id: Identifier of the property. Used as the Property id and, converted to a
                        string, as the prefix of the simulated recommendation ids
    :param modelling_epc: EPC record to model. "postcode" and "address1" are read here and the
                          whole record is handed to Property as its data
    :param cleaned: Cleaned description data, passed to Property.get_components
    :param cleaning_data: Dataset passed to DataProcessor.apply_averages_cleaning
    :param created_at: Timestamp passed to DataProcessor.calculate_days_to to fill the
                       DAYS_TO_ENDING column
    :return: The scoring data produced by create_recommendation_scoring_data for the last
             simulated recommendation (loft insulation)
    """
    p = Property(
        id=property_id,
        postcode=modelling_epc["postcode"],
        address1=modelling_epc["address1"],
        epc_client=None,
        data=modelling_epc
    )
    p.get_components(cleaned)

    # This is temp - this should happen after scoring
    cleaned_property_data = DataProcessor.apply_averages_cleaning(
        data_to_clean=pd.DataFrame([dict(**p.get_model_data(), LOCAL_AUTHORITY=p.data["local-authority"])]),
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
    )
    p.set_number_lighting_outlets(cleaned_property_data)

    data_processor = DataProcessor(None, newdata=True)
    data_processor.insert_data(pd.DataFrame([p.get_model_data()]))
    data_processor.pre_process()

    starting_epc_data = data_processor.get_component_features(suffix="_STARTING")
    ending_epc_data = data_processor.get_component_features(suffix="_ENDING")
    fixed_data = data_processor.get_fixed_features()

    # We update the ending record with the recommended updates and we set lodgement date to today
    ending_epc_data["DAYS_TO_ENDING"] = data_processor.calculate_days_to(created_at)

    # The row id is not guaranteed to be a string (str.join raises TypeError on anything else),
    # so it is converted once before building the recommendation ids
    recommendation_prefix = str(property_id)

    # We simulate the impact of the retrofit using expected performance of the wall and roof,
    # after retrofit. We use the minimal u-values required to meet building regulations part L
    # TODO: Check the performance of the materials warmfront's installers use, particularly for
    #   cavity
    simulation_recommendations = [
        {
            "recommendation_id": f"{recommendation_prefix}-cavity",
            "type": "cavity_wall_insulation",
            "new_u_value": 0.55,
            "parts": [{}]
        },
        {
            "recommendation_id": f"{recommendation_prefix}-loft",
            "type": "loft_insulation",
            "new_u_value": 0.16,
            "parts": [{"depth": 270}]
        }
    ]

    scoring_dict = {}
    for recommendation in simulation_recommendations:
        scoring_dict = create_recommendation_scoring_data(
            property=p,
            recommendation=recommendation,
            starting_epc_data=starting_epc_data,
            ending_epc_data=ending_epc_data,
            fixed_data=fixed_data,
        )

        # At each iteration, we want to update the ending_epc_data, so in the end, ending_epc_data
        # contains all of the updates
        for k, value in scoring_dict.items():
            if k in ending_epc_data.columns:
                ending_epc_data[k] = value

    return scoring_dict
def app(): def app():
ha32_asset_list, ha15_asset_list, ha32_identified_addresses, ha15_identified_addresses = load_data() ha32_asset_list, ha15_asset_list, ha32_identified_addresses, ha15_identified_addresses = load_data()
@ -342,14 +417,19 @@ def app():
bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
) )
created_at = datetime.now().isoformat()
# We want to retrieve EPCs for every single property # We want to retrieve EPCs for every single property
# NOTE: HA32 is MOSTLY cavity via GBIS
ha_data = ha32 ha_data = ha32
house_number_key = "Dwelling num" house_number_key = "Dwelling num"
address_key = "Street" address_key = "Street"
postcode_key = "Postcode" postcode_key = "Postcode"
def get_data(ha_data, house_number_key, address_key, postcode_key): def get_data(ha_data, house_number_key, address_key, postcode_key):
ha_scoring_data = []
scoring_data = []
results = []
for _, house in tqdm(ha_data.iterrows(), total=len(ha_data)): for _, house in tqdm(ha_data.iterrows(), total=len(ha_data)):
searcher = SearchEpc( searcher = SearchEpc(
address1=" ".join([house[house_number_key], house[address_key]]), address1=" ".join([house[house_number_key], house[address_key]]),
@ -364,42 +444,50 @@ def app():
if not penultimate_epc: if not penultimate_epc:
penultimate_epc = newest_epc penultimate_epc = newest_epc
suitability = MeasureSuitibility( eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
epc=newest_epc, cleaned=cleaned eligibility.check_gbis()
) eligibility.check_eco4()
suitable = suitability.loft_insulation()
modelling_epc = newest_epc.copy() # If there is no eligibility, we need to check the penultimate epc
if not suitable["suitablility"]: if (not eligibility.eco4["eligible"]) and (not eligibility.gbis):
# if unsuccessful with newest EPC, try penultimate eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
suitability = MeasureSuitibility( eligibility.check_gbis()
epc=penultimate_epc, cleaned=cleaned eligibility.check_eco4()
if eligibility.eco4["eligible"]:
# TODO: Check me
scoring_dictionary = prepare_model_data_row(
property_id=house["row_id"],
modelling_epc=eligibility.epc,
cleaned=cleaned,
cleaning_data=cleaning_data,
created_at=created_at
) )
suitable = suitability.loft_insulation() scoring_data.append(scoring_dictionary)
modelling_epc = penultimate_epc.copy() results.append(
{
"row_id": house["row_id"],
"warmfront_identified": house["identified"],
"gbis_eligible": eligibility.gbis,
"eco4_eligible": eligibility.eco4["eligible"],
"date_epc": eligibility.epc["lodgement-date"],
"eco4_note": "conditional on post sap"
}
)
continue
if not suitable["suitablility"]: if (house["identified"] and not eligibility.gbis) and (
raise ValueError("DO SOMETHING") house["identified"] and not eligibility.eco4["eligible"]):
raise NotImplementedError("Investigate ms")
p = Property( # If nothing is eligible or gbis is eligible, then we make a record this
id=house["row_id"], results.append(
postcode=modelling_epc["postcode"], {
address1=modelling_epc["address1"], "row_id": house["row_id"],
epc_client=None, "warmfront_identified": house["identified"],
data=modelling_epc "gbis_eligible": eligibility.gbis,
"eco4_eligible": eligibility.eco4["eligible"],
"date_epc": eligibility.epc["lodgement-date"],
"eco4_note": None
}
) )
################################################################################
# Prepare the data for modelling, in the same fasion as the engine
################################################################################
p.get_components(cleaned)
# This is temp - this should happen after scoring
cleaned_property_data = DataProcessor.apply_averages_cleaning(
data_to_clean=pd.DataFrame([dict(**p.get_model_data(), LOCAL_AUTHORITY=p.data["local-authority"])]),
cleaning_data=cleaning_data,
cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
)
p.set_number_lighting_outlets(cleaned_property_data)
from pprint import pprint
len(searcher.data["rows"])