fixed merge conflict with refactor

This commit is contained in:
Michael Duong 2024-01-16 16:57:45 +00:00
commit 90ba851cc0
57 changed files with 5589 additions and 419 deletions

5
.gitignore vendored
View file

@ -265,4 +265,7 @@ model_data/simulation_system/predictions/
.idea/misc.iml .idea/misc.iml
adhoc adhoc
adhoc/* adhoc/*
etl-router-venv/
refactor_datasets/

7
backend/DbClient.py Normal file
View file

@ -0,0 +1,7 @@
class DbClient:
    # Placeholder: no connection handling or queries are implemented yet.

    def __init__(self):
        """
        Create the client responsible for interaction with the database.

        Nothing is initialised at present; the constructor takes no arguments and sets no state.
        """
        return None

105
backend/OrdnanceSurvey.py Normal file
View file

@ -0,0 +1,105 @@
from functools import lru_cache
import urllib.parse
import requests
from utils.logger import setup_logger
logger = setup_logger()
class OrdnanceSuveyClient:
    """
    Client for the Ordnance Survey Places API.

    Looks a property up by free-text address and postcode, keeps the best match and derives from it
    an EPC-compatible property type / built form and a cleaned address and postcode.
    """

    # Upper bound (in seconds) on the Places API call, so a stalled connection cannot block the caller forever
    REQUEST_TIMEOUT_SECONDS = 10

    # OS classification code -> EPC-style property type / built form.
    # Only the residential codes of interest are listed; anything else maps to empty strings.
    CLASSIFICATION_MAP = {
        # In the OS api, "RD" is a "Dwelling" however this is not valid property type in the EPC database
        'RD': {},
        'RD02': {'property_type': 'House', 'built_form': 'Detached'},
        'RD03': {'property_type': 'House', 'built_form': 'Semi-Detached'},
        'RD04': {'property_type': 'House', 'built_form': 'Mid-Terrace'},
        'RD06': {'property_type': 'Flat'},
    }

    def __init__(self, address, postcode, api_key):
        """
        This class is tasked with interaction with the ordnance survey API.

        :param address: The address for the property to search for
        :param postcode: The postcode for the property to search for
        :param api_key: The Ordnance Survey API key used to authenticate the request
        """
        self.address = address
        self.postcode = postcode
        self.full_address = ", ".join([self.address, self.postcode])
        self.api_key = api_key

        self.results = None
        self.most_relevant_result = None
        self.property_type = None
        self.built_form = None

        # This will be postcode and address, as returned by the ordnance survey
        self.address_os = None
        self.postcode_os = None

        # Per-instance memo of the last get_places_api() return value. Stored on the instance (rather
        # than via functools.lru_cache on the method) so the cache dies with the client.
        self._places_response = None

    def set_places_address(self):
        """
        Set address_os and postcode_os from the most relevant Places result.

        The postcode is removed from the address (it is stored separately), a trailing comma is
        dropped, the address is title-cased and the postcode upper-cased.

        :raises ValueError: if no result has been stored yet (get_places_api not run, or no match)
        """
        if self.most_relevant_result is None:
            raise ValueError("No results found - run get_places_api first")

        self.address_os = self.most_relevant_result["ADDRESS"]
        self.postcode_os = self.most_relevant_result["POSTCODE"]

        # We strip out the postcode from the address as this is already stored separately
        self.address_os = self.address_os.replace(self.postcode_os, "").strip()
        # Remove trailing comma
        self.address_os = self.address_os.rstrip(",").strip()
        # Convert to title case
        self.address_os = self.address_os.title()
        # Make sure postcode is upper case
        self.postcode_os = self.postcode_os.upper()

    def get_places_api(self):
        """
        Query the Ordnance Survey Places "find" endpoint for this property's full address.

        On a 200 response with at least one result, the first result's "DPA" record is stored as
        most_relevant_result and used to populate property_type, built_form, address_os and
        postcode_os. A 200 response with a missing or empty "results" list leaves those attributes
        untouched (results becomes an empty list) instead of raising.

        The returned dictionary is remembered on the instance, so repeated calls do not hit the API
        again.

        :return: dictionary of the form {"status": <http status code>}
        :raises ValueError: if no API key was provided
        """
        if self._places_response is not None:
            return self._places_response

        if not self.api_key:
            raise ValueError("Ordnance Survey API key not specified")

        encoded_address_query = urllib.parse.quote(self.full_address)
        url = (f"https://api.os.uk/search/places/v1/find?query={encoded_address_query}&key="
               f"{self.api_key}")

        response = requests.get(url, timeout=self.REQUEST_TIMEOUT_SECONDS)

        if response.status_code == 200:
            data = response.json()
            # Tolerate a successful response that carries no matches
            self.results = data.get("results") or []

            if self.results:
                # Extract some details about the best match
                self.most_relevant_result = self.results[0]["DPA"]
                self.parse_classification_code(self.most_relevant_result["CLASSIFICATION_CODE"])
                self.set_places_address()
            else:
                logger.info("Could not find any results for the provided address and postcode")
        else:
            logger.info("Could not find any results for the provided address and postcode")

        self._places_response = {"status": response.status_code}
        return self._places_response

    def parse_classification_code(self, classification_code: str):
        """
        Convert the classification code, returned by the OS places api, to a property type and built
        form that are compatible with the EPC database.

        The various classifications can be found here:
        https://osdatahub.os.uk/docs/places/technicalSpecification
        Under LPI Output, CLASSIFICATION_CODE is described, and a link is provided to the full table of
        classifications.

        For these purposes, we do not need the full classification as this includes non-residential
        properties. We only parse the ones of interest to us; any other code sets both attributes to "".

        :param classification_code: the CLASSIFICATION_CODE value of a Places result
        :return: None - property_type and built_form are set on the instance
        """
        mapped = self.CLASSIFICATION_MAP.get(classification_code, {})

        self.property_type = mapped.get("property_type", "")
        self.built_form = mapped.get("built_form", "")

View file

@ -9,18 +9,17 @@ from etl.epc.DataProcessor import EPCDataProcessor
from etl.epc.Dataset import TrainingDataset from etl.epc.Dataset import TrainingDataset
from etl.epc.settings import LATEST_FIELD, MANDATORY_FIXED_FEATURES, POTENTIAL_COLUMNS, EFFICIENCY_FEATURES, BUILT_FORM_REMAP from etl.epc.settings import LATEST_FIELD, MANDATORY_FIXED_FEATURES, POTENTIAL_COLUMNS, EFFICIENCY_FEATURES, BUILT_FORM_REMAP
from etl.epc_clean.epc_attributes.all_cleaners import all_cleaner_map from etl.epc_clean.epc_attributes.all_cleaners import all_cleaner_map
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from utils.logger import setup_logger from utils.logger import setup_logger
from utils.s3 import read_dataframe_from_s3_parquet from utils.s3 import read_dataframe_from_s3_parquet
from epc_api.client import EpcClient
from BaseUtility import Definitions from BaseUtility import Definitions
from recommendations.rdsap_tables import england_wales_age_band_lookup, FLOOR_LEVEL_MAP from recommendations.rdsap_tables import england_wales_age_band_lookup, FLOOR_LEVEL_MAP
from recommendations.recommendation_utils import ( from recommendations.recommendation_utils import (
estimate_perimeter, get_wall_type, estimate_external_wall_area, esimtate_pitched_roof_area estimate_perimeter, get_wall_type, estimate_external_wall_area, esimtate_pitched_roof_area, estimate_windows
) )
ENVIRONMENT = os.environ.get('ENVIRONMENT', 'dev') ENVIRONMENT = os.environ.get('ENVIRONMENT', 'dev')
EPC_AUTH_TOKEN = os.environ.get('EPC_AUTH_TOKEN')
DATA_BUCKET = os.environ.get('DATA_BUCKET', 'retrofit-data-dev' if ENVIRONMENT == 'dev' else None) DATA_BUCKET = os.environ.get('DATA_BUCKET', 'retrofit-data-dev' if ENVIRONMENT == 'dev' else None)
logger = setup_logger() logger = setup_logger()
@ -51,13 +50,14 @@ class Property(Definitions):
spatial = None spatial = None
def __init__(self, id, postcode, address1, epc_record, data=None): def __init__(self, id, postcode, address, epc_record, data=None):
self.epc_record = epc_record self.epc_record = epc_record
self.id = id self.id = id
self.address = address
self.postcode = postcode self.postcode = postcode
self.address1 = address1
self.data = {k.replace("_", "-"): v for k,v in epc_record.get("prepared_epc").items()} self.data = {k.replace("_", "-"): v for k,v in epc_record.get("prepared_epc").items()}
self.old_data = epc_record.get("old_data") self.old_data = epc_record.get("old_data")
self.property_dimensions = None self.property_dimensions = None
@ -112,6 +112,9 @@ class Property(Definitions):
self.insulation_floor_area = None self.insulation_floor_area = None
self.number_lighting_outlets = epc_record.prepared_epc.get("fixed_lighting_outlets_count") self.number_lighting_outlets = epc_record.prepared_epc.get("fixed_lighting_outlets_count")
self.floor_level = None self.floor_level = None
self.number_of_windows = None
self.solar_pv_roof_area = None
self.solar_pv_percentage = None
self.current_adjusted_energy = None self.current_adjusted_energy = None
self.expected_adjusted_energy = None self.expected_adjusted_energy = None
@ -177,81 +180,51 @@ class Property(Definitions):
recommendation_record["walls_insulation_thickness_ending"] = "above average" recommendation_record["walls_insulation_thickness_ending"] = "above average"
recommendation_record["walls_energy_eff_ending"] = "Good" recommendation_record["walls_energy_eff_ending"] = "Good"
else: else:
if recommendation_record["walls_thermal_transmittance_ending"] is None: wind_turbine_count = int(wind_turbine_count)
raise ValueError("We should not have a None value for the u value")
if recommendation_record["walls_insulation_thickness_ending"] is None: self.wind_turbine = {
recommendation_record["walls_insulation_thickness_ending"] = "none" "wind_turbine": wind_turbine_count,
}
# Update description to indicate it's insulate def set_count_variables(self):
if recommendation["type"] in ["solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation"]:
if len(recommendation["parts"]) > 1:
raise NotImplementedError("Have more than 1 floor insulation part - handle this case")
recommendation_record["floor_thermal_transmittance_ending"] = recommendation["new_u_value"] """
# We don't really see above average for this in the training data For EPC fields that are just counts, we'll set them here
recommendation_record["floor_insulation_thickness_ending"] = "average" These are fields that are integers but may contain additional values such as "" so we can't do a direct
recommendation_record["floor_energy_eff_ending"] = "Good" conversion straight to an integer
else: :return:
if recommendation_record["floor_thermal_transmittance_ending"] is None: """
raise ValueError("We should not have a None value for the u value")
if recommendation_record["floor_insulation_thickness_ending"] is None: fields = {
recommendation_record["floor_insulation_thickness_ending"] = "none" "number_of_open_fireplaces": "number-open-fireplaces",
"number_of_extensions": "extension-count",
"number_of_storeys": "flat-storey-count",
"number_of_rooms": "number-habitable-rooms",
}
if recommendation["type"] in ["loft_insulation", "room_roof_insulation", "flat_roof_insulation"]: null_attributes = ["number_of_storeys", "number_of_rooms"]
recommendation_record["roof_thermal_transmittance_ending"] = recommendation["new_u_value"]
parts = recommendation["parts"] for attribute, epc_field in fields.items():
if len(parts) != 1: value = self.data["extension-count"]
raise ValueError("More than one part for roof insulation - investiage me") if value == "" or value in self.DATA_ANOMALY_MATCHES:
if attribute in null_attributes:
value = None
else:
value = 0
else:
value = int(value)
# This is based on the values we have in the training data setattr(self, attribute, value)
valid_numeric_values = [
12, 25, 50, 75, 100, 150, 200, 250, 270, 300, 350, 400
]
proposed_depth = int(parts[0]["depth"]) def get_components(self, cleaned, photo_supply_lookup, floor_area_decile_thresholds):
if proposed_depth not in valid_numeric_values:
# Take the nearest value for scoring
proposed_depth = min(valid_numeric_values, key=lambda x: abs(x - proposed_depth))
recommendation_record["roof_insulation_thickness_ending"] = str(proposed_depth)
recommendation_record["roof_energy_eff_ending"] = "Very Good"
else:
# Fill missing roof u-values - this fill is not based on recommended upgrades
if recommendation_record["roof_thermal_transmittance_ending"] is None:
raise ValueError("We should not have a None value for the u value")
if recommendation_record["roof_insulation_thickness_ending"] is None:
recommendation_record["roof_insulation_thickness_ending"] = "none"
if recommendation["type"] == "mechanical_ventilation":
recommendation_record["mechanical_ventilation_ending"] = 'mechanical, extract only'
if recommendation["type"] == "sealing_open_fireplace":
recommendation_record["number_open_fireplaces_ending"] = 0
if recommendation["type"] == "low_energy_lighting":
recommendation_record["low_energy_lighting_ending"] = 100
recommendation_record["lighting_energy_eff_starting"] = "Very Good"
if recommendation["type"] not in [
"mechanical_ventilation", "sealing_open_fireplace", "low_energy_lighting",
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
"loft_insulation", "room_roof_insulation", "flat_roof_insulation",
"solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation"
]:
raise NotImplementedError("Implement me")
return recommendation_record
def get_components(self, cleaned):
""" """
Given the cleaning that has been performed, we'll use this to identify the property Given the cleaning that has been performed, we'll use this to identify the property
components, from roof to walls to windows, heating and hot water components, from roof to walls to windows, heating and hot water
:param cleaned: This is the dictionary of components found in cleaner.cleaned :param cleaned: This is the dictionary of components found in cleaner.cleaned
:param photo_supply_lookup: This is the lookup table for the photo supply, used to estimate the percentage
of the roof that is suitable for solar panels
:param floor_area_decile_thresholds: This is the decile thresholds for the floor area, used in estimating the
solar pv roof area
:return: :return:
""" """
@ -301,6 +274,10 @@ class Property(Definitions):
self.set_wall_type() self.set_wall_type()
self.set_floor_type() self.set_floor_type()
self.set_floor_level() self.set_floor_level()
self.set_windows_count()
self.set_solar_panel_area(
photo_supply_lookup=photo_supply_lookup, floor_area_decile_thresholds=floor_area_decile_thresholds
)
def set_spatial(self, spatial: pd.DataFrame): def set_spatial(self, spatial: pd.DataFrame):
""" """
@ -368,7 +345,7 @@ class Property(Definitions):
""" """
Utility function for usage in the lambda, for preparing the _rating fields Utility function for usage in the lambda, for preparing the _rating fields
""" """
return rating_lookup[field].value if field not in cls.DATA_ANOMALY_MATCHES else None return rating_lookup[field].value if (field not in cls.DATA_ANOMALY_MATCHES) and (field is not None) else None
def get_property_details_epc(self, portfolio_id: int, rating_lookup): def get_property_details_epc(self, portfolio_id: int, rating_lookup):
@ -409,6 +386,7 @@ class Property(Definitions):
"primary_energy_consumption": self.energy["primary_energy_consumption"], "primary_energy_consumption": self.energy["primary_energy_consumption"],
"co2_emissions": self.energy["co2_emissions"], "co2_emissions": self.energy["co2_emissions"],
"adjusted_energy_consumption": self.current_adjusted_energy, "adjusted_energy_consumption": self.current_adjusted_energy,
"estimated": self.data.get("estimated", False)
} }
return property_details_epc return property_details_epc
@ -664,7 +642,7 @@ class Property(Definitions):
:return: :return:
""" """
if self.data["fixed-lighting-outlets-count"] == "": if self.data["fixed-lighting-outlets-count"] in [None, ""]:
# We check old EPCs and the full SAP EPC # We check old EPCs and the full SAP EPC
@ -693,3 +671,52 @@ class Property(Definitions):
""" """
self.current_adjusted_energy = current_adjusted_energy self.current_adjusted_energy = current_adjusted_energy
self.expected_adjusted_energy = expected_adjusted_energy self.expected_adjusted_energy = expected_adjusted_energy
def set_windows_count(self):
    """
    Using the estimate_windows function, this method will set the number of windows in the property.

    The EPC "extension-count" field can be blank, None or one of the DATA_ANOMALY_MATCHES markers,
    none of which can be converted with float(). In those cases the property is treated as having no
    extensions, which is the same default set_count_variables applies to number_of_extensions.

    :return: None - the result is stored on self.number_of_windows
    """
    raw_extension_count = self.data["extension-count"]
    if raw_extension_count in (None, "") or raw_extension_count in self.DATA_ANOMALY_MATCHES:
        extension_count = 0.0
    else:
        extension_count = float(raw_extension_count)

    self.number_of_windows = estimate_windows(
        property_type=self.data["property-type"],
        built_form=self.data["built-form"],
        construction_age_band=self.construction_age_band,
        floor_area=self.floor_area,
        number_habitable_rooms=self.number_of_rooms,
        extension_count=extension_count,
    )
def set_solar_panel_area(self, photo_supply_lookup, floor_area_decile_thresholds):
    """
    Sets the approximate roof area available for solar panels, and the roof percentage it represents.

    The percentage is the mean of the "photo_supply_median" column of the lookup rows matched by
    SolarPhotoSupply.filter_photo_supply_lookup, divided by 100. It is applied to the insulation
    floor area for flat roofs and to the pitched roof area otherwise.

    :param photo_supply_lookup: lookup table passed through to SolarPhotoSupply.filter_photo_supply_lookup
    :param floor_area_decile_thresholds: floor area decile thresholds passed through to the same lookup
    :raises ValueError: if the roof area this property needs has not been set yet
    :return: None - results are stored on self.solar_pv_roof_area and self.solar_pv_percentage
    """
    missing_area_message = (
        "Need to set insulation floor area and pitched roof area before setting solar pv roof area"
    )

    if (self.insulation_floor_area is None) and (self.pitched_roof_area is None):
        raise ValueError(missing_area_message)

    # Only one of the two areas is used, depending on the roof type. Check that specific one so a
    # missing value fails here with a clear message rather than as a TypeError in the multiplication.
    roof_area = self.insulation_floor_area if self.roof["is_flat"] else self.pitched_roof_area
    if roof_area is None:
        raise ValueError(missing_area_message)

    photo_supply_matched = SolarPhotoSupply.filter_photo_supply_lookup(
        photo_supply_lookup=photo_supply_lookup,
        floor_area_decile_thresholds=floor_area_decile_thresholds,
        tenure=self.data["tenure"],
        built_form=self.data["built-form"],
        property_type=self.data["property-type"],
        construction_age_band=self.construction_age_band,
        is_flat=self.roof["is_flat"],
        is_pitched=self.roof["is_pitched"],
        is_roof_room=self.roof["is_roof_room"],
        floor_area=self.floor_area
    )

    # NOTE(review): if no lookup rows match, the mean is presumably NaN and propagates - confirm
    # whether the lookup guarantees at least one row.
    percentage_of_roof = photo_supply_matched["photo_supply_median"].mean()
    percentage_of_roof = percentage_of_roof / 100

    self.solar_pv_roof_area = roof_area * percentage_of_roof
    self.solar_pv_percentage = percentage_of_roof

View file

@ -1,12 +1,114 @@
import os import os
import time import time
import re
import usaddress
import pandas as pd
import numpy as np
from epc_api.client import EpcClient from epc_api.client import EpcClient
from backend.OrdnanceSurvey import OrdnanceSuveyClient
from BaseUtility import Definitions
from utils.logger import setup_logger from utils.logger import setup_logger
from typing import List from typing import List
from fuzzywuzzy import process from fuzzywuzzy import process
logger = setup_logger() logger = setup_logger()
# Target dtype for each EPC field when estimating an EPC from neighbouring properties.
# estimate_epc iterates over this mapping, so a field only takes part in the estimation if it has an
# entry here; the commented-out keys are therefore skipped.
# "Int64" is the pandas nullable integer dtype; "float" and "str" are the plain dtypes.
vartypes = {
    'low-energy-fixed-light-count': "Int64",
    # 'address': 'str',
    # 'uprn-source': 'str',
    'floor-height': 'float',
    'heating-cost-potential': 'float',
    'unheated-corridor-length': 'float',
    'hot-water-cost-potential': 'float',
    'construction-age-band': 'str',
    'potential-energy-rating': 'str',
    'mainheat-energy-eff': 'str',
    'windows-env-eff': 'str',
    'lighting-energy-eff': 'str',
    'environment-impact-potential': "Int64",
    'glazed-type': 'str',
    'heating-cost-current': 'float',
    'address3': 'str',
    'mainheatcont-description': 'str',
    'sheating-energy-eff': 'str',
    'property-type': 'str',
    'local-authority-label': 'str',
    'fixed-lighting-outlets-count': "Int64",
    'energy-tariff': 'str',
    'mechanical-ventilation': 'str',
    # A cost is numeric, like every other *-cost-* field in this mapping
    'hot-water-cost-current': 'float',
    'county': 'str',
    'postcode': 'str',
    'solar-water-heating-flag': 'str',
    'constituency': 'str',
    'co2-emissions-potential': 'float',
    'number-heated-rooms': 'float',
    'floor-description': 'str',
    'energy-consumption-potential': 'float',
    'local-authority': 'str',
    'built-form': 'str',
    'number-open-fireplaces': "Int64",
    'windows-description': 'str',
    'glazed-area': 'str',
    # 'inspection-date': str,
    'mains-gas-flag': 'str',
    'co2-emiss-curr-per-floor-area': 'float',
    'address1': 'str',
    'heat-loss-corridor': 'str',
    'flat-storey-count': "Int64",
    'constituency-label': 'str',
    'roof-energy-eff': 'str',
    'total-floor-area': 'float',
    'building-reference-number': 'str',
    'environment-impact-current': 'float',
    'co2-emissions-current': 'float',
    'roof-description': 'str',
    'floor-energy-eff': 'str',
    'number-habitable-rooms': 'float',
    'address2': 'str',
    'hot-water-env-eff': 'str',
    'posttown': 'str',
    'mainheatc-energy-eff': 'str',
    'main-fuel': 'str',
    'lighting-env-eff': 'str',
    'windows-energy-eff': 'str',
    'floor-env-eff': 'str',
    'sheating-env-eff': 'str',
    'lighting-description': 'str',
    'roof-env-eff': 'str',
    'walls-energy-eff': 'str',
    'photo-supply': 'float',
    'lighting-cost-potential': 'float',
    'mainheat-env-eff': 'str',
    'multi-glaze-proportion': 'float',
    'main-heating-controls': 'str',
    # 'lodgement-datetime',
    'flat-top-storey': 'str',
    'current-energy-rating': 'str',
    'secondheat-description': 'str',
    'walls-env-eff': 'str',
    'transaction-type': 'str',
    # 'uprn': "Int64",
    'current-energy-efficiency': 'float',
    'energy-consumption-current': 'float',
    'mainheat-description': 'str',
    'lighting-cost-current': 'float',
    # 'lodgement-date',
    'extension-count': "Int64",
    'mainheatc-env-eff': 'str',
    'lmk-key': 'str',
    'wind-turbine-count': "Int64",
    'tenure': 'str',
    'floor-level': 'str',
    'potential-energy-efficiency': "Int64",
    'hot-water-energy-eff': 'str',
    'low-energy-lighting': 'float',
    'walls-description': 'str',
    'hotwater-description': 'str'
}
class SearchEpc: class SearchEpc:
""" """
@ -38,53 +140,127 @@ class SearchEpc:
self, self,
address1: str, address1: str,
postcode: str, postcode: str,
address2: str = None, auth_token: str,
address3: str = None, os_api_key: str,
address4: str = None, full_address: str | None = None,
max_retries: int = None max_retries: int = None,
uprn: [int, None] = None,
size=None,
property_type=None,
): ):
""" """
Address lines 1 and postcode are mandatory fields. The other address lines are optional Address lines 1 and postcode are mandatory fields. The other address lines are optional
but can be used to find the epc for the home, if address1 and postcode are insufficient but can be used to find the epc for the home, if address1 and postcode are insufficient
:param address1: string, propery's address line 1 :param address1: string, propery's address line 1
:param postcode: string, propery's postcode :param postcode: string, propery's postcode
:param address2: string, optional, propery's address line 2 :param full_address: string, optional parameter, the full address of the property
:param address3: string, optional, propery's address line 3 :param max_retries: int, optional, number of retries to make when searching the api
:param address4: string, optional, propery's address line 4 :param uprn: int, optional, the uprn of the property
:param size: int, optional, the number of results to return. If not provided, defaults to 25 which is the api's
default
:param property_type: str, optional, the property type of the property, if known before hand
""" """
self.address1 = address1 self.address1 = address1
self.postcode = postcode self.postcode = postcode
self.address2 = address2 self.full_address = full_address
self.address3 = address3 self.uprn = uprn
self.address4 = address4 self.house_number = self.get_house_number(self.address1)
self.numeric_house_number = self.extract_numeric_housenumber_part(self.house_number)
self.max_retries = max_retries if max_retries is not None else self.MAX_RETRIES self.max_retries = max_retries if max_retries is not None else self.MAX_RETRIES
self.client = EpcClient(auth_token=os.getenv("EPC_AUTH_TOKEN")) self.client = EpcClient(auth_token=auth_token)
self.ordnance_survey_client = OrdnanceSuveyClient(
address=self.address1, postcode=self.postcode, api_key=os_api_key
)
self.data = None self.data = None
self.newest_epc = None
self.older_epcs = None
self.full_sap_epc = None
def search(self): # These are the address and postcode values, which we store in the database
self.address_clean = None
self.postcode_clean = None
self.size = size if size is not None else 25
self.property_type = property_type
@classmethod
def get_house_number(cls, address: str) -> str | None:
"""
This method will use the usaddress library to parse an address and extract the house number
:return:
"""
parsed = usaddress.parse(address)
parsed_house_number = [x for x in parsed if (x[1] == "AddressNumber")]
parsed_house_number = parsed_house_number[0][0] if parsed_house_number else None
if parsed_house_number is None:
# Because usaddress isn't optimal for parsing addresses with some prefixes such as 'Flat',
# we also add a custom approach
# Pattern to look for 'Flat' or 'Apartment' followed by a number, or just a number at the beginning
pattern = r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)'
match = re.search(pattern, address)
if match:
# Return the first non-None group found
return next(g for g in match.groups() if g is not None)
else:
return None
# Remove training commas
parsed_house_number = parsed_house_number.replace(",", "")
return parsed_house_number
@staticmethod
def extract_numeric_housenumber_part(house_number: str | None) -> int | None:
# Regular expression to find the first occurrence of one or more digits
if house_number is None:
return None
match = re.search(r'\d+', house_number)
if match:
return int(match.group())
else:
return None
def get_epc(self, params=None, size=None):
# Get the EPC data with retries # Get the EPC data with retries
size = size if size is not None else self.size
if params is None:
if self.uprn:
params = {"uprn": self.uprn}
else:
params = {"address": self.address1, "postcode": self.postcode}
for retry in range(self.max_retries): for retry in range(self.max_retries):
try: try:
response = self.client.domestic.search(
params={"address": self.address1, "postcode": self.postcode} if "uprn" in params:
) # We use the direct call method inside, since we need to implement uprn as a valid
# parameter for the search function
url = os.path.join(self.client.domestic.host, "search")
response = self.client.domestic.call(method="get", url=url, params=params)
else:
response = self.client.domestic.search(params=params, size=size)
if response: if response:
self.data = response self.data = response
return self.SUCCESS return self.SUCCESS
if retry > 0: if retry > 0:
print("Failed previous attempt but retry successful") logger.info("Failed previous attempt but retry successful")
# If we got nothing, final try # If we got nothing, final try
if not response: if not response:
# TODO: Make a call to OS uprn service and get the address' uprn, just in case there is an
# issue with how we are searching the api
return { return {
"status": 204, "status": 204,
"message": "no data", "message": "no data",
@ -127,7 +303,6 @@ class SearchEpc:
if len(uprns) == 1: if len(uprns) == 1:
return rows return rows
logger.error("Multiple UPRNS found - we should use an alternate method of searching - TODO")
if property_type is not None: if property_type is not None:
# We can do a filter on the property type # We can do a filter on the property type
rows_filtered = [r for r in rows if r["property-type"] == property_type] rows_filtered = [r for r in rows if r["property-type"] == property_type]
@ -147,7 +322,24 @@ class SearchEpc:
return rows return rows
def retrieve(self, property_type=None, address=None): @staticmethod
def format_address(newest_epc):
    """
    Build the address and postcode values that are stored in the database.

    :param newest_epc: EPC record providing the "address" and "postcode" keys
    :return: tuple of (address, postcode) - the address has the postcode and any trailing comma
        removed and is title-cased; the postcode is upper-cased
    """
    raw_postcode = newest_epc["postcode"]

    # The postcode is kept separately, so take it out of the address before tidying up
    without_postcode = newest_epc["address"].replace(raw_postcode, "").strip()
    tidy_address = without_postcode.rstrip(",").strip().title()

    return tidy_address, raw_postcode.upper()
def extract_epc_data(self, address=None):
""" """
Given a successful search, this method will format the data and return it Given a successful search, this method will format the data and return it
@ -163,7 +355,7 @@ class SearchEpc:
# Firstly, we should only have 1 urpn so if we have multiple, we'll need to filter down the # Firstly, we should only have 1 urpn so if we have multiple, we'll need to filter down the
# property further # property further
rows = self.filter_rows(rows, property_type=property_type, address=None) rows = self.filter_rows(rows, property_type=self.property_type, address=None)
rows = self.filter_rows(rows, property_type=None, address=address) rows = self.filter_rows(rows, property_type=None, address=address)
# We now check for a full sap epc: # We now check for a full sap epc:
@ -173,7 +365,26 @@ class SearchEpc:
# Finally, we identify the newest epc and the rest, and then return # Finally, we identify the newest epc and the rest, and then return
newest_epc, older_epcs = self.filter_newest_epc(list_of_epcs=rows) newest_epc, older_epcs = self.filter_newest_epc(list_of_epcs=rows)
return newest_epc, older_epcs, full_sap_epc # Retrieve postcode and address
address_epc, postcode_epc = self.format_address(newest_epc=newest_epc)
# Ge the uprn from the newest record for this home
uprns = {r["uprn"] for r in rows if r["uprn"]}
# We can sometimes have no uprn for a property
if (len(uprns) == 0) and len(rows) > 0:
logger.warning("Found data but missing uprn")
elif len(uprns) != 1:
# There is a possibility that we have multiple UPRNs for a single property, which is an error
addresses = {r["address"] for r in rows}
if len(addresses) == 1:
# Take the uprn from the most recent
uprns = {newest_epc["uprn"]}
else:
raise ValueError("Multiple UPRNs found - investigate me")
uprn = uprns.pop() if uprns else None
return newest_epc, older_epcs, full_sap_epc, address_epc, postcode_epc, uprn
@staticmethod @staticmethod
def filter_newest_epc(list_of_epcs: List): def filter_newest_epc(list_of_epcs: List):
@ -186,8 +397,334 @@ class SearchEpc:
return {}, [] return {}, []
if len(newest_response) != 1: if len(newest_response) != 1:
raise Exception("More than one result found for this address - investigate me") # It is possible (but rare, and likely an error on EPC lodgement) that we have multiple EPCs that
# were lodged at the exact same time. In this case, we will take the first one
newest_response = [newest_response[0]]
older_epcs = [epc for epc in list_of_epcs if epc["lmk-key"] != newest_response[0]["lmk-key"]] older_epcs = [epc for epc in list_of_epcs if epc["lmk-key"] != newest_response[0]["lmk-key"]]
return newest_response[0], older_epcs return newest_response[0], older_epcs
@staticmethod
def _get_epc_mode(col: str, epc_data: pd.DataFrame):
"""
Simple method to extract the mode value from the EPC data
:param col: name of the column to take the mode of
:param epc_data: pandas dataframe of epc data
"""
mode_value = epc_data[[col]].mode(dropna=True)
if len(mode_value) != 1:
raise NotImplementedError("TODO: Handle multiple modes")
mode_value = mode_value.iloc[0][col]
return mode_value
def fetch_nearby_epcs(
        self, initial_postcode: str,
        lmks_to_drop: list[str] | None = None,
        built_form: str = "",
        property_type: str = ""
) -> pd.DataFrame:
    """
    Fetch EPC records for properties near this one, for use when estimating a missing EPC.

    The EPC API is queried with the provided postcode (and property type, when given). If the query
    fails, returns nothing, or nothing is left after filtering, the last character of the postcode is
    removed and the query is retried. This continues until a non-empty result is obtained or the
    postcode is exhausted.

    For each successful query the data is processed as follows:
      1. Rows whose 'lmk-key' is in lmks_to_drop are removed.
      2. Only the most recently lodged certificate per uprn is kept.
      3. A house number and its numeric part are extracted from each address.
      4. A 'weight' column is added: 1 for every row when this property has no numeric house number,
         when no row has a house number distance, or when the postcode has been shortened; otherwise
         1 / sqrt(distance + 1), with missing weights filled with the mean weight.
      5. Rows are filtered to the estimation property type and, except for the special cases noted
         inline, the estimation built form.

    :param initial_postcode: The initial full postcode for the EPC data query.
    :param lmks_to_drop: List of 'lmk-key' values to be excluded from the EPC data.
    :param built_form: The 'built-form' value to be used for filtering the EPC data. When empty or
        null, the value estimated from the nearby data is used instead.
    :param property_type: The 'property-type' value to be used for filtering the EPC data. Must be a
        key of property_type_api_map when provided. When empty, the value estimated from the nearby
        data is used instead.
    :return: dataframe of nearby EPC records, including the added 'house_number',
        'numeric_house_number' and 'weight' columns
    :raises Exception: if no data is found even after the postcode has been trimmed to nothing
    """

    # The API expects lower-case property types
    property_type_api_map = {
        "Bungalow": "bungalow",
        "Flat": "flat",
        "House": "house",
        "Maisonette": "maisonette",
        "Park home": "park home",
    }

    postcode = initial_postcode
    while postcode:
        # Fetch data from EPC API
        params = {"postcode": postcode}
        if property_type:
            params["property-type"] = property_type_api_map[property_type]
        # We request up to 100 records of the relevant type, so not to pull in too many irrelevant homes
        epc_response = self.get_epc(params=params, size=100)

        if epc_response["status"] == 200:
            # get_epc stores the raw response on self.data when it succeeds
            epc_data = pd.DataFrame(self.data["rows"])
            if lmks_to_drop is not None:
                epc_data = epc_data[~epc_data["lmk-key"].isin(lmks_to_drop)]

            if not epc_data.empty:
                # Further processing of the EPC data
                epc_data['lodgement-datetime'] = pd.to_datetime(epc_data['lodgement-datetime'], format='mixed')
                # Keep only the newest certificate for each uprn
                epc_data = epc_data.sort_values("lodgement-datetime", ascending=False).groupby("uprn").head(1)

                epc_data["house_number"] = epc_data["address"].apply(lambda add1: self.get_house_number(add1))
                epc_data["numeric_house_number"] = epc_data["house_number"].apply(
                    lambda house_num: self.extract_numeric_housenumber_part(house_num)
                )

                if self.numeric_house_number is None:
                    # If we don't have a house number, we treat all weights as equal
                    epc_data["weight"] = 1
                else:
                    epc_data["house_number_distance"] = abs(
                        epc_data["numeric_house_number"] - self.numeric_house_number
                    )
                    # Previous weighting approach, kept for reference:
                    # # We add 1, just in case we have a 0 weight (e.g. comparing house number 7a to 7b, or 9A to 9)
                    # epc_data["weight"] = 1 / (epc_data["house_number_distance"] + 1)
                    # # If we have a home without a house number, fill that weight with average
                    # epc_data["weight"] = epc_data["weight"].fillna(epc_data["weight"].mean())
                    # # Finally, we might not have any house numbers whatsoever so everything could be
                    # # missing, so we fill with 1
                    # epc_data["weight"] = epc_data["weight"].fillna(1)

                    # TODO: Testing
                    # If the postcode is different from the initial postcode, it doesn't make sense to have
                    # any weightings
                    if all(pd.isnull(epc_data["house_number_distance"])) or (postcode != initial_postcode):
                        epc_data["weight"] = 1
                    else:
                        # The +1 keeps the weight finite when the distance is 0
                        epc_data["weight"] = 1 / np.sqrt(epc_data["house_number_distance"] + 1)
                        # Homes without a house number get the average weight
                        epc_data["weight"] = epc_data["weight"].fillna(epc_data["weight"].mean())

                # Use the caller's property type when given, otherwise estimate it from the nearby data
                estimation_property_type = self._estimate_str(
                    key="property-type", estimation_data=epc_data
                ) if property_type == "" else property_type

                epc_built_form = self._estimate_str(
                    key="built-form",
                    estimation_data=epc_data[epc_data["property-type"] == estimation_property_type]
                )

                # NOTE(review): the terrace labels below end in "-Terraced", whereas the Ordnance Survey
                # mapping in backend/OrdnanceSurvey.py uses "Mid-Terrace" - confirm which spelling the
                # 'built-form' column actually holds, otherwise this first branch can never match.
                if built_form == "Semi-Detached" and epc_built_form in ["End-Terraced", "Mid-Terraced"]:
                    estimation_built_form = "End-Terraced"
                elif (built_form == "") or (pd.isnull(built_form)):
                    estimation_built_form = epc_built_form
                else:
                    estimation_built_form = built_form

                # We handle some edge cases experienced with maisonettes - if built form is detached, just filter
                # on maisonette
                # We also add some additional logic for Park homes, because they are far less common than other
                # property types
                is_maisonette_with_bad_built_form = (estimation_property_type == "Maisonette") & (
                    estimation_built_form in ["Detached", "Semi-Detached"]
                )

                is_park_home_without_built_form = (estimation_property_type == "Park home") & (
                    sum(epc_data["built-form"] == estimation_built_form) == 0
                )
                has_missing_built_form = not estimation_built_form

                if is_maisonette_with_bad_built_form or is_park_home_without_built_form or has_missing_built_form:
                    # Built form is unreliable or unavailable here, so filter on property type only
                    epc_data = epc_data[epc_data["property-type"] == estimation_property_type]
                else:
                    epc_data = epc_data[
                        (epc_data["built-form"] == estimation_built_form) & (
                                epc_data["property-type"] == estimation_property_type)
                        ]

                if not epc_data.empty:
                    return epc_data  # Return the filtered data if it's not empty

        # Shorten the postcode by one character for the next iteration
        postcode = postcode[:-1].rstrip()

    # If loop finishes without a valid response, raise an exception
    raise Exception("Unable to find postcode data after trimming - investigate me")
def estimate_epc(self, property_type, built_form, lmks_to_drop=None):
    """
    Build an estimated EPC for a property that has none, using the EPCs of nearby comparable homes.

    The comparable EPCs are retrieved with ``fetch_nearby_epcs``. Every attribute listed in ``vartypes``
    is then estimated from them with the estimator that matches its declared type.

    Note - postcodes that contain just a single address may need a different approach to find the
    closest homes.

    :param property_type: Property type of the home being estimated, which can be retrieved from the
        ordnance survey api
    :param built_form: Built form of the home being estimated, which can be retrieved from the
        ordnance survey api
    :param lmks_to_drop: Optional list of LMK keys to exclude from the estimation. This is a testing
        override, used to drop the EPCs of the property under test
    :return: dict of estimated EPC attributes, plus the lodgement datetime/date, postcode, uprn, address
        and an "estimated" flag set to True
    :raises NotImplementedError: if ``vartypes`` declares a type other than "Int64", "float" or "str"
    """
    # Only similar properties (as judged from the ordnance survey data) take part in the estimation
    nearby = self.fetch_nearby_epcs(
        initial_postcode=self.postcode,
        lmks_to_drop=lmks_to_drop,
        built_form=built_form,
        property_type=property_type
    )

    # One estimator per supported datatype
    estimators = {
        "Int64": self._estimate_int,
        "float": self._estimate_float,
        "str": self._estimate_str,
    }

    estimated_epc = {}
    for key, vartype in vartypes.items():
        # Normalise nulls and empty strings to None before selecting the usable rows
        nearby[key] = np.where(pd.isnull(nearby[key]), None, nearby[key])
        nearby[key] = np.where(nearby[key] == "", None, nearby[key])

        subset = nearby[[key, "weight", "lodgement-datetime"]].copy()
        subset = subset[pd.notnull(subset[key])]
        subset = subset[~subset[key].isin(Definitions.DATA_ANOMALY_MATCHES)]

        column = subset[key]
        if vartype == "Int64":
            # Going via float avoids "invalid literal for int() with base 10: '1.0'"
            column = column.astype(float)
        subset[key] = column.astype(vartype)

        if len(subset) == 0:
            estimated_epc[key] = None
            continue

        estimator = estimators.get(vartype)
        if estimator is None:
            raise NotImplementedError("estimation method not implemented for type")
        estimated_epc[key] = estimator(subset, key)

    # Weighted average lodgement datetime, and the date extracted from it
    lodged_at = self.calculate_weighted_lodgement_datetime(epc_data=nearby)
    estimated_epc.update(
        {
            "lodgement-datetime": lodged_at,
            "lodgement-date": lodged_at.strftime("%Y-%m-%d"),
            "postcode": self.postcode,
            "uprn": self.uprn,
            "address": self.full_address,
            # Indicate that this epc was estimated
            "estimated": True,
        }
    )
    return estimated_epc
@staticmethod
def calculate_weighted_lodgement_datetime(epc_data):
numeric_dates = pd.to_datetime(epc_data['lodgement-datetime']).view('int64')
# Calculate the weighted sum of dates
weighted_sum = (numeric_dates * epc_data['weight']).sum()
# Calculate the sum of weights
total_weights = epc_data['weight'].sum()
# Calculate the weighted mean in numeric format
weighted_mean_numeric = weighted_sum / total_weights
# Convert the numeric weighted mean back to datetime
weighted_mean_datetime = pd.to_datetime(weighted_mean_numeric)
return weighted_mean_datetime
@staticmethod
def _estimate_int(estimation_data, key):
return round(np.average(a=estimation_data[key], weights=estimation_data["weight"]))
@staticmethod
def _estimate_float(estimation_data, key):
return round(np.average(a=estimation_data[key], weights=estimation_data["weight"]), 2)
@staticmethod
def _estimate_str(estimation_data, key):
agg = estimation_data.groupby(key)["weight"].sum().reset_index()
agg = agg[agg["weight"] == agg["weight"].max()]
if agg.shape[0] != 1:
# If we have multiple modes, we take the more recent data on average
recent_grouped = estimation_data[
estimation_data[key].isin(agg[key].values)
].groupby(key)["lodgement-datetime"].mean()
newest_group = recent_grouped.idxmax()
return newest_group
return agg[key].values[0]
def find_property(self, skip_os=False):
"""
This method will attempt to identify a property. It will, at first, use the EPC api to try and
find the EPC for the property and the associated UPRN. If this fails, it will use the Ordnance Survey API to
find the UPRN of the address.
Because no result may have been provided by the EPC api because of formatting issues with the address,
if the ordnance survey api is used and the uprn retrieved, the EPC api is queried again with the UPRN, just
as a final check to see if there is any EPC data.
If there is no EPC data, the epc data will be estimated based on the surrounding properties

The outcome is stored on the instance (newest_epc, older_epcs, full_sap_epc, address_clean,
postcode_clean and, when an EPC is found, uprn); nothing is returned.

:param skip_os: When truthy and the first EPC lookup fails, the EPC is estimated from the property type
and built form already held by the ordnance survey client, provided a property type is available
:raises Exception: If the ordnance survey places api does not respond with a status of 200
"""
# Step 1: use the epc api to find the property and uprn
response = self.get_epc()
if response["status"] == 200:
# An EPC exists - unpack everything extract_epc_data provides and stop here
(
self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn
) = self.extract_epc_data(address=self.full_address)
return
# Step 2: If we don't have an EPC, we use the ordnance survey api to find the uprn
# NOTE(review): with skip_os set, the branch below estimates from the property type / built form the
# ordnance survey client already holds, without calling get_places_api itself. Confirm the intended
# behaviour when the client has no property type.
if skip_os:
if self.ordnance_survey_client.property_type is not None:
# We can try and estimate
estimated_epc = self.estimate_epc(
property_type=self.ordnance_survey_client.property_type,
built_form=self.ordnance_survey_client.built_form
)
# An estimated EPC has no history and no full SAP record
self.newest_epc = estimated_epc
self.older_epcs = []
self.full_sap_epc = {}
# Finally, set a standardised address 1 and postcode
self.address_clean = self.ordnance_survey_client.address_os
self.postcode_clean = self.ordnance_survey_client.postcode_os
return
os_response = self.ordnance_survey_client.get_places_api()
if os_response["status"] != 200:
# Investigate this if it happens
raise Exception("Unable to find property - investigate me")
# Step 3: Now that we have a uprn, do another check against the epc api, this time searching with the uprn
self.uprn = self.ordnance_survey_client.most_relevant_result["UPRN"]
response = self.get_epc()
if response["status"] == 200:
# Unlike step 1, no address is passed to extract_epc_data here
(
self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn
) = self.extract_epc_data()
return
# Step 4: If we still don't have an EPC, we estimate the EPC data
# The address returned by the ordnance survey replaces the one supplied by the caller
self.full_address = self.ordnance_survey_client.most_relevant_result["ADDRESS"]
estimated_epc = self.estimate_epc(
property_type=self.ordnance_survey_client.property_type,
built_form=self.ordnance_survey_client.built_form
)
# An estimated EPC has no history and no full SAP record
self.newest_epc = estimated_epc
self.older_epcs = []
self.full_sap_epc = {}
# Finally, set a standardised address 1 and postcode
self.address_clean = self.ordnance_survey_client.address_os
self.postcode_clean = self.ordnance_survey_client.postcode_os
return

View file

@ -13,6 +13,7 @@ class Settings(BaseSettings):
HEAT_PREDICTIONS_BUCKET: str HEAT_PREDICTIONS_BUCKET: str
PLAN_TRIGGER_BUCKET: str PLAN_TRIGGER_BUCKET: str
EPC_AUTH_TOKEN: str EPC_AUTH_TOKEN: str
ORDNANCE_SURVEY_API_KEY: str
DB_HOST: str DB_HOST: str
DB_PASSWORD: str DB_PASSWORD: str
DB_USERNAME: str DB_USERNAME: str

View file

@ -11,7 +11,7 @@ from backend.app.db.models.portfolio import (
from sqlalchemy.orm.exc import NoResultFound from sqlalchemy.orm.exc import NoResultFound
def create_property(session: Session, portfolio_id: int, address: str, postcode: str) -> (int, bool): def create_property(session: Session, portfolio_id: int, address: str, postcode: str, uprn: str) -> (int, bool):
""" """
This function will create a record for the property in the database if it does not exist. This function will create a record for the property in the database if it does not exist.
If it does exist, it will just update the updated_at field. If it does exist, it will just update the updated_at field.
@ -25,7 +25,7 @@ def create_property(session: Session, portfolio_id: int, address: str, postcode:
try: try:
# Attempt to fetch the existing property # Attempt to fetch the existing property
existing_property = session.query(PropertyModel).filter_by( existing_property = session.query(PropertyModel).filter_by(
address=address, postcode=postcode, portfolio_id=portfolio_id uprn=uprn, portfolio_id=portfolio_id
).one() ).one()
# Update the 'updated_at' field # Update the 'updated_at' field
@ -43,6 +43,7 @@ def create_property(session: Session, portfolio_id: int, address: str, postcode:
address=address, address=address,
postcode=postcode, postcode=postcode,
portfolio_id=portfolio_id, portfolio_id=portfolio_id,
uprn=uprn,
creation_status=PropertyCreationStatus.LOADING, creation_status=PropertyCreationStatus.LOADING,
status=PortfolioStatus.ASSESSMENT.value, status=PortfolioStatus.ASSESSMENT.value,
has_pre_condition_report=False, has_pre_condition_report=False,

View file

@ -19,7 +19,6 @@ class MaterialType(enum.Enum):
flat_roof_insulation = "flat_roof_insulation" flat_roof_insulation = "flat_roof_insulation"
room_roof_insulation = "room_roof_insulation" room_roof_insulation = "room_roof_insulation"
windows_glazing = "windows_glazing" windows_glazing = "windows_glazing"
iwi_wall_demolition = "iwi_wall_demolition" iwi_wall_demolition = "iwi_wall_demolition"
iwi_vapour_barrier = "iwi_vapour_barrier" iwi_vapour_barrier = "iwi_vapour_barrier"

View file

@ -153,6 +153,7 @@ class PropertyDetailsEpcModel(Base):
primary_energy_consumption = Column(Float) primary_energy_consumption = Column(Float)
co2_emissions = Column(Float) co2_emissions = Column(Float)
adjusted_energy_consumption = Column(Float) adjusted_energy_consumption = Column(Float)
estimated = Column(Boolean, default=False)
class PropertyDetailsSpatial(Base): class PropertyDetailsSpatial(Base):

View file

@ -4,6 +4,7 @@ import numpy as np
import pandas as pd import pandas as pd
from epc_api.client import EpcClient from epc_api.client import EpcClient
from etl.epc.Record import EPCRecord from etl.epc.Record import EPCRecord
from backend.SearchEpc import SearchEpc
from fastapi import APIRouter, Depends from fastapi import APIRouter, Depends
from sqlalchemy.exc import IntegrityError, OperationalError from sqlalchemy.exc import IntegrityError, OperationalError
from sqlalchemy.orm import sessionmaker from sqlalchemy.orm import sessionmaker
@ -30,6 +31,8 @@ from backend.ml_models.api import ModelApi
from backend.Property import Property from backend.Property import Property
from etl.epc.DataProcessor import EPCDataProcessor from etl.epc.DataProcessor import EPCDataProcessor
from etl.epc.settings import COLUMNS_TO_MERGE_ON from etl.epc.settings import COLUMNS_TO_MERGE_ON
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from recommendations.optimiser.CostOptimiser import CostOptimiser from recommendations.optimiser.CostOptimiser import CostOptimiser
from recommendations.optimiser.GainOptimiser import GainOptimiser from recommendations.optimiser.GainOptimiser import GainOptimiser
from recommendations.optimiser.optimiser_functions import prepare_input_measures from recommendations.optimiser.optimiser_functions import prepare_input_measures
@ -43,54 +46,6 @@ logger = setup_logger()
BATCH_SIZE = 5 BATCH_SIZE = 5
class DummyDownloader:
def __init__(self, postcode, address1, id, epc_client):
self.id = id
self.postcode = postcode
self.address1 = address1
self.data = None
self.old_data = None
self.epc_client = epc_client
def search_address_epc(self):
"""
This method searches for an address in the EPC database and returns the first result
:return: property data
"""
if self.data:
return
# This will fail if a property does not have an EPC - this has been documented as a case to handle
response = self.epc_client.domestic.search(params={"address": self.address1, "postcode": self.postcode})
# Check if we have a full sap EPC
self.full_sap_epc = [r for r in response["rows"] if r["transaction-type"] == "new dwelling"]
self.full_sap_epc = self.full_sap_epc[0] if self.full_sap_epc else self.full_sap_epc
if len(response["rows"]) > 1:
newest_response = [
r for r in response["rows"] if
r["lodgement-datetime"] == max([x["lodgement-datetime"] for x in response["rows"]])
]
if len(newest_response) > 1:
raise Exception("More than one result found for this address - investigate me")
# We'll keep old EPCs in case it contains information, not present on the newest one
self.old_data = [epc for epc in response["rows"] if epc["lmk-key"] != newest_response[0]["lmk-key"]]
response["rows"] = newest_response
self.data = response["rows"][0]
# For the moment, if we don't have a UPRN, we don't do anything about it, however we'll handle this in
# the future by using the Ordnance Survey places API
if not self.data["uprn"]:
logger.warning("We do not have a UPRN for this property")
else:
self.uprn = int(self.data["uprn"])
router = APIRouter( router = APIRouter(
prefix="/plan", prefix="/plan",
tags=["plan"], tags=["plan"],
@ -103,37 +58,34 @@ router = APIRouter(
@router.post("/trigger") @router.post("/trigger")
async def trigger_plan(body: PlanTriggerRequest): async def trigger_plan(body: PlanTriggerRequest):
logger.info("Connecting to db") logger.info("Connecting to db")
# session = sessionmaker(bind=db_engine)() session = sessionmaker(bind=db_engine)()
created_at = datetime.now().isoformat() created_at = datetime.now().isoformat()
try: try:
session.begin() session.begin()
logger.info("Getting the inputs") logger.info("Getting the inputs")
Body = {'portfolio_id': '56', 'housing_type': 'Social', 'goal': 'Increase EPC', 'goal_value': 'A', 'trigger_file_path': '8/56/windows_portfolio_inputs.csv'}
body = PlanTriggerRequest(**Body)
epc_client = EpcClient(auth_token=get_settings().EPC_AUTH_TOKEN) epc_client = EpcClient(auth_token=get_settings().EPC_AUTH_TOKEN)
plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path) plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
uprn_filenames = read_dataframe_from_s3_parquet(
bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
)
cleaning_data = read_parquet_from_s3(
bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
)
input_properties = [] input_properties = []
for config in plan_input: for config in plan_input:
# We validate each record in the file. If the record is NOT valid, we need to handle this accordingly # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
# TODO: implment validation. We should also standardise postcode and address in some fashion as
# a postcode of abcdef would be considered different to ABCDEF epc_searcher = SearchEpc(
address1=config["address"],
postcode=config["postcode"],
auth_token=get_settings().EPC_AUTH_TOKEN,
os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY
)
epc_searcher.find_property()
# Create a record in db # Create a record in db
property_id, is_new = create_property( property_id, is_new = create_property(
session, portfolio_id=body.portfolio_id, address=config['address'], postcode=config['postcode'] session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
) )
# if a new record was not created, we don't produduce recommendations # if a new record was not created, we don't produduce recommendations
if not is_new: if not is_new:
continue continue
# TODO: Need to add heat demand target
create_property_targets( create_property_targets(
session, session,
@ -143,29 +95,21 @@ async def trigger_plan(body: PlanTriggerRequest):
heat_demand_target=None heat_demand_target=None
) )
epc_downloader = DummyDownloader(id=0, epc_client=epc_client, postcode=config['postcode'], address1=config['address'])
epc_downloader.search_address_epc()
epc_records ={ epc_records ={
'original_epc': epc_downloader.data.copy(), 'original_epc': epc_searcher.newest_epc,
'full_sap_epc': epc_downloader.full_sap_epc.copy() if epc_downloader.full_sap_epc else [], 'full_sap_epc': epc_searcher.full_sap_epc,
'old_data': epc_downloader.old_data.copy() if epc_downloader.old_data else [] 'old_data': epc_searcher.old_data,
} }
prepared_epc = EPCRecord(epc_records=epc_records, run_mode="newdata", cleaning_data=cleaning_data) # This uses all the epc records to clean the data prepared_epc = EPCRecord(epc_records=epc_records, run_mode="newdata", cleaning_data=cleaning_data) # This uses all the epc records to clean the data
p = Property( input_properties.append(
Property(
id=property_id, id=property_id,
address1=config['address'], address1=config['address'],
postcode=config['postcode'], postcode=config['postcode'],
epc_record=prepared_epc, epc_record=prepared_epc,
) )
logger.info("Getting spatial data")
p.get_spatial_data(uprn_filenames)
input_properties.append(
p
) )
@ -180,10 +124,19 @@ async def trigger_plan(body: PlanTriggerRequest):
materials = get_materials(session) materials = get_materials(session)
cleaned = get_cleaned() cleaned = get_cleaned()
logger.info("Getting components and epc recommendations") uprn_filenames = read_dataframe_from_s3_parquet(
bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
)
cleaning_data = read_dataframe_from_s3_parquet(
bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
)
photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET)
# TODO: Move this to a class. We probably want a Recommender class which takes the injects the optimisers logger.info("Getting spatial data")
# in as a dependency and then the optimisers can take the input measures in as part of the setup() method for p in input_properties:
p.get_spatial_data(uprn_filenames)
logger.info("Getting components and epc recommendations")
recommendations = {} recommendations = {}
recommendations_scoring_data = [] recommendations_scoring_data = []
@ -192,7 +145,7 @@ async def trigger_plan(body: PlanTriggerRequest):
for p in input_properties: for p in input_properties:
# Property recommendations # Property recommendations
p.get_components(cleaned) p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
recommender = Recommendations(property_instance=p, materials=materials) recommender = Recommendations(property_instance=p, materials=materials)
property_recommendations = recommender.recommend() property_recommendations = recommender.recommend()

View file

@ -175,11 +175,34 @@ def create_recommendation_scoring_data(
scoring_dict["LOW_ENERGY_LIGHTING_ENDING"] = 100 scoring_dict["LOW_ENERGY_LIGHTING_ENDING"] = 100
scoring_dict["LIGHTING_ENERGY_EFF_STARTING"] = "Very Good" scoring_dict["LIGHTING_ENERGY_EFF_STARTING"] = "Very Good"
if recommendation["type"] == "windows_glazing":
scoring_dict["MULTI_GLAZE_PROPORTION_ENDING"] = 100
scoring_dict["WINDOWS_ENERGY_EFF_ENDING"] = "Average"
is_secondary_glazing = recommendation["is_secondary_glazing"]
if scoring_dict["glazing_type_ENDING"] == "multiple":
pass
elif scoring_dict["glazing_type_ENDING"] == "single":
scoring_dict["glazing_type_ENDING"] = "secondary" if is_secondary_glazing else "double"
elif scoring_dict["glazing_type_ENDING"] == "double":
scoring_dict["glazing_type_ENDING"] = "multiple" if is_secondary_glazing else "double"
elif scoring_dict["glazing_type_ENDING"] == "secondary":
scoring_dict["glazing_type_ENDING"] = "secondary" if is_secondary_glazing else "multiple"
elif scoring_dict["glazing_type_ENDING"] in ["triple", "high performance"]:
scoring_dict["glazing_type_ENDING"] = "multiple"
else:
raise ValueError("Invalid glazing type - implement me")
if recommendation["type"] == "solar_pv":
scoring_dict["PHOTO_SUPPLY_ENDING"] = recommendation["photo_supply"]
if recommendation["type"] not in [ if recommendation["type"] not in [
"mechanical_ventilation", "sealing_open_fireplace", "low_energy_lighting", "mechanical_ventilation", "sealing_open_fireplace", "low_energy_lighting",
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation", "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
"loft_insulation", "room_roof_insulation", "flat_roof_insulation", "loft_insulation", "room_roof_insulation", "flat_roof_insulation",
"solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation" "solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation",
"windows_glazing", "solar_pv"
]: ]:
raise NotImplementedError("Implement me") raise NotImplementedError("Implement me")

View file

@ -121,19 +121,6 @@ def epc_to_sap_lower_bound(epc: str):
raise ValueError("EPC rating should be between A and G") raise ValueError("EPC rating should be between A and G")
def read_parquet_from_s3(bucket_name, file_key):
client = boto3.client('s3')
# Get the object
s3_object = client.get_object(Bucket=bucket_name, Key=file_key)
# Read the CSV body into a DataFrame
csv_body = s3_object["Body"].read()
df = pd.read_parquet(BytesIO(csv_body))
return df
def save_dataframe_to_s3_parquet(df, bucket_name, file_key): def save_dataframe_to_s3_parquet(df, bucket_name, file_key):
""" """
Save a pandas DataFrame to S3 as a Parquet file. Save a pandas DataFrame to S3 as a Parquet file.

View file

@ -19,7 +19,9 @@ class PropertyValuation:
100070505235: 344000, # Based on Zoopla's estimation of 131 School road, which is also semi-detached 100070505235: 344000, # Based on Zoopla's estimation of 131 School road, which is also semi-detached
100070513306: 182000, # Based on Zoopla's estimation of 61 Simmons Drive 100070513306: 182000, # Based on Zoopla's estimation of 61 Simmons Drive
100071306896: 77000, # Based on Flat 2 of 44 Wedgewood Road on Zoopla 100071306896: 77000, # Based on Flat 2 of 44 Wedgewood Road on Zoopla
100021192109: 650000 # Based on Zoopla 100021192109: 650000, # Based on Zoopla
766249482: 358000, # Based on Zoopla estimate for 19 Spring Lane, 3 bedroom semi-detached
100120703802: 277000, # Based on Zoopla
} }
# We base our valuation uplifts on a number of sources # We base our valuation uplifts on a number of sources
@ -93,7 +95,13 @@ class PropertyValuation:
value = cls.UPRN_VALUE_LOOKUP.get(property_instance.uprn) value = cls.UPRN_VALUE_LOOKUP.get(property_instance.uprn)
if not value: if not value:
raise ValueError("Have not implemented valuation for this property") return {
"current_value": None,
"lower_bound_increased_value": None,
"upper_bound_increased_value": None,
"average_increased_value": None,
"average_increase": None
}
current_epc = property_instance.data["current-energy-rating"] current_epc = property_instance.data["current-energy-rating"]
# We get the spectrum of ratings between the current and target EPC # We get the spectrum of ratings between the current and target EPC
@ -119,4 +127,5 @@ class PropertyValuation:
"lower_bound_increased_value": value * (1 + min_increase), "lower_bound_increased_value": value * (1 + min_increase),
"upper_bound_increased_value": value * (1 + max_increase), "upper_bound_increased_value": value * (1 + max_increase),
"average_increased_value": value * (1 + avg_increase), "average_increased_value": value * (1 + avg_increase),
"average_increase": value * (1 + avg_increase) - value
} }

View file

@ -2,8 +2,7 @@ import pandas as pd
import requests import requests
from requests.exceptions import RequestException from requests.exceptions import RequestException
from utils.logger import setup_logger from utils.logger import setup_logger
from utils.s3 import save_dataframe_to_s3_parquet from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
from backend.app.utils import read_parquet_from_s3
logger = setup_logger() logger = setup_logger()
@ -125,7 +124,7 @@ class ModelApi:
# Retrieve the predictions # Retrieve the predictions
predictions_df = pd.DataFrame( predictions_df = pd.DataFrame(
read_parquet_from_s3( read_dataframe_from_s3_parquet(
bucket_name=predictions_bucket, bucket_name=predictions_bucket,
file_key=response["storage_filepath"].split(predictions_bucket + "/")[1] file_key=response["storage_filepath"].split(predictions_bucket + "/")[1]
) )

View file

@ -35,4 +35,5 @@ mip==1.15.0
boto3==1.28.3 boto3==1.28.3
pandas==1.5.3 pandas==1.5.3
pyarrow==12.0.1 pyarrow==12.0.1
textblob textblob
usaddress==0.5.10

View file

@ -9,6 +9,7 @@ from etl.epc_clean.EpcClean import EpcClean
mock_epc_response = { mock_epc_response = {
"rows": [ "rows": [
{ {
"tenure": "rental (social)",
"lmk-key": 1, "lmk-key": 1,
"uprn": 1, "uprn": 1,
"number-habitable-rooms": 5, "number-habitable-rooms": 5,
@ -17,7 +18,7 @@ mock_epc_response = {
"inspection-date": "2023-06-01", "inspection-date": "2023-06-01",
'lodgement-datetime': '2023-06-01 20:29:01', 'lodgement-datetime': '2023-06-01 20:29:01',
"some-other-key": "some-value", "some-other-key": "some-value",
"roof-description": "Roof Description", "roof-description": "pitched, no insulation",
"walls-description": "Walls Description", "walls-description": "Walls Description",
"windows-description": "Windows Description", "windows-description": "Windows Description",
"mainheat-description": "Main Heating Description", "mainheat-description": "Main Heating Description",
@ -37,7 +38,8 @@ mock_epc_response = {
"floor-height": 2.5, "floor-height": 2.5,
"total-floor-area": 100, "total-floor-area": 100,
"construction-age-band": "England and Wales: 1967-1975", "construction-age-band": "England and Wales: 1967-1975",
"floor-description": "Floor Description" "floor-description": "Floor Description",
"floor-level": "Ground"
}, },
{ {
"lmk-key": 2, "lmk-key": 2,
@ -68,7 +70,8 @@ mock_epc_response = {
"floor-height": 2.5, "floor-height": 2.5,
"total-floor-area": 100, "total-floor-area": 100,
"construction-age-band": "England and Wales: 1967-1975", "construction-age-band": "England and Wales: 1967-1975",
"floor-description": "Floor Description" "floor-description": "Floor Description",
"floor-level": "Ground"
} }
] ]
} }
@ -100,7 +103,8 @@ mock_epc_response_dupe = {
"floor-height": 2.5, "floor-height": 2.5,
"total-floor-area": 100, "total-floor-area": 100,
"construction-age-band": "England and Wales: 1967-1975", "construction-age-band": "England and Wales: 1967-1975",
"floor-description": "Floor Description" "floor-description": "Floor Description",
"floor-level": "Ground"
}, },
{ {
"lmk-key": 2, "lmk-key": 2,
@ -128,7 +132,8 @@ mock_epc_response_dupe = {
"floor-height": 2.5, "floor-height": 2.5,
"total-floor-area": 100, "total-floor-area": 100,
"construction-age-band": "England and Wales: 1967-1975", "construction-age-band": "England and Wales: 1967-1975",
"floor-description": "Floor Description" "floor-description": "Floor Description",
"floor-level": "Ground"
}, },
{ {
"lmk-key": 3, "lmk-key": 3,
@ -156,36 +161,62 @@ mock_epc_response_dupe = {
"floor-height": 2.5, "floor-height": 2.5,
"total-floor-area": 100, "total-floor-area": 100,
"construction-age-band": "England and Wales: 1967-1975", "construction-age-band": "England and Wales: 1967-1975",
"floor-description": "Floor Description" "floor-description": "Floor Description",
"floor-level": "Ground"
} }
] ]
} }
class TestProperty: class TestProperty:
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
def property_instance(self, mock_epc_client, mock_cleaner): def mock_photo_supply_lookup(self):
property_instance = Property(1, "AB12CD", "Test Address", epc_client=mock_epc_client) return pd.DataFrame(
[
dict(
tenure="rental (social)",
built_form="Detached",
property_type="House",
construction_age_band="England and Wales: 1967-1975",
is_flat=False,
is_pitched=True,
is_roof_room=False,
floor_area_decile=2,
photo_supply_median=40
)
]
)
@pytest.fixture(autouse=True)
def mock_floor_area_decile_thresholds(self):
return pd.DataFrame(
{"floor_area_decile_thresholds": [0, 10, 30, 50]}
)
@pytest.fixture(autouse=True)
def property_instance(self, mock_cleaner):
property_instance = Property(id=1, postcode="AB12CD", address="Test Address", data=mock_epc_response["rows"][0])
return property_instance return property_instance
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
def property_instance_dupe_data(self, mock_epc_client_dupe_data): def property_instance_dupe_data(self):
property_instance_dupe_data = Property(2, "AB12CD", "Test Address", epc_client=mock_epc_client_dupe_data) property_instance_dupe_data = Property(id=2, postcode="AB12CD", address="Test Address")
return property_instance_dupe_data return property_instance_dupe_data
@pytest.fixture # @pytest.fixture
def mock_epc_client(self): # def mock_epc_client(self):
mock_epc_client = Mock(spec=EpcClient(auth_token="mocked_auth_token")) # mock_epc_client = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
mock_epc_client.domestic.search.return_value = mock_epc_response.copy() # mock_epc_client.domestic.search.return_value = mock_epc_response.copy()
mock_epc_client.auth_token = "mocked_auth_token" # mock_epc_client.auth_token = "mocked_auth_token"
return mock_epc_client # return mock_epc_client
#
@pytest.fixture # @pytest.fixture
def mock_epc_client_dupe_data(self): # def mock_epc_client_dupe_data(self):
mock_epc_client_dupe_data = Mock(spec=EpcClient(auth_token="mocked_auth_token")) # mock_epc_client_dupe_data = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
mock_epc_client_dupe_data.domestic.search.return_value = mock_epc_response_dupe.copy() # mock_epc_client_dupe_data.domestic.search.return_value = mock_epc_response_dupe.copy()
mock_epc_client_dupe_data.auth_token = "mocked_auth_token" # mock_epc_client_dupe_data.auth_token = "mocked_auth_token"
return mock_epc_client_dupe_data # return mock_epc_client_dupe_data
@pytest.fixture @pytest.fixture
def mock_cleaner(self): def mock_cleaner(self):
@ -224,7 +255,11 @@ class TestProperty:
} }
mock_cleaner.cleaned = { mock_cleaner.cleaned = {
"roof-description": [{"original_description": "Roof Description"}], "roof-description": [
{"original_description": "Roof Description"},
{"original_description": "pitched, no insulation", "is_pitched": True, "is_flat": False,
"is_roof_room": False}
],
"walls-description": [walls_data], "walls-description": [walls_data],
"windows-description": [{"original_description": "Windows Description"}], "windows-description": [{"original_description": "Windows Description"}],
"mainheat-description": [{"original_description": "Main Heating Description"}], "mainheat-description": [{"original_description": "Main Heating Description"}],
@ -235,37 +270,32 @@ class TestProperty:
} }
return mock_cleaner return mock_cleaner
def test_init(self, mock_epc_client): def test_init(self):
inst1 = Property(0, "AB12CD", "Test Address", epc_client=mock_epc_client) inst1 = Property(0, postcode="AB12CD", address="Test Address")
# Should be mocked auth token
assert inst1.epc_client.auth_token == "mocked_auth_token"
inst2 = Property(3, "AB12CD", "Test Address", epc_client=mock_epc_client) assert inst1.data is None
assert inst2.epc_client.auth_token
inst3 = Property(4, "AB12CD", "Test Address", data={"some": "data"}, epc_client=mock_epc_client) inst2 = Property(3, "AB12CD", "Test Address")
assert inst3.data == {"some": "data"} assert inst2.id == 3
data = inst3.search_address_epc() inst3 = Property(4, "AB12CD", "Test Address", data={"some": "data", "uprn": 123})
assert data is None assert inst3.data == {"some": "data", "uprn": 123}
def test_search_address_epc(self, property_instance): def test_get_components(
# Call the method to test self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
property_instance.search_address_epc() ):
property_instance.get_components(
# Verify that the correct data is being returned mock_cleaner.cleaned,
assert property_instance.data == mock_epc_response["rows"][0] photo_supply_lookup=mock_photo_supply_lookup,
floor_area_decile_thresholds=mock_floor_area_decile_thresholds
def test_search_address_epc_multiple_results(self, property_instance_dupe_data, mock_epc_client_dupe_data): )
with pytest.raises(Exception, match="More than one result found for this address - investigate me"):
property_instance_dupe_data.search_address_epc()
def test_get_components(self, property_instance, mock_cleaner, mock_epc_client):
property_instance.search_address_epc()
property_instance.get_components(mock_cleaner.cleaned)
# Verify that the components are set correctly # Verify that the components are set correctly
assert property_instance.roof == {"original_description": "Roof Description"} assert property_instance.roof == {
'original_description': 'pitched, no insulation', 'is_pitched': True,
'is_flat': False, 'is_roof_room': False
}
assert property_instance.walls == { assert property_instance.walls == {
"original_description": "Walls Description", "original_description": "Walls Description",
"is_cavity_wall": True, "is_cavity_wall": True,
@ -289,24 +319,15 @@ class TestProperty:
# Verify that ValueError is raised when EpcClean doesn't contain cleaned data # Verify that ValueError is raised when EpcClean doesn't contain cleaned data
with pytest.raises(ValueError, match="Cleaner does not contain cleaned data"): with pytest.raises(ValueError, match="Cleaner does not contain cleaned data"):
property_instance.get_components(mock_cleaner.cleaned) property_instance.get_components(mock_cleaner.cleaned, pd.DataFrame(), pd.DataFrame())
def test_get_components_no_data(self, property_instance, mock_cleaner): def test_get_components_no_attributes(
self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
):
# Modify the mock cleaner to have no attributes for a specific description # Modify the mock cleaner to have no attributes for a specific description
mock_cleaner.cleaned = { mock_cleaner.cleaned = {
"roof-description": [] "roof-description": []
} }
# Verify that ValueError is raised when no attributes are found
with pytest.raises(ValueError, match="Property does not contain data"):
property_instance.get_components(mock_cleaner.cleaned)
def test_get_components_no_attributes(self, property_instance, mock_cleaner):
# Modify the mock cleaner to have no attributes for a specific description
mock_cleaner.cleaned = {
"roof-description": []
}
property_instance.search_address_epc()
property_instance.data["roof-description"] = "Pitched, no insulation" property_instance.data["roof-description"] = "Pitched, no insulation"
property_instance.walls = { property_instance.walls = {
"original_description": "Walls Description", "original_description": "Walls Description",
@ -327,14 +348,17 @@ class TestProperty:
} }
# Assert backup cleaning has been applied # Assert backup cleaning has been applied
property_instance.get_components(mock_cleaner.cleaned) property_instance.get_components(
mock_cleaner.cleaned, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
)
assert property_instance.roof["clean_description"] == "Pitched, no insulation" assert property_instance.roof["clean_description"] == "Pitched, no insulation"
assert property_instance.roof["is_pitched"] assert property_instance.roof["is_pitched"]
def test_get_components_multiple_attributes(self, property_instance, mock_cleaner): def test_get_components_multiple_attributes(
self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
):
# This shouldn't happen - it would mean a cleaning error # This shouldn't happen - it would mean a cleaning error
property_instance.search_address_epc()
property_instance.data["roof-description"] = "Roof Description" property_instance.data["roof-description"] = "Roof Description"
cleaned = { cleaned = {
"roof-description": [ "roof-description": [
@ -345,10 +369,10 @@ class TestProperty:
# Verify that ValueError is raised when multiple attributes are found # Verify that ValueError is raised when multiple attributes are found
with pytest.raises(ValueError, match="Either No attributes or multiple found for roof-description"): with pytest.raises(ValueError, match="Either No attributes or multiple found for roof-description"):
property_instance.get_components(cleaned) property_instance.get_components(cleaned, mock_photo_supply_lookup, mock_floor_area_decile_thresholds)
def test_set_spatial(self, mock_epc_client): def test_set_spatial(self):
prop = Property(1, "AB12CD", "Test Address", mock_epc_client) prop = Property(1, postcode="AB12CD", address="Test Address")
spatial1 = pd.DataFrame([{ spatial1 = pd.DataFrame([{
'X_COORDINATE': 411143.0, 'Y_COORDINATE': 281701.0, 'LATITUDE': 52.4331896, 'LONGITUDE': -1.8375238, 'X_COORDINATE': 411143.0, 'Y_COORDINATE': 281701.0, 'LATITUDE': 52.4331896, 'LONGITUDE': -1.8375238,
@ -362,7 +386,7 @@ class TestProperty:
assert prop.is_heritage assert prop.is_heritage
assert prop.restricted_measures assert prop.restricted_measures
prop2 = Property(1, "AB12CD", "Test Address", mock_epc_client) prop2 = Property(1, "AB12CD", "Test Address")
spatial2 = pd.DataFrame([{ spatial2 = pd.DataFrame([{
'X_COORDINATE': 411143.0, 'Y_COORDINATE': 281701.0, 'LATITUDE': 52.4331896, 'LONGITUDE': -1.8375238, 'X_COORDINATE': 411143.0, 'Y_COORDINATE': 281701.0, 'LATITUDE': 52.4331896, 'LONGITUDE': -1.8375238,
@ -376,10 +400,10 @@ class TestProperty:
assert not prop2.is_heritage assert not prop2.is_heritage
assert not prop2.restricted_measures assert not prop2.restricted_measures
def test_set_floor_level(self, mock_epc_client): def test_set_floor_level(self):
# In this case, we have a flat which looks like it's on the first floor, but it's actually on the ground # In this case, we have a flat which looks like it's on the first floor, but it's actually on the ground
# floor, so we should set floor_level to 0 # floor, so we should set floor_level to 0
prop = Property(1, "AB12CD", "Test Address", mock_epc_client) prop = Property(1, postcode="AB12CD", address="Test Address")
prop.data = {'floor-level': '01', 'property-type': 'Flat'} prop.data = {'floor-level': '01', 'property-type': 'Flat'}
prop.floor = { prop.floor = {
'original_description': 'Solid, no insulation (assumed)', 'clean_description': 'Solid, no insulation', 'original_description': 'Solid, no insulation (assumed)', 'clean_description': 'Solid, no insulation',
@ -395,7 +419,7 @@ class TestProperty:
# This property is labelled as being on the ground floor but actually has another property below # This property is labelled as being on the ground floor but actually has another property below
# so we set floor level to 1 # so we set floor level to 1
prop2 = Property(1, "AB12CD", "Test Address", mock_epc_client) prop2 = Property(1, postcode="AB12CD", address="Test Address")
prop2.data = {'floor-level': 'Ground', 'property-type': 'Flat'} prop2.data = {'floor-level': 'Ground', 'property-type': 'Flat'}
prop2.floor = { prop2.floor = {
'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation', 'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation',
@ -410,7 +434,7 @@ class TestProperty:
assert prop2.floor_level == 1 assert prop2.floor_level == 1
# this property is correctly labelled as being on the 2nd floor # this property is correctly labelled as being on the 2nd floor
prop3 = Property(1, "AB12CD", "Test Address", mock_epc_client) prop3 = Property(1, postcode="AB12CD", address="Test Address")
prop3.data = {'floor-level': '02', 'property-type': 'Flat'} prop3.data = {'floor-level': '02', 'property-type': 'Flat'}
prop3.floor = { prop3.floor = {
'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation', 'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation',
@ -425,7 +449,7 @@ class TestProperty:
assert prop3.floor_level == 2 assert prop3.floor_level == 2
# Example of a house # Example of a house
prop4 = Property(1, "AB12CD", "Test Address", mock_epc_client) prop4 = Property(1, postcode="AB12CD", address="Test Address")
prop4.data = {'floor-level': '', 'property-type': 'House'} prop4.data = {'floor-level': '', 'property-type': 'House'}
prop4.floor = { prop4.floor = {
'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation', 'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation',

View file

@ -2,13 +2,11 @@ from backend.Property import Property
from etl.epc.DataProcessor import DataProcessor from etl.epc.DataProcessor import DataProcessor
from backend.app.plan.utils import create_recommendation_scoring_data, get_cleaned from backend.app.plan.utils import create_recommendation_scoring_data, get_cleaned
from etl.epc.settings import COLUMNS_TO_MERGE_ON from etl.epc.settings import COLUMNS_TO_MERGE_ON
from epc_api.client import EpcClient
import pandas as pd import pandas as pd
import pytest import pytest
import msgpack import msgpack
from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3 from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3
from tqdm import tqdm
# Handy code for selecting testing data # Handy code for selecting testing data
@ -122,7 +120,21 @@ class TestSapModelPrep:
cleaned = msgpack.unpackb(cleaned, raw=False) cleaned = msgpack.unpackb(cleaned, raw=False)
return cleaned return cleaned
def test_fill_cavity_wall(self, cleaned, cleaning_data): @pytest.fixture
def photo_supply_lookup(self):
photo_supply_lookup = read_dataframe_from_s3_parquet(
bucket_name="retrofit-data-dev", file_key="solar_pv_supply/photo_supply_lookup.parquet",
)
return photo_supply_lookup
@pytest.fixture
def floor_area_decile_thresholds(self):
floor_area_decile_thresholds = read_dataframe_from_s3_parquet(
bucket_name="retrofit-data-dev", file_key="solar_pv_supply/floor_area_decile_thresholds.parquet",
)
return floor_area_decile_thresholds
def test_fill_cavity_wall(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds):
""" """
We ensure that the process that prepares the data in the engine code results in the same data as We ensure that the process that prepares the data in the engine code results in the same data as
the model is trained on the model is trained on
@ -288,11 +300,10 @@ class TestSapModelPrep:
home = Property( home = Property(
id=0, id=0,
postcode=starting_epc["postcode"], postcode=starting_epc["postcode"],
address1=starting_epc["address1"], address=starting_epc["address1"],
epc_client=EpcClient(auth_token="notoken"),
data=starting_epc data=starting_epc
) )
home.get_components(cleaned) home.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
data_processor = DataProcessor(None, newdata=True) data_processor = DataProcessor(None, newdata=True)
data_processor.insert_data(pd.DataFrame([home.get_model_data()])) data_processor.insert_data(pd.DataFrame([home.get_model_data()]))
@ -356,7 +367,7 @@ class TestSapModelPrep:
assert test_record[c].values[0] == row[c] assert test_record[c].values[0] == row[c]
def test_internal_wall_insulation(self, cleaned, cleaning_data): def test_internal_wall_insulation(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds):
starting_epc2 = { starting_epc2 = {
'low-energy-fixed-light-count': '2', 'address': 'FLAT 12, WAREHOUSE W, 3 WESTERN GATEWAY', 'low-energy-fixed-light-count': '2', 'address': 'FLAT 12, WAREHOUSE W, 3 WESTERN GATEWAY',
@ -508,11 +519,10 @@ class TestSapModelPrep:
home2 = Property( home2 = Property(
id=0, id=0,
postcode=starting_epc2["postcode"], postcode=starting_epc2["postcode"],
address1=starting_epc2["address1"], address=starting_epc2["address1"],
epc_client=EpcClient(auth_token="notoken"),
data=starting_epc2 data=starting_epc2
) )
home2.get_components(cleaned) home2.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
home2.set_number_lighting_outlets(None) home2.set_number_lighting_outlets(None)
data_processor2 = DataProcessor(None, newdata=True) data_processor2 = DataProcessor(None, newdata=True)
@ -578,7 +588,7 @@ class TestSapModelPrep:
assert test_record2[c].values[0] == row2[c] assert test_record2[c].values[0] == row2[c]
def test_ventilation(self, cleaned, cleaning_data): def test_ventilation(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds):
starting_epc3 = { starting_epc3 = {
'low-energy-fixed-light-count': '', 'address': '45 Shepperson Road', 'uprn-source': 'Energy Assessor', 'low-energy-fixed-light-count': '', 'address': '45 Shepperson Road', 'uprn-source': 'Energy Assessor',
@ -728,11 +738,10 @@ class TestSapModelPrep:
home3 = Property( home3 = Property(
id=0, id=0,
postcode=starting_epc3["postcode"], postcode=starting_epc3["postcode"],
address1=starting_epc3["address1"], address=starting_epc3["address1"],
epc_client=EpcClient(auth_token="notoken"),
data=starting_epc3 data=starting_epc3
) )
home3.get_components(cleaned) home3.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
home3.set_number_lighting_outlets(None) home3.set_number_lighting_outlets(None)
data_processor3 = DataProcessor(None, newdata=True) data_processor3 = DataProcessor(None, newdata=True)
@ -782,7 +791,7 @@ class TestSapModelPrep:
assert test_record3[c].values[0] == row3[c] assert test_record3[c].values[0] == row3[c]
def test_fireplaces(self, cleaned, cleaning_data): def test_fireplaces(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds):
starting_epc4 = { starting_epc4 = {
'low-energy-fixed-light-count': '', 'address': '9 Glebe Road, Asfordby Hill', 'low-energy-fixed-light-count': '', 'address': '9 Glebe Road, Asfordby Hill',
@ -937,11 +946,10 @@ class TestSapModelPrep:
home4 = Property( home4 = Property(
id=0, id=0,
postcode=starting_epc4["postcode"], postcode=starting_epc4["postcode"],
address1=starting_epc4["address1"], address=starting_epc4["address1"],
epc_client=EpcClient(auth_token="notoken"),
data=starting_epc4 data=starting_epc4
) )
home4.get_components(cleaned) home4.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
home4.set_number_lighting_outlets(None) home4.set_number_lighting_outlets(None)
data_processor4 = DataProcessor(None, newdata=True) data_processor4 = DataProcessor(None, newdata=True)

View file

@ -75,6 +75,7 @@ def app():
ewi_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="external_wall_insulation", header=0) ewi_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="external_wall_insulation", header=0)
lel_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="low_energy_lighting", header=0) lel_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="low_energy_lighting", header=0)
flat_roof_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="flat_roof_insulation", header=0) flat_roof_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="flat_roof_insulation", header=0)
window_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="window_glazing", header=0)
# Form a single table to be uploaded # Form a single table to be uploaded
costs = pd.concat( costs = pd.concat(

View file

@ -33,6 +33,7 @@ class Eligibility:
# If the loft has less than 100mm of insulation, we classify the home as needing loft insulation # If the loft has less than 100mm of insulation, we classify the home as needing loft insulation
LOFT_INSULATION_THRESHOLD = 100 LOFT_INSULATION_THRESHOLD = 100
HIGH_LOFT_INSULATION_THRESHOLD = 269
# Because EPCS have different values for tenure, we need to remap them to a common set of values # Because EPCS have different values for tenure, we need to remap them to a common set of values
tenure_remap = { tenure_remap = {
@ -104,6 +105,8 @@ class Eligibility:
self.LOFT_INSULATION_THRESHOLD if loft_thickness_threshold is None else loft_thickness_threshold self.LOFT_INSULATION_THRESHOLD if loft_thickness_threshold is None else loft_thickness_threshold
) )
high_loft_thickness_threshold = self.HIGH_LOFT_INSULATION_THRESHOLD
# We firstly check if the roof is a loft # We firstly check if the roof is a loft
is_loft = self.roof["is_pitched"] and (not self.roof["is_roof_room"]) is_loft = self.roof["is_pitched"] and (not self.roof["is_roof_room"])
@ -122,7 +125,22 @@ class Eligibility:
is_flat=self.roof["is_flat"] is_flat=self.roof["is_flat"]
) )
if insulation_thickness > loft_thickness_threshold: if insulation_thickness <= loft_thickness_threshold:
self.loft = {
"suitability": True,
"thickness": insulation_thickness,
"reason": None
}
if insulation_thickness <= high_loft_thickness_threshold:
self.loft = {
"suitability": True,
"thickness": insulation_thickness,
"reason": "high loft thickness but below regulation"
}
return
if insulation_thickness > high_loft_thickness_threshold:
# Insulation is already thick enough # Insulation is already thick enough
self.loft = { self.loft = {
"suitability": False, "suitability": False,
@ -131,12 +149,6 @@ class Eligibility:
} }
return return
self.loft = {
"suitability": True,
"thickness": insulation_thickness,
"reason": None
}
def cavity_insulation(self): def cavity_insulation(self):
""" """
@ -152,9 +164,25 @@ class Eligibility:
is_partial_filled = ( is_partial_filled = (
self.walls["is_as_built"] and self.walls["insulation_thickness"] not in ["below average"] self.walls["is_as_built"] and self.walls["insulation_thickness"] not in ["below average"]
) )
# We look for potentially under performing cavities - anything that is assumed, as built and insulated
is_underperforming = (
self.walls["is_as_built"] and self.walls["insulation_thickness"] in ["average"] and self.walls["is_assumed"]
)
is_unfilled_cavity = is_cavity and is_empty is_unfilled_cavity = is_cavity and is_empty
is_partial_filled_cavity = is_cavity and is_partial_filled is_partial_filled_cavity = is_cavity and is_partial_filled
is_underperforming_cavity = is_cavity and is_underperforming
# Check if it has internal or external wall insulation
has_internal_wall_insulation = self.walls["internal_insulation"]
has_external_wall_insulation = self.walls["external_insulation"]
if has_internal_wall_insulation or has_external_wall_insulation:
self.cavity = {
"suitability": False,
"type": "internal or external wall insulation"
}
return
if is_unfilled_cavity: if is_unfilled_cavity:
self.cavity = { self.cavity = {
@ -170,6 +198,13 @@ class Eligibility:
} }
return return
if is_underperforming_cavity:
self.cavity = {
"suitability": True,
"type": "underperforming"
}
return
self.cavity = { self.cavity = {
"suitability": False, "suitability": False,
"type": "full" "type": "full"
@ -223,6 +258,14 @@ class Eligibility:
} }
def suspended_floor_insulation(self): def suspended_floor_insulation(self):
if "no_data" in self.floor.keys():
if self.floor["no_data"]:
self.suspended_floor = {
"suitability": False,
}
return
is_suspended = self.floor["is_suspended"] is_suspended = self.floor["is_suspended"]
is_insulated = self.floor["insulation_thickness"] in ["average", "above average"] is_insulated = self.floor["insulation_thickness"] in ["average", "above average"]
@ -232,6 +275,14 @@ class Eligibility:
return return
def solid_floor_insulation(self): def solid_floor_insulation(self):
if "no_data" in self.floor.keys():
if self.floor["no_data"]:
self.solid_floor = {
"suitability": False,
}
return
is_solid = self.floor["is_solid"] is_solid = self.floor["is_solid"]
is_insulated = self.floor["insulation_thickness"] in ["average", "above average"] is_insulated = self.floor["insulation_thickness"] in ["average", "above average"]
@ -305,7 +356,8 @@ class Eligibility:
""" """
current_sap = int(self.epc["current-energy-efficiency"]) current_sap = int(self.epc["current-energy-efficiency"])
if current_sap > 54:
if current_sap >= 69:
self.eco4_warmfront = { self.eco4_warmfront = {
"eligible": False, "eligible": False,
"message": "sap too high" "message": "sap too high"
@ -319,9 +371,22 @@ class Eligibility:
is_eligible = self.cavity["suitability"] & self.loft["suitability"] is_eligible = self.cavity["suitability"] & self.loft["suitability"]
if post_retrofit_sap is None: if post_retrofit_sap is None:
if current_sap >= 55:
message = "Possibly eligible but property currently EPC D"
else:
message = "subject to post retrofit sap" if is_eligible else "not eligible"
# Update the message to flag properties that failed just because of a full cavity.
# We need to double check that the wall is a cavity, that the loft is suitable and that the
# sap is within reason
# We can then estimate the age of the cavity fill
if not is_eligible and (current_sap < 69) and self.loft["suitability"] and self.walls["is_cavity_wall"]:
message = "Failed due to full cavity - check cavity age"
self.eco4_warmfront = { self.eco4_warmfront = {
"eligible": is_eligible, "eligible": is_eligible,
"message": "subject to post retrofit sap" "message": message
} }
return return

View file

@ -11,13 +11,12 @@ import numpy as np
import msgpack import msgpack
from datetime import datetime, timedelta from datetime import datetime, timedelta
from utils.logger import setup_logger from utils.logger import setup_logger
from utils.s3 import read_from_s3 from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet
from dotenv import load_dotenv from dotenv import load_dotenv
from backend.SearchEpc import SearchEpc from backend.SearchEpc import SearchEpc
from backend.Property import Property from backend.Property import Property
from etl.eligibility.Eligibility import Eligibility from etl.eligibility.Eligibility import Eligibility
from etl.epc.DataProcessor import DataProcessor from etl.epc.DataProcessor import DataProcessor
from backend.app.utils import read_parquet_from_s3
from backend.app.plan.utils import create_recommendation_scoring_data from backend.app.plan.utils import create_recommendation_scoring_data
from etl.epc.settings import COLUMNS_TO_MERGE_ON from etl.epc.settings import COLUMNS_TO_MERGE_ON
from backend.ml_models.api import ModelApi from backend.ml_models.api import ModelApi
@ -247,6 +246,8 @@ def merge_ha_15(asset_list, identified_addresses):
identified_addresses = identified_addresses.drop_duplicates("merge_key") identified_addresses = identified_addresses.drop_duplicates("merge_key")
# We pull out raw counts for the survey lists
# Check asset list for dupes # Check asset list for dupes
asset_list_dupes = asset_list["merge_key"].duplicated() asset_list_dupes = asset_list["merge_key"].duplicated()
if asset_list_dupes.sum(): if asset_list_dupes.sum():
@ -336,7 +337,10 @@ def merge_ha_15(asset_list, identified_addresses):
return merged_data, dropped_identified_merge_keys return merged_data, dropped_identified_merge_keys
def prepare_model_data_row(property_id, modelling_epc, cleaned, cleaning_data, created_at): def prepare_model_data_row(
property_id, modelling_epc, cleaned, cleaning_data, created_at,
photo_supply_lookup, floor_area_decile_thresholds, old_data=None, full_sap_epc=None,
):
""" """
This function prepares the data for modelling, in the same fashion as the recommendation engine This function prepares the data for modelling, in the same fashion as the recommendation engine
With up-coming refactoring, this will change With up-coming refactoring, this will change
@ -346,15 +350,24 @@ def prepare_model_data_row(property_id, modelling_epc, cleaned, cleaning_data, c
p = Property( p = Property(
id=property_id, id=property_id,
postcode=modelling_epc["postcode"], postcode=modelling_epc["postcode"],
address1=modelling_epc["address1"], address=modelling_epc["address1"],
epc_client=None, data=modelling_epc,
data=modelling_epc old_data=old_data,
full_sap_epc=full_sap_epc
) )
p.get_components(cleaned) p.get_components(cleaned, photo_supply_lookup=photo_supply_lookup,
floor_area_decile_thresholds=floor_area_decile_thresholds)
# THIS IS TEMP AND SHOULDN'T BE HERE
data_to_clean = p.get_model_data()
if data_to_clean["NUMBER_HEATED_ROOMS"] in ['', None]:
data_to_clean["NUMBER_HEATED_ROOMS"] = data_to_clean["NUMBER_HABITABLE_ROOMS"]
p.data["number-heated-rooms"] = data_to_clean["NUMBER_HABITABLE_ROOMS"]
# This is temp - this should happen after scoring # This is temp - this should happen after scoring
cleaned_property_data = DataProcessor.apply_averages_cleaning( cleaned_property_data = DataProcessor.apply_averages_cleaning(
data_to_clean=pd.DataFrame([dict(**p.get_model_data(), LOCAL_AUTHORITY=p.data["local-authority"])]), data_to_clean=pd.DataFrame([dict(**data_to_clean, LOCAL_AUTHORITY=p.data["local-authority"])]),
cleaning_data=cleaning_data, cleaning_data=cleaning_data,
cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'], cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
) )
@ -829,6 +842,18 @@ def analyse_ha_32_results(results, ha32, no_house_numbers):
results_df["warmfront_identified"] results_df["warmfront_identified"]
] ]
# Aggregates of no eco and gbis jobs identified
n_eco = results_df["eco4_eligible"].sum()
# Gbis is rows where eco4 is not eligible
n_gbis = results_df[
(results_df["gbis_eligible"] == True) & (results_df["eco4_eligible"] == False)
]["gbis_eligible"].sum()
pipeline_potential = results_df[
(results_df["warmfront_identified"] == True) | (results_df["eco4_eligible"] == True) | (
results_df["gbis_eligible"] == True)
]
success_rate = warmfront_identified["gbis_eligible"].sum() / warmfront_identified.shape[0] success_rate = warmfront_identified["gbis_eligible"].sum() / warmfront_identified.shape[0]
# For HA32, this is 89% # For HA32, this is 89%
@ -886,8 +911,16 @@ def analyse_ha_32_results(results, ha32, no_house_numbers):
new_possibilities = results_df[ new_possibilities = results_df[
(~results_df["warmfront_identified"]) & (~results_df["warmfront_identified"]) &
(results_df["gbis_eligible"] | results_df["eco4_eligible"]) & (results_df["gbis_eligible"] | results_df["eco4_eligible"])
(results_df["tenure"] == "Rented (social)") ].copy()
new_possibilities_eco = results_df[
(~results_df["warmfront_identified"]) &
(results_df["eco4_eligible"] == True)
].copy()
new_possibilities_gbis = results_df[
(~results_df["warmfront_identified"]) &
(results_df["eco4_eligible"] == False) & (results_df["gbis_eligible"] == True)
].copy() ].copy()
future_possibilities_eco = results_df[ future_possibilities_eco = results_df[
@ -947,6 +980,8 @@ def analyse_ha_15_results(results_df, ha15, no_house_numbers):
results_df["warmfront_identified"] results_df["warmfront_identified"]
] ]
warmfront_identified = warmfront_identified
n_identified = (warmfront_identified["gbis_eligible"] | warmfront_identified["eco4_eligible"]).sum() n_identified = (warmfront_identified["gbis_eligible"] | warmfront_identified["eco4_eligible"]).sum()
success_rate = n_identified / warmfront_identified.shape[0] success_rate = n_identified / warmfront_identified.shape[0]
@ -955,6 +990,11 @@ def analyse_ha_15_results(results_df, ha15, no_house_numbers):
"eligibility_classification"].value_counts() "eligibility_classification"].value_counts()
# For HA15 this is 50.3% # For HA15 this is 50.3%
pipeline_potential = results_df[
(results_df["warmfront_identified"] == True) | (results_df["eco4_eligible"] == True) | (
results_df["gbis_eligible"] == True)
]
# of the properties we identify, what is the mix of confidence # of the properties we identify, what is the mix of confidence
missed = results_df[ missed = results_df[
@ -973,32 +1013,37 @@ def analyse_ha_15_results(results_df, ha15, no_house_numbers):
missed["sap"] < 69 missed["sap"] < 69
] ]
sap_low_enough["walls"].value_counts() # Aggregates of no eco and gbis jobs identified
z = ha15[ha15["row_id"].isin(sap_too_high["row_id"].values)] n_eco = results_df["eco4_eligible"].sum()
# Gbis is rows where eco4 is not eligible
investigate_1 = ha15[ha15["row_id"].isin(sap_too_high["row_id"])][ n_gbis = results_df[
["row_id", "Postcode", "Address Line 1", "Address Line 2", "Address Line 3"]] (results_df["gbis_eligible"] == True) & (results_df["eco4_eligible"] == False)
]["gbis_eligible"].sum()
investigate_2 = ha15[ha15["row_id"].isin(sap_low_enough["row_id"])][
["row_id", "Postcode", "Address Line 1", "Address Line 2", "Address Line 3"]]
missed["message"].value_counts()
# We now look for properties that we identified, that were not identified by Warmfront # We now look for properties that we identified, that were not identified by Warmfront
new_possibilities = results_df[ new_possibilities = results_df[
(~results_df["warmfront_identified"]) & (~results_df["warmfront_identified"]) &
((results_df["gbis_eligible"] == True) | (results_df["eco4_eligible"] == True)) & ((results_df["gbis_eligible"] == True) | (results_df["eco4_eligible"] == True))
(results_df["tenure"] == "Rented (social)") ].copy()
new_possibilities_eco = results_df[
(~results_df["warmfront_identified"]) &
(results_df["eco4_eligible"] == True)
].copy()
new_possibilities_gbis = results_df[
(~results_df["warmfront_identified"]) &
(results_df["eco4_eligible"] == False) & (results_df["gbis_eligible"] == True)
].copy() ].copy()
# These are future possibilities # These are future possibilities
new_possibilities_eco = results_df[ future_possibilities_eco = results_df[
(~results_df["warmfront_identified"]) & (~results_df["warmfront_identified"]) &
(results_df["eco4_eligible_future"] == True) & (~(results_df["gbis_eligible"] | results_df["eco4_eligible"])) (results_df["eco4_eligible_future"] == True) & (~(results_df["gbis_eligible"] | results_df["eco4_eligible"]))
].copy() ].copy()
new_possibilities_gbis = results_df[ future_possibilities_gbis = results_df[
(~results_df["warmfront_identified"]) & (~results_df["warmfront_identified"]) &
(results_df["gbis_eligible_future"] == True) & (results_df["eco4_eligible_future"] == False) & ( (results_df["gbis_eligible_future"] == True) & (results_df["eco4_eligible_future"] == False) & (
~(results_df["gbis_eligible"] | results_df["eco4_eligible"])) ~(results_df["gbis_eligible"] | results_df["eco4_eligible"]))
@ -1058,7 +1103,7 @@ def app():
) )
cleaned = msgpack.unpackb(cleaned, raw=False) cleaned = msgpack.unpackb(cleaned, raw=False)
cleaning_data = read_parquet_from_s3( cleaning_data = read_dataframe_from_s3_parquet(
bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
) )

View file

@ -0,0 +1,647 @@
import os
import msgpack
import openpyxl
from pathlib import Path
from datetime import datetime
import pandas as pd
import numpy as np
from utils.s3 import read_from_s3
from utils.logger import setup_logger
from dotenv import load_dotenv
from utils.s3 import read_dataframe_from_s3_parquet
from tqdm import tqdm
from backend.SearchEpc import SearchEpc
from etl.eligibility.Eligibility import Eligibility
from etl.eligibility.ha_15_32.app import prepare_model_data_row
from etl.epc.DataProcessor import DataProcessor
from etl.epc.settings import COLUMNS_TO_MERGE_ON
from backend.ml_models.api import ModelApi
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from recommendations.recommendation_utils import calculate_cavity_age
from recommendation_utils import convert_thickness_to_numeric
import re
# Path to the .env file that is loaded below.
# NOTE(review): this is resolved relative to this file's own directory, while load_data()
# reads its workbooks relative to the working directory - confirm the two agree.
ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"

# The .env file must be loaded BEFORE any environment variable is read; otherwise a value
# that only exists in the file (such as EPC_AUTH_TOKEN) is captured as None at import time.
load_dotenv(ENV_FILE)
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")

logger = setup_logger()
# Directory (relative to the working directory) that holds the HA 16 spreadsheets
_HA16_DATA_DIR = "etl/eligibility/ha_15_32"

# Literal (old, new) corrections applied IN ORDER to the lower-cased survey street names so that they
# line up with the asset list. The order is kept from the original sequence of replacements because a
# later entry can act on the output of an earlier one (e.g. "cresent" -> "crescent" runs before the
# "lichens crescent, fitton hill" entry).
_HA16_STREET_NAME_CORRECTIONS = (
    ("eccels", "eccles"),
    ("chatley, road", "chatley road"),
    ("vaughen", "Vaughan"),
    ("cresent", "crescent"),
    ("plantation road", "plantation avenue"),
    ("how clough drive", "howclough drive"),
    ("brockhurst lane", "brookhurst lane"),
    ("biirch road", "birch road"),
    ("hadson road", "hodson road"),
    ("harbonne avennue", "narbonne avenue"),
    ("cumberland road, cadishead", "cumberland avenue, cadishead"),
    ("aston field drive", "ashton field drive"),
    ("wedgewood road", "wedgwood road"),
    ("hamilton close", "hamilton avenue"),
    ("lichens crescent, fitton hill", "lichens crescent"),
    ("south croft, fitton hill", "south croft"),
    (", fitton hill", ""),
    ("firtree dr", "fir tree avenue"),
    ("hawthorne road", "hawthorn crescent"),
    ("rein lee avenue", "reins lee avenue"),
    ("westerhill road", "wester hill road"),
    ("st martins road", "saint martins road"),
    ("timperley avenue", "timperley close"),
    ("eastwood road", "eastwood avenue"),
    ("new road", "new street"),
    ("grassmere road", "grasmere road"),
    ("hulton road", "hulton avenue"),
    ("beechfield avenue", "beechfield road"),
    ("princess avenue", "princes avenue"),
    ("edge ford crecent", "edge fold crescent"),
    ("conniston avenue", "coniston avenue"),
    ("blackthorne crescent", "blackthorn crescent"),
    ("wellstock road", "wellstock lane"),
    ("brackley avenue", "brackley street"),
    ("brook avenue swinton", "brook avenue, swinton"),
    ("green avenue swinton", "green avenue, swinton"),
    ("grasmere avenue wardley", "grasmere avenue, wardley"),
    ("mardale avenue wardle", "mardale avenue, wardle"),
    ("carleach grove", "cartleach Grove"),
    ("arbour grove", "arbor Grove"),
)


def _read_sheet_with_colours(path):
    """
    Read the active sheet of an Excel workbook, keeping the fill colour of each row.

    :param path: Path of the workbook, passed straight to openpyxl.load_workbook
    :return: Tuple of (header, rows, colours): the cell values of row 1, a list with the cell values
        of every row from row 2 onwards, and a parallel list holding the fill colour index of each
        row's first cell (None where that index is '00000000')
    """
    sheet = openpyxl.load_workbook(path).active
    header = [cell.value for cell in sheet[1]]
    rows = []
    colours = []
    # values_only=False yields cell objects, which are needed to read the fill colour.
    # The first row is assumed to be the header, hence min_row=2
    for row in sheet.iter_rows(min_row=2, values_only=False):
        colour_index = row[0].fill.start_color.index
        rows.append([cell.value for cell in row])
        colours.append(colour_index if colour_index != '00000000' else None)
    return header, rows, colours


def load_data():
    """
    Load the HA 16 asset list and ECO4 survey list and flag which assets appear on the survey list.

    :return: Tuple of (data, survey_list). data is the asset list left-joined to the matched survey
        rows, with a "warmfront_identified" column that is True for matched assets and False
        otherwise; survey_list is the cleaned survey list
    :raises ValueError: If a survey row cannot be matched to exactly one row of the asset list
    """
    # This asset list is spread across two workbooks, which we need to combine. The column names
    # are taken from the first workbook
    asset_list_filenames = [
        "HESTIA - HA 16 ASSET LIST PART 1 OF 2.xlsx",
        "HESTIA - HA 16 ASSET LIST PART 2 OF 2.xlsx",
    ]
    rows_data = []
    rows_colors = []
    colnames = []
    for asset_list_filename in asset_list_filenames:
        header, rows, colours = _read_sheet_with_colours(f"{_HA16_DATA_DIR}/{asset_list_filename}")
        colnames.append(header)
        rows_data.extend(rows)
        rows_colors.extend(colours)

    asset_list = pd.DataFrame(rows_data, columns=colnames[0])
    # Remove None columns
    asset_list = asset_list.iloc[:, 0:12]
    asset_list['row_color'] = rows_colors
    # Anything that is neither the red nor the green fill (including no fill) is labelled yellow
    asset_list["row_colour_name"] = np.where(
        asset_list["row_color"] == "FFFF0000", "red",
        np.where(asset_list["row_color"] == "FF92D050", "green", "yellow")
    )

    # Split up the address on commas, which is useful for matching later
    split_addresses = asset_list['Address'].str.split(',', expand=True)
    split_addresses.columns = ['temp', 'address2', 'address3', 'address4', 'address5']
    asset_list = pd.concat([asset_list, split_addresses], axis=1)
    # There are no commas separating house number and address 1
    split_addresses2 = asset_list['temp'].str.split(' ', expand=True)
    split_addresses2.columns = ['HouseNo', 'part1', 'part2', "part3", "part4"]
    # We could re-concatenate but we only care about HouseNo for the moment
    asset_list = pd.concat([asset_list, split_addresses2[["HouseNo"]]], axis=1)

    # We now read in the survey list
    survey_header, survey_rows, survey_colors = _read_sheet_with_colours(
        f"{_HA16_DATA_DIR}/HESTIA- HA 16 ECO4 SURVEY LIST.xlsx"
    )
    survey_list = pd.DataFrame(survey_rows, columns=survey_header)
    # For the survey list, we don't need the colours, since there is a column called
    # "INSTALLED OR CANCELLED" which describes the status of the property
    survey_list["row_colour"] = survey_colors
    survey_list["survey_key"] = ["survey_" + str(i) for i in range(0, len(survey_list))]

    # Tidy up the street/block name a bit. Literal replacements pass regex=False explicitly because
    # the default of Series.str.replace changed between pandas versions
    street_col = "Street / Block Name"
    streets = survey_list[street_col].str.replace("/", ", ", regex=False).str.lower()
    # NOTE: a comparison against "REEDS RD" used to sit here, but the names are already lower-cased
    # at this point so it could never match. The "rd" -> "road" rule below covers that case.
    # Replace the standalone word "rd" with "road"
    streets = streets.str.replace(r'\brd\b', 'road', regex=True)
    # Replace " , " with ", "
    streets = streets.str.replace(" , ", ', ', regex=False)
    # Fix "{place} ,{place}" with "{place}, {place}"
    streets = streets.str.replace(r'\s*,\s*', ', ', regex=True)
    # Strip whitespace
    survey_list[street_col] = streets.str.strip()

    # Correct errors in the postcodes (this runs before the street name corrections below)
    survey_list["Post Code"] = np.where(
        survey_list["Post Code"] == "M38 0SA",
        "M38 9SA",
        survey_list["Post Code"]
    )
    survey_list["Post Code"] = np.where(
        (survey_list[street_col] == "nelson drive") & (survey_list["Post Code"] == "M44 5JE"),
        "M44 5JF",
        survey_list["Post Code"]
    )

    # Correct errors in the street names
    for wrong, right in _HA16_STREET_NAME_CORRECTIONS:
        survey_list[street_col] = survey_list[street_col].str.replace(wrong, right, regex=False)

    # Replacement for clively avenue 66-68
    survey_list["NO."] = np.where(
        survey_list["NO."] == "66-68",
        "66",
        survey_list["NO."]
    )

    # We now need to merge the survey list onto the asset list.
    # It's easier just to do a search on each row, even though it's much slower
    asset_addresses_lower = asset_list["Address"].str.lower()  # loop invariant, computed once
    matched = []
    for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):
        house_number = row["NO."]
        if isinstance(house_number, str):
            house_number = house_number.lower()
        street = row[street_col].lower()

        # Filter on the street name, then on the house number appearing anywhere in the address.
        # regex=False: the survey values are plain text, not patterns
        candidates = asset_list[asset_addresses_lower.str.contains(street, regex=False)]
        candidates = candidates[
            candidates["Address"].str.lower().str.contains(str(house_number), regex=False)
        ]
        # Progressively tighten the filter until exactly one asset remains
        if candidates.shape[0] != 1:
            candidates = candidates[candidates["HouseNo"] == str(house_number)]
        if candidates.shape[0] != 1:
            candidates = candidates[
                candidates["Postcode"].str.lower().str.contains(row["Post Code"].lower(), regex=False)
            ]
        if candidates.shape[0] != 1:
            raise ValueError(
                f"Investigate: expected exactly one asset for survey row {row['survey_key']} "
                f"({row['NO.']} {street}, {row['Post Code']}) but found {candidates.shape[0]}"
            )

        matched.append(
            {
                "survey_key": row["survey_key"],
                "matched_address": candidates["Address"].values[0],
                "survey_house_no": row["NO."],
                "survey_street_name": row[street_col],
                "survey_postcode": row["Post Code"],
                "survey_status": row["INSTALLED OR CANCELLED"]
            }
        )

    matched = pd.DataFrame(matched)
    matched["warmfront_identified"] = True

    # Combine asset list and surveys
    data = asset_list.merge(
        matched, how="left", left_on="Address", right_on="matched_address",
    )
    data["warmfront_identified"] = data["warmfront_identified"].fillna(False)
    return data, survey_list
def _proxy_uprn(row_id):
    """Return the integer after the first underscore of a row id (e.g. "ha16_12" -> 12)."""
    return int(row_id.split("_")[1])


def _classify_eco4_confidence(sap, post_install_sap):
    """
    Grade how likely a property is to reach its required post-install SAP score.

    The target depends on the current SAP: properties with a current SAP of 38 or below (described
    as F or G rated in the original code) are graded against 57/55/53, everything else against
    71/69/67.
    NOTE(review): presumably these are the band D (55) and band C (69) minimums +/- 2 points -
    confirm against the ECO4 guidance.

    :param sap: Current SAP score
    :param post_install_sap: Predicted SAP score after the install
    :return: One of "highest confidence", "high confidence", "medium confidence" or "unlikely"
    """
    if sap <= 38:
        highest, high, medium = 57, 55, 53
    else:
        highest, high, medium = 71, 69, 67

    if post_install_sap >= highest:
        return "highest confidence"
    if post_install_sap >= high:
        return "high confidence"
    if post_install_sap >= medium:
        return "medium confidence"
    return "unlikely"


def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
    """
    Find the EPCs for every asset, run the eligibility checks and predict the SAP uplift.

    :param data: Asset list; each row needs "row_id", "HouseNo", "Postcode", "Address" and "Type"
    :param cleaned: Cleaned EPC lookup passed to Eligibility, calculate_cavity_age and
        prepare_model_data_row
    :param cleaning_data: Cleaning dataset passed to prepare_model_data_row and DataProcessor
    :param created_at: Timestamp passed to prepare_model_data_row and ModelApi
    :param photo_supply_lookup: Passed through to prepare_model_data_row
    :param floor_area_decile_thresholds: Passed through to prepare_model_data_row
    :return: Tuple of (results_df, scoring_data, nodata): one results row per asset with an EPC
        (with SAP uplift and confidence classification merged on), the raw scoring rows built for
        ECO4-eligible assets, and the asset rows for which no EPC was found
    :raises KeyError: If an asset has a "Type" that is not in the property type lookup
    """
    scoring_data = []
    results = []
    nodata = []
    # Maps the asset list "Type" onto the EPC property-type / built-form vocabulary
    property_type_lookup = {
        'Semi Detached Bungalow': {"property-type": "Bungalow", "built-form": "Semi-Detached"},
        'Mid Terraced House': {"property-type": "House", "built-form": "Mid-Terrace"},
        'End Terraced House': {"property-type": "House", "built-form": "End-Terrace"},
        'Low Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"},
        'Semi-Detached House': {"property-type": "House", "built-form": "Semi-Detached"},
        'Detached Bungalow': {"property-type": "Bungalow", "built-form": "Detached"},
        'End Terraced Bungalow': {"property-type": "Bungalow", "built-form": "End-Terrace"},
        'Mid Terraced Bungalow': {"property-type": "Bungalow", "built-form": "Mid-Terrace"},
        'Medium Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"},
        'Detached House': {"property-type": "House", "built-form": "Detached"},
        'Cottage Flat': {"property-type": "Flat", "built-form": "Semi-Detached"},
        'Maisonette Medium Rise': {"property-type": "Flat", "built-form": "Mid-Terrace"},
        'Maisonette Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
        'End Terraced Town House': {"property-type": "House", "built-form": "End-Terrace"},
        'Flat Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
        'Mid Terraced Town House': {"property-type": "House", "built-form": "Mid-Terrace"},
    }

    for _, property_meta in tqdm(data.iterrows(), total=len(data)):
        searcher = SearchEpc(
            address1=property_meta["HouseNo"],
            postcode=property_meta["Postcode"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            full_address=property_meta["Address"]
        )
        # The Ordnance Survey lookup is skipped, so we set the property attributes ourselves
        asset_type = property_type_lookup[property_meta["Type"]]
        searcher.ordnance_survey_client.property_type = asset_type["property-type"]
        searcher.ordnance_survey_client.built_form = asset_type["built-form"]
        searcher.find_property(skip_os=True)

        if searcher.newest_epc is None:
            nodata.append(property_meta)
            continue

        if searcher.newest_epc.get("estimated"):
            # We insert the row ID as our proxy for UPRN
            searcher.newest_epc["uprn"] = _proxy_uprn(property_meta["row_id"])

        newest_epc = searcher.newest_epc
        older_epcs = searcher.older_epcs
        full_sap_epc = searcher.full_sap_epc

        # We also want to get the penultimate epc, falling back to the newest one if there is none
        penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
        if not penultimate_epc:
            penultimate_epc = newest_epc

        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
        eligibility.check_gbis_warmfront()
        eligibility.check_eco4_warmfront()

        if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
            # Not eligible on the newest EPC, so we retry with the penultimate one
            eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
            eligibility.check_gbis_warmfront()
            eligibility.check_eco4_warmfront()
            # If this is the case, we need to update the older epcs.
            # For estimated EPCs we don't update, just to make data cleaning easier
            if penultimate_epc.get("estimated") is None:
                older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]

        # If the property is a cavity wall and it's filled, we produce an estimate for the age of
        # the cavity. Loft MUST be suitable
        cavity_age = None
        if (
            eligibility.walls["is_cavity_wall"] and
            eligibility.walls["is_filled_cavity"] and
            eligibility.loft["suitability"] and
            eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
        ):
            # We check the age of the cavity and if it's particularly old, we flag it
            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)

        # Full checks
        eligibility.check_gbis()
        eligibility.check_eco4()

        # Only ECO4-eligible properties are scored by the model
        if eligibility.eco4_warmfront["eligible"]:
            if eligibility.epc["uprn"] == "":
                eligibility.epc["uprn"] = _proxy_uprn(property_meta["row_id"])
            scoring_dictionary = prepare_model_data_row(
                property_id=property_meta["row_id"],
                modelling_epc=eligibility.epc,
                cleaned=cleaned,
                cleaning_data=cleaning_data,
                created_at=created_at,
                old_data=older_epcs,
                full_sap_epc=full_sap_epc,
                photo_supply_lookup=photo_supply_lookup,
                floor_area_decile_thresholds=floor_area_decile_thresholds
            )
            scoring_data.extend(scoring_dictionary)

        results.append(
            {
                "row_id": property_meta["row_id"],
                "uprn": eligibility.epc["uprn"],
                "Address": property_meta["Address"],
                "Postcode": property_meta["Postcode"],
                "property_type": eligibility.epc["property-type"],
                "gbis_eligible": eligibility.gbis_warmfront,
                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                "eco4_message": eligibility.eco4_warmfront["message"],
                "sap": float(eligibility.epc["current-energy-efficiency"]),
                "gbis_eligible_future": eligibility.gbis["eligible"],
                "gbis_eligible_future_message": eligibility.gbis["message"],
                "eco4_eligible_future": eligibility.eco4["eligible"],
                "eco4_eligible_future_message": eligibility.eco4["message"],
                # Property components
                "roof": eligibility.roof["clean_description"],
                "walls": eligibility.walls["clean_description"],
                "cavity_type": eligibility.cavity["type"],
                "heating": eligibility.epc["mainheat-description"],
                "tenure": eligibility.tenure,
                "date_epc": eligibility.epc["lodgement-date"],
                "loft_thickness": eligibility.roof["insulation_thickness"],
                "cavity_age": cavity_age,
                # Any key shared with the entries above is overwritten by these two dictionaries
                **eligibility.walls,
                **eligibility.roof,
            }
        )

    scoring_df = pd.DataFrame(scoring_data)
    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])
    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )
    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)

    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )

    # The uplift of a property is the sum, over all its prediction rows, of (prediction - current SAP)
    predictions = all_predictions["sap_change_predictions"].copy()
    results_df = pd.DataFrame(results)
    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
        results_df[["row_id", "sap"]], how="left", on="row_id"
    )
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
    results_df = results_df.merge(
        predictions[["sap_uplift", "row_id"]],
        how="left",
        on="row_id"
    )
    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]

    # Grade every ECO4-eligible property on how likely it is to hit its post-install SAP target.
    # "== True" is deliberate: it keeps the element-wise comparison of the original filter
    eco4_eligible_rows = results_df[results_df["eco4_eligible"] == True]  # noqa: E712
    eligibility_assessment = pd.DataFrame(
        [
            {
                "row_id": row["row_id"],
                "eligibility_classification": _classify_eco4_confidence(row["sap"], row["post_install_sap"])
            }
            for _, row in eco4_eligible_rows.iterrows()
        ],
        # Explicit columns keep the merge below valid even when no property is eligible
        columns=["row_id", "eligibility_classification"],
    )
    results_df = results_df.merge(
        eligibility_assessment, how="left", on="row_id"
    )
    return results_df, scoring_data, nodata
def analyse_results(results_df, data, survey_list):
    """
    Exploratory breakdown of the eligibility results against the properties on the survey list.

    Every subset and count below is computed purely for inspection (e.g. from a debugger or an
    interactive session); nothing is stored and the function returns None.

    :param results_df: Eligibility results, one row per "row_id"
    :param data: Asset list with "row_id", "survey_key", "warmfront_identified" and "row_colour_name"
    :param survey_list: Survey list; its first column is used as the funding scheme
    """
    eco4_schemes = ["ECO4 A/W", "AFFORDABLE WARMTH"]
    gbis_schemes = ["ECO4 GBIS (ECO+)"]
    full_cavity_message = "Failed due to full cavity - check cavity age"

    # Build the analysis table: assets + eligibility results + funding scheme of the survey
    scheme_column = survey_list.columns[0]
    funding = survey_list[["survey_key", scheme_column]].rename(columns={scheme_column: "funding_scheme"})
    assets = data[["row_id", "survey_key", "warmfront_identified", "row_colour_name"]]
    analysis_data = assets.merge(results_df, how="left", on="row_id")
    analysis_data = analysis_data.merge(funding, how="left", on="survey_key")

    # Missing thicknesses become None before they are converted to numbers
    analysis_data["roof_insulation_thickness"] = np.where(
        pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"]
    )

    def pitched_thickness(thickness):
        return convert_thickness_to_numeric(thickness, is_flat=False, is_pitched=True)

    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
        pitched_thickness
    )

    # Properties on the survey list, by funding scheme
    on_survey_list = analysis_data["warmfront_identified"] == True
    warmfront_sold_eco4 = analysis_data[on_survey_list & analysis_data["funding_scheme"].isin(eco4_schemes)]
    warmfront_sold_gbis = analysis_data[on_survey_list & analysis_data["funding_scheme"].isin(gbis_schemes)]

    # ECO4-eligible properties that are not on the survey list, split by loft insulation thickness
    eco4_now = analysis_data["eco4_eligible"] == True
    not_on_survey_list = analysis_data["warmfront_identified"] == False
    thin_loft = analysis_data["roof_insulation_thickness_numeric"] <= 100
    thick_loft = analysis_data["roof_insulation_thickness_numeric"] > 100
    ideal_eco4_warmfront_not_sold = analysis_data[eco4_now & not_on_survey_list & thin_loft]
    secondary_eco4_warmfront_not_sold = analysis_data[eco4_now & not_on_survey_list & thick_loft]

    # Underperforming cavities: filled cavities older than ten years (cavity_age compared to 10 * 365)
    old_full_cavity = (analysis_data["eco4_message"] == full_cavity_message) & (
        analysis_data["cavity_age"] > 10 * 365
    )
    underperforming_cavities = analysis_data[old_full_cavity & thin_loft]

    not_eco4_now = analysis_data["eco4_eligible"] == False
    gbis_now = analysis_data["gbis_eligible"] == True
    identified_gbis_not_sold = analysis_data[gbis_now & not_on_survey_list & not_eco4_now]

    eco_eligible = analysis_data[eco4_now]
    eco_ineligible = analysis_data[not_eco4_now]
    eco_ineligible["eco4_message"].value_counts()

    # SAP too high:
    sap_too_high = eco_ineligible[eco_ineligible["eco4_message"] == "sap too high"].copy()
    cavity_wall_descriptions = [
        "Cavity wall, as built, insulated",
        "Cavity wall, as built, no insulation",
        "Cavity wall, as built, partial insulation",
        "Cavity wall, no insulation",
        "Cavity wall, partial insulation"
    ]
    further_possibilities = sap_too_high[sap_too_high["walls"].isin(cavity_wall_descriptions)]
    filled_cavities = eco_ineligible[eco_ineligible["eco4_message"] == "sap too high"]

    warmfront_identified = analysis_data[analysis_data["warmfront_identified"]]
    warmfront_identified["walls"].value_counts()

    # Everything that is GBIS: either sold as GBIS, or GBIS-eligible without being ECO4-eligible
    sold_as_gbis = analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin(gbis_schemes)
    gbis_only = analysis_data["gbis_eligible"] & analysis_data["eco4_eligible"].isin([False, None])
    all_identified_gbis = analysis_data[sold_as_gbis | gbis_only]

    empty_cavity_desriptions = [
        "Cavity wall, as built, no insulation", "Cavity wall, as built, partial insulation",
        "Cavity wall, no insulation", "Cavity wall, partial insulation"
    ]
    empty_cavities = analysis_data[analysis_data["walls"].isin(empty_cavity_desriptions)]
    remaining_empty = empty_cavities[~empty_cavities["warmfront_identified"]]

    warmfront_identified = analysis_data[analysis_data["warmfront_identified"]]

    # Of the ECO jobs, what proportion do we get right
    warmfront_identified_eco = warmfront_identified[warmfront_identified["funding_scheme"].isin(eco4_schemes)]
    eco_success_rate = warmfront_identified_eco["eco4_eligible"].sum() / warmfront_identified_eco.shape[0]
    warmfront_identified_gbis = warmfront_identified[warmfront_identified["funding_scheme"].isin(gbis_schemes)]
    gbis_success_rate = warmfront_identified_gbis["gbis_eligible"].sum() / warmfront_identified_gbis.shape[0]

    # Additional identified
    additional_identified_eco = analysis_data[eco4_now & not_on_survey_list]
    additional_identified_eco["eligibility_classification"].value_counts()
    additional_identified_gbis = analysis_data[gbis_now & not_eco4_now & not_on_survey_list].shape[0]

    # Future
    eco4_future = analysis_data["eco4_eligible_future"] == True
    not_eco4_future = analysis_data["eco4_eligible_future"] == False
    gbis_future = analysis_data["gbis_eligible_future"] == True
    additional_identified_eco_future = analysis_data[eco4_future & not_on_survey_list].shape[0]
    additional_identified_gbis_future = analysis_data[
        gbis_future & not_eco4_future & not_on_survey_list
    ].shape[0]
def app():
    """
    Run the HA 16 eligibility pipeline: load the asset and survey lists, fetch the shared
    datasets from S3 and compute the eligibility results for every asset.

    Nothing is returned; the results are only held in local variables.
    """
    bucket = "retrofit-data-dev"

    data, survey_list = load_data()
    # Every asset gets a stable id of the form "ha16_<position>"
    data["row_id"] = [f"ha16_{position}" for position in range(len(data))]

    # The cleaned EPC lookup is stored msgpack-encoded
    packed_cleaned = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name=bucket,
    )
    cleaned = msgpack.unpackb(packed_cleaned, raw=False)

    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name=bucket,
        file_key="sap_change_model/cleaning_dataset.parquet",
    )

    created_at = datetime.now().isoformat()
    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=bucket)

    results_df, scoring_data, nodata = get_epc_data(
        data,
        cleaned,
        cleaning_data,
        created_at,
        photo_supply_lookup,
        floor_area_decile_thresholds,
    )
# Store
# Old file was ha16.pickle
# import pickle
# with open("ha16_10_jan.pickle", "wb") as f:
# pickle.dump(
# {
# "scoring_data": scoring_data,
# "results": results_df,
# "nodata": nodata
# }, f
# )
# Read pickle
# import pickle
# with open("ha16_10_jan.pickle", "rb") as f:
# saved = pickle.load(f)
# scoring_data = saved["scoring_data"]
# results_df = saved["results"]
# nodata = saved["nodata"]

View file

@ -0,0 +1,524 @@
import os
import msgpack
import openpyxl
from pathlib import Path
from datetime import datetime
import pandas as pd
import numpy as np
from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet
from utils.logger import setup_logger
from dotenv import load_dotenv
from tqdm import tqdm
from backend.SearchEpc import SearchEpc
from etl.eligibility.Eligibility import Eligibility
from etl.eligibility.ha_15_32.app import prepare_model_data_row
from etl.epc.DataProcessor import DataProcessor
from etl.epc.settings import COLUMNS_TO_MERGE_ON
from backend.ml_models.api import ModelApi
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from recommendations.recommendation_utils import calculate_cavity_age
from recommendation_utils import convert_thickness_to_numeric
# NOTE(review): this path is resolved relative to the directory containing this module - confirm
# that "<module dir>/etl/eligibility/ha_15_32/.env" is really where the .env file lives.
ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
logger = setup_logger()
# The .env file has to be loaded BEFORE reading any variable it may define. Previously the token was
# read first, so it was None whenever it only existed in the .env file.
load_dotenv(ENV_FILE)
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
# Directory (relative to the working directory) that holds the HA 24 spreadsheets
_HA24_DATA_DIR = "etl/eligibility/ha_15_32"

# Literal (old, new) corrections applied IN ORDER to the lower-cased survey street names so that they
# line up with the asset list. The order is kept from the original sequence of replacements; the
# generic "council house, " removal sits after the two specific "council house, ..." entries.
_HA24_STREET_NAME_CORRECTIONS = (
    ("council house, nidds lane", "nidds lane"),
    ("wirral avenue", "wirrall avenue"),
    ("st ives road", "st. ives crescent"),
    ("sundringham road", "sandringham road"),
    ("milton avenue", "milton road"),
    ("st ives crescent", "st. ives crescent"),
    ("council house, waterbelly lane", "waterbelly lane"),
    # Generally remove "council house, " from the street name
    ("council house, ", ""),
    ("st. leodegars close", "st leodegars close"),
)


def _read_sheet_with_colours(path):
    """
    Read the active sheet of an Excel workbook, keeping the fill colour of each row.

    :param path: Path of the workbook, passed straight to openpyxl.load_workbook
    :return: Tuple of (header, rows, colours): the cell values of row 1, a list with the cell values
        of every row from row 2 onwards, and a parallel list holding the fill colour index of each
        row's first cell (None where that index is '00000000')
    """
    sheet = openpyxl.load_workbook(path).active
    header = [cell.value for cell in sheet[1]]
    rows = []
    colours = []
    # values_only=False yields cell objects, which are needed to read the fill colour.
    # The first row is assumed to be the header, hence min_row=2
    for row in sheet.iter_rows(min_row=2, values_only=False):
        colour_index = row[0].fill.start_color.index
        rows.append([cell.value for cell in row])
        colours.append(colour_index if colour_index != '00000000' else None)
    return header, rows, colours


def load_data():
    """
    Load the HA 24 asset list and ECO4 survey list and flag which assets appear on the survey list.

    :return: Tuple of (data, survey_list). data is the asset list left-joined to the matched survey
        rows, with a "warmfront_identified" column that is True for matched assets and False
        otherwise; survey_list is the cleaned survey list without the rows lacking a street name
    :raises ValueError: If a survey row cannot be matched to exactly one row of the asset list
    """
    sheet_colnames, rows_data, rows_colors = _read_sheet_with_colours(
        f"{_HA24_DATA_DIR}/HESTIA - HA 24 ASSET LIST.xlsx"
    )
    asset_list = pd.DataFrame(rows_data, columns=sheet_colnames)
    # Remove None columns
    asset_list = asset_list.iloc[:, 0:10]
    asset_list['row_color'] = rows_colors
    # Anything that is neither the red nor the green fill (including no fill) is labelled yellow
    asset_list["row_colour_name"] = np.where(
        asset_list["row_color"] == "FFFF0000", "red",
        np.where(asset_list["row_color"] == "FF92D050", "green", "yellow")
    )
    asset_list["row_colour_code"] = np.where(
        asset_list["row_colour_name"] == "red", "does not meet criteria",
        np.where(asset_list["row_colour_name"] == "green", "identified potential eco", "maybe in the future")
    )

    # The third column is listed as "Address" but it's actually the postcode. We have two Address
    # columns so we rename just the third, by position. The labels are replaced wholesale rather
    # than mutating `columns.values` in place, which pandas does not support
    column_names = list(asset_list.columns)
    column_names[2] = "Postcode"
    asset_list.columns = column_names

    # Split up the address on commas, which is useful for matching later
    split_addresses = asset_list['Address'].str.split(',', expand=True)
    split_addresses.columns = ['temp', 'address2', 'address3', 'address4', 'address5', 'address6']
    asset_list = pd.concat([asset_list, split_addresses], axis=1)
    # There are no commas separating house number and address 1
    split_addresses2 = asset_list['temp'].str.split(' ', expand=True)
    split_addresses2.columns = ['HouseNo', 'part1', 'part2', "part3", "part4"]
    # We could re-concatenate but we only care about HouseNo for the moment
    asset_list = pd.concat([asset_list, split_addresses2[["HouseNo"]]], axis=1)

    # Read in surveys
    survey_header, survey_rows, survey_colors = _read_sheet_with_colours(
        f"{_HA24_DATA_DIR}/HESTIA - HA 24 ECO4 SURVEY LIST.xlsx"
    )
    survey_list = pd.DataFrame(survey_rows, columns=survey_header)
    survey_list["row_colour"] = survey_colors

    # Tidy up the street/block name a bit. Literal replacements pass regex=False explicitly because
    # the default of Series.str.replace changed between pandas versions, and some of the names
    # contain "." which must not act as a wildcard
    street_col = "Street / Block Name"
    survey_list[street_col] = (
        survey_list[street_col].str.replace("/", ", ", regex=False).str.lower().str.strip()
    )
    for wrong, right in _HA24_STREET_NAME_CORRECTIONS:
        survey_list[street_col] = survey_list[street_col].str.replace(wrong, right, regex=False)

    # Drop all rows without a street name. The copy lets us add a column without pandas warning
    # about writing to a slice
    survey_list = survey_list[~pd.isnull(survey_list[street_col])].copy()
    # Keys are assigned after dropping rows so that they are contiguous
    survey_list["survey_key"] = ["survey_" + str(i) for i in range(0, len(survey_list))]

    asset_addresses_lower = asset_list["Address"].str.lower()  # loop invariant, computed once
    matched = []
    for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):
        house_number = row["NO."]
        if isinstance(house_number, str):
            house_number = house_number.lower()
        street = row[street_col].lower()

        # Filter on the street name, then on the house number appearing anywhere in the address.
        # regex=False: the survey values are plain text, not patterns
        candidates = asset_list[asset_addresses_lower.str.contains(street, regex=False)]
        candidates = candidates[
            candidates["Address"].str.lower().str.contains(str(house_number), regex=False)
        ]
        # Progressively tighten the filter until exactly one asset remains
        if candidates.shape[0] != 1:
            candidates = candidates[candidates["HouseNo"] == str(house_number)]
        if candidates.shape[0] != 1:
            candidates = candidates[
                candidates["Postcode"].str.lower().str.contains(row["Post Code"].lower(), regex=False)
            ]
        if candidates.shape[0] != 1:
            # The details that used to be printed before raising are now part of the message
            raise ValueError(
                f"Investigate: expected exactly one asset for survey row {row['survey_key']} "
                f"({house_number} {street}, {row['Post Code'].lower()}) but found {candidates.shape[0]}"
            )

        matched.append(
            {
                "survey_key": row["survey_key"],
                "matched_address": candidates["Address"].values[0],
                "survey_house_no": row["NO."],
                "survey_street_name": row[street_col],
                "survey_postcode": row["Post Code"],
                "survey_status": row["INSTALLED OR CANCELLED"]
            }
        )

    matched = pd.DataFrame(matched)
    matched["warmfront_identified"] = True

    # Combine asset list and surveys
    data = asset_list.merge(
        matched, how="left", left_on="Address", right_on="matched_address",
    )
    data["warmfront_identified"] = data["warmfront_identified"].fillna(False)
    return data, survey_list
def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
# Looks up the EPC for every row of `data`, runs the Eligibility checks on it, builds model-scoring rows,
# requests predictions from ModelApi and grades each ECO4-eligible row by its predicted post-install SAP.
#
# Params:
#   data: DataFrame iterated with iterrows(); the columns read here are HouseNo, Postcode, Address,
#       "Property Type" and row_id.
#   cleaned: passed unchanged to Eligibility, calculate_cavity_age and prepare_model_data_row.
#   cleaning_data: passed unchanged to prepare_model_data_row and DataProcessor.apply_averages_cleaning.
#   created_at: passed to prepare_model_data_row and used as the ModelApi timestamp.
#   photo_supply_lookup, floor_area_decile_thresholds: passed unchanged to prepare_model_data_row.
# Returns:
#   (results_df, scoring_data, nodata): the per-property results DataFrame, the list of raw model-input rows,
#   and the list of asset rows for which the searcher produced no newest EPC.
scoring_data = []
results = []
nodata = []
# Maps the asset list's "Property Type" codes onto the property type handed to the searcher.
# The " 01 HOUSE MID" key carries a leading space, so it is a separate key from "01 HOUSE MID".
property_type_lookup = {
"01 HOUSE": "House",
"02 FLAT": "Flat",
"03 BUNGALOW": "Bungalow",
"05 BEDSIT": "Flat",
"04 MAISONETTE": "Maisonette",
"01 HOUSE MID": "House",
"10 PBUNGALOW": "Bungalow",
"14 SFLAT": "Flat",
"12 SBEDSIT": "Flat",
"11 PFLAT": "Flat",
"13 SBUNGALOW": "Bungalow",
" 01 HOUSE MID": "House",
"09 PBEDSIT": "Flat"
}
for _, property_meta in tqdm(data.iterrows(), total=len(data)):
searcher = SearchEpc(
address1=property_meta["HouseNo"],
postcode=property_meta["Postcode"],
auth_token=EPC_AUTH_TOKEN,
os_api_key=None,
full_address=property_meta["Address"]
)
# The property type comes from the asset list; find_property is called with skip_os=True
# (presumably so no Ordnance Survey lookup is made - confirm against SearchEpc)
searcher.ordnance_survey_client.property_type = property_type_lookup[property_meta["Property Type"]]
searcher.find_property(skip_os=True)
# Properties without any EPC are collected separately and take no further part in the analysis
if searcher.newest_epc is None:
nodata.append(property_meta)
continue
newest_epc = searcher.newest_epc
older_epcs = searcher.older_epcs
full_sap_epc = searcher.full_sap_epc
# We also want to get the penultimate epc
penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
# With no older EPC available, the newest EPC doubles as the penultimate one
if not penultimate_epc:
penultimate_epc = newest_epc
eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
eligibility.check_gbis_warmfront()
eligibility.check_eco4_warmfront()
# When the newest EPC qualifies for neither scheme, the checks are repeated on the penultimate EPC
if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
eligibility.check_gbis_warmfront()
eligibility.check_eco4_warmfront()
# If this is the case, we need to update the older epcs
# older_epcs = [
# x for x in older_epcs if x["lmk-key"] not in [newest_epc["lmk-key"], penultimate_epc["lmk-key"]]
# ]
# If this is the case, we need to update the older epcs
# We don't update just to make data cleaning easier
# A real (non-estimated) penultimate EPC is excluded from the list of older EPCs by its lmk-key
if penultimate_epc.get("estimated") is None:
older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
# Loft MUST be suitable
cavity_age = None
if (
eligibility.walls["is_cavity_wall"] and
eligibility.walls["is_filled_cavity"] and
eligibility.loft["suitability"] and
eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
):
# We check the age of the cavity and if it's particularly old, we flag it
cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)
# Full checks
eligibility.check_gbis()
eligibility.check_eco4()
if eligibility.eco4_warmfront["eligible"]:
# Fall back to the numeric suffix of row_id as a stand-in UPRN when the EPC has none
if eligibility.epc["uprn"] in ["", None]:
eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[1])
# Build the model-input rows for this property and add them to the scoring set
scoring_dictionary = prepare_model_data_row(
property_id=property_meta["row_id"],
modelling_epc=eligibility.epc,
cleaned=cleaned,
cleaning_data=cleaning_data,
created_at=created_at,
old_data=older_epcs,
full_sap_epc=full_sap_epc,
photo_supply_lookup=photo_supply_lookup,
floor_area_decile_thresholds=floor_area_decile_thresholds
)
scoring_data.extend(scoring_dictionary)
# One result record per property; the walls and roof dictionaries are flattened into the record last,
# so any key they share with the explicit entries above takes the dictionary's value
results.append(
{
"row_id": property_meta["row_id"],
"uprn": eligibility.epc["uprn"],
"Address": property_meta["Address"],
"Postcode": property_meta["Postcode"],
"property_type": eligibility.epc["property-type"],
"gbis_eligible": eligibility.gbis_warmfront,
"eco4_eligible": eligibility.eco4_warmfront["eligible"],
"eco4_message": eligibility.eco4_warmfront["message"],
"sap": float(eligibility.epc["current-energy-efficiency"]),
"gbis_eligible_future": eligibility.gbis["eligible"],
"gbis_eligible_future_message": eligibility.gbis["message"],
"eco4_eligible_future": eligibility.eco4["eligible"],
"eco4_eligible_future_message": eligibility.eco4["message"],
# Property components
"roof": eligibility.roof["clean_description"],
"walls": eligibility.walls["clean_description"],
"cavity_type": eligibility.cavity["type"],
"heating": eligibility.epc["mainheat-description"],
"tenure": eligibility.tenure,
"date_epc": eligibility.epc["lodgement-date"],
"cavity_age": cavity_age,
**eligibility.walls,
**eligibility.roof,
}
)
scoring_df = pd.DataFrame(scoring_data)
# Perform the same cleaning as in the model - first clean number of room variables though
scoring_df = DataProcessor.apply_averages_cleaning(
data_to_clean=scoring_df,
cleaning_data=cleaning_data,
cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
)
# Second pass uses the standard merge columns plus LOCAL_AUTHORITY, which is dropped afterwards
scoring_df = DataProcessor.apply_averages_cleaning(
data_to_clean=scoring_df,
cleaning_data=cleaning_data,
cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
).drop(columns=["LOCAL_AUTHORITY"])
# Thermal transmittance, insulation thickness and ENERGY_EFF columns are excluded from this cleaning step
scoring_df = DataProcessor.clean_missings_after_description_process(
scoring_df,
ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
"insulation_thickness" in c) or ("ENERGY_EFF" in c)]
)
scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)
model_api = ModelApi(portfolio_id="ha24-eligibility", timestamp=created_at)
all_predictions = model_api.predict_all(
df=scoring_df,
bucket="retrofit-data-dev",
prediction_buckets={
"sap_change_predictions": "retrofit-sap-predictions-dev",
"heat_demand_predictions": "retrofit-heat-predictions-dev",
"carbon_change_predictions": "retrofit-carbon-predictions-dev"
}
)
# Only the SAP change predictions are used from here on
predictions = all_predictions["sap_change_predictions"].copy()
results_df = pd.DataFrame(results)
predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
results_df[["row_id", "sap"]], how="left", on="row_id"
)
# Uplift is the predicted value minus the current SAP, summed over all prediction rows of a property
predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
results_df = results_df.merge(
predictions[["sap_uplift", "row_id"]],
how="left",
on="row_id"
)
results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
eligibility_assessment = []
for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
# The upgrade requirements are dependent on the current SAP
# A current SAP of 38 or below is graded against 57 / 55 / 53, anything higher against 71 / 69 / 67
# (55 and 69 look like the lower bounds of EPC bands D and C - TODO confirm)
if row["sap"] <= 38:
if row["post_install_sap"] >= 57:
eligibility_classification = "highest confidence"
elif row["post_install_sap"] >= 55:
eligibility_classification = "high confidence"
elif row["post_install_sap"] >= 53:
eligibility_classification = "medium confidence"
else:
eligibility_classification = "unlikely"
else:
if row["post_install_sap"] >= 71:
eligibility_classification = "highest confidence"
elif row["post_install_sap"] >= 69:
eligibility_classification = "high confidence"
elif row["post_install_sap"] >= 67:
eligibility_classification = "medium confidence"
else:
eligibility_classification = "unlikely"
eligibility_assessment.append(
{
"row_id": row["row_id"],
"eligibility_classification": eligibility_classification
}
)
eligibility_assessment = pd.DataFrame(eligibility_assessment)
# Left merge: rows that were not ECO4 eligible end up with a missing eligibility_classification
results_df = results_df.merge(
eligibility_assessment, how="left", on="row_id"
)
return results_df, scoring_data, nodata
def analyse_results(results_df, data, survey_list):
    """
    Exploratory comparison of the eligibility results against the properties Warmfront identified.

    The asset rows in `data` are joined to `results_df` on row_id and to the funding scheme held in the
    first column of `survey_list` on survey_key. The combined frame is then sliced into the sold,
    additional and future cohorts. Nothing is returned: every cohort is a local kept for inspection.

    :param results_df: Per-property results, keyed by row_id
    :param data: Asset list carrying the row_id, survey_key and warmfront_identified columns
    :param survey_list: Survey list keyed by survey_key; its first column is used as the funding scheme
    """
    # Expose the first survey column under a stable name before joining
    scheme_column = survey_list.columns[0]
    survey_schemes = survey_list[["survey_key", scheme_column]].rename(columns={scheme_column: "funding_scheme"})
    asset_keys = data[["row_id", "survey_key", "warmfront_identified"]]
    analysis_data = asset_keys.merge(results_df, how="left", on="row_id")
    analysis_data = analysis_data.merge(survey_schemes, how="left", on="survey_key")

    # Null thickness values are replaced by None before the numeric conversion
    raw_thickness = analysis_data["roof_insulation_thickness"]
    analysis_data["roof_insulation_thickness"] = np.where(pd.isnull(raw_thickness), None, raw_thickness)

    def _as_pitched_roof_thickness(value):
        return convert_thickness_to_numeric(value, is_flat=False, is_pitched=True)

    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
        _as_pitched_roof_thickness
    )

    eco4_schemes = ["ECO4 A/W", "AFFORDABLE WARMTH"]
    gbis_schemes = ["ECO4 GBIS (ECO+)"]

    # Explicit comparisons against True / False so missing values from the left joins count as neither
    was_identified = analysis_data["warmfront_identified"] == True
    was_not_identified = analysis_data["warmfront_identified"] == False
    eco4_now = analysis_data["eco4_eligible"] == True
    gbis_now = analysis_data["gbis_eligible"] == True

    # What Warmfront identified, split by funding scheme
    warmfront_sold_eco4 = analysis_data[was_identified & analysis_data["funding_scheme"].isin(eco4_schemes)]
    warmfront_sold_gbis = analysis_data[was_identified & analysis_data["funding_scheme"].isin(gbis_schemes)]

    # Extra ECO4 candidates: eligible, not identified by Warmfront, roof insulation of at most 100
    thin_roof_insulation = analysis_data["roof_insulation_thickness_numeric"] <= 100
    additional_eco4_warmfront_not_sold = analysis_data[eco4_now & was_not_identified & thin_roof_insulation]

    # Extra GBIS candidates exclude anything already counted as an extra ECO4 candidate
    already_counted_as_eco4 = analysis_data["row_id"].isin(additional_eco4_warmfront_not_sold["row_id"].values)
    additional_gbis_warmfront_not_sold = analysis_data[gbis_now & was_not_identified & ~already_counted_as_eco4]
    additional_gbis_warmfront_not_sold["walls"].value_counts()
    analysis_data["walls"].value_counts()

    all_identified_eco = analysis_data[
        (analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin(["ECO4 A/W"]))
        | (analysis_data["eco4_eligible"])
    ]
    all_identified_gbis = analysis_data[
        (analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin(gbis_schemes))
        | (analysis_data["gbis_eligible"] & analysis_data["eco4_eligible"].isin([False, None]))
    ]

    warmfront_identified = analysis_data[analysis_data["warmfront_identified"]]

    # Of the ECO jobs Warmfront identified, the share that we also flag as ECO4 eligible
    warmfront_identified_eco = warmfront_identified[warmfront_identified["funding_scheme"].isin(eco4_schemes)]
    eco_success_rate = warmfront_identified_eco["eco4_eligible"].sum() / warmfront_identified_eco.shape[0]

    # No GBIS jobs for this portfolio, so no GBIS success rate is derived
    warmfront_identified_gbis = warmfront_identified[warmfront_identified["funding_scheme"].isin(gbis_schemes)]

    # Additional properties we identify under the current checks
    additional_identified_eco = analysis_data[eco4_now & was_not_identified]
    additional_identified_eco["eligibility_classification"].value_counts()
    eco4_not_eligible_now = analysis_data["eco4_eligible"] == False
    additional_identified_gbis = analysis_data[gbis_now & eco4_not_eligible_now & was_not_identified].shape[0]

    # Additional properties we identify under the full ("future") checks
    eco4_future = analysis_data["eco4_eligible_future"] == True
    additional_identified_eco_future = analysis_data[eco4_future & was_not_identified].shape[0]
    gbis_future = analysis_data["gbis_eligible_future"] == True
    eco4_not_eligible_future = analysis_data["eco4_eligible_future"] == False
    additional_identified_gbis_future = analysis_data[
        gbis_future & eco4_not_eligible_future & was_not_identified
    ].shape[0]
def app():
    """
    Run the HA24 eligibility pipeline end to end.

    Loads the asset and survey lists, assigns every asset a "ha24_<n>" row id, pulls the cleaning
    artefacts and solar lookups from S3 and hands everything to get_epc_data. Nothing is returned.
    """
    data_bucket = "retrofit-data-dev"

    data, survey_list = load_data()
    data["row_id"] = ["ha24_" + str(position) for position in range(len(data))]

    # The cleaned EPC lookup is stored msgpack-encoded in S3
    packed_cleaned = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name=data_bucket,
    )
    cleaned = msgpack.unpackb(packed_cleaned, raw=False)

    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name=data_bucket,
        file_key="sap_change_model/cleaning_dataset.parquet",
    )

    created_at = datetime.now().isoformat()

    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=data_bucket)

    results_df, scoring_data, nodata = get_epc_data(
        data,
        cleaned,
        cleaning_data,
        created_at,
        photo_supply_lookup,
        floor_area_decile_thresholds,
    )

    # Developer aid, left disabled: snapshot the outputs to disk so a run does not have to be repeated
    # import pickle
    # with open("ha24_10_jan.pickle", "wb") as f:
    #     pickle.dump(
    #         {
    #             "scoring_data": scoring_data,
    #             "results": results_df,
    #             "nodata": nodata
    #         }, f
    #     )

    # Developer aid, left disabled: restore a previous snapshot
    # import pickle
    # with open("ha24_10_jan.pickle", "rb") as f:
    #     saved = pickle.load(f)
    # scoring_data = saved["scoring_data"]
    # results_df = saved["results"]
    # nodata = saved["nodata"]

View file

@ -0,0 +1,883 @@
import os
import msgpack
import openpyxl
from pathlib import Path
from datetime import datetime
import pandas as pd
import numpy as np
from utils.s3 import read_from_s3
from utils.logger import setup_logger
from dotenv import load_dotenv
from utils.s3 import read_dataframe_from_s3_parquet
from tqdm import tqdm
from backend.SearchEpc import SearchEpc
from etl.eligibility.Eligibility import Eligibility
from etl.eligibility.ha_15_32.app import prepare_model_data_row
from etl.epc.DataProcessor import DataProcessor
from etl.epc.settings import COLUMNS_TO_MERGE_ON
from backend.ml_models.api import ModelApi
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from recommendations.recommendation_utils import calculate_cavity_age
from recommendation_utils import convert_thickness_to_numeric
import re
ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
logger = setup_logger()
# The .env file must be loaded BEFORE the token is read: os.getenv only sees variables that are already in the
# process environment, so reading first left EPC_AUTH_TOKEN as None whenever it was defined only in the .env file.
# load_dotenv does not override variables that are already set, so an exported token still takes precedence.
load_dotenv(ENV_FILE)
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
def load_data():
# Reads the HA 25 asset list and the two survey workbooks from etl/eligibility/ha_15_32/, then matches the
# ECO4 prospects survey rows against the asset list.
#
# Returns:
#   (data, eco4_prospects_survey_list, lost_identified_properties):
#   - data: the asset list left-joined to the matched survey rows, with a boolean-like warmfront_identified
#     column that is False where no survey row matched
#   - eco4_prospects_survey_list: the cleaned ECO4 prospects survey list, including its survey_key column
#   - lost_identified_properties: the survey rows whose survey_key was not matched to any asset
workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 25 ASSET LIST.xlsx', data_only=True)
sheet = workbook.active
rows_data = []
rows_colors = []
for row in sheet.iter_rows(min_row=1, values_only=True): # use values_only=True to get values
row_data = list(row) # No need for comprehension, values_only=True returns a tuple of values
rows_data.append(row_data)
# Headers are on the final row. Pop them off and store them and then remove them from rows_data
headers = rows_data.pop()
# The postcode header is None, so we replace it with "postcode"
headers[-1] = "postcode"
# Handle colours separately
for row in sheet.iter_rows(min_row=1, values_only=False):
# Assume first cell color is indicative of entire row
row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
rows_colors.append(row_color)
# Remove the final row of colours, which is the header
rows_colors.pop()
asset_list = pd.DataFrame(rows_data, columns=headers)
asset_list['row_color'] = rows_colors
# Colour name: FFFF0000 is red, FF00B050 is green, every other value (including no fill) is treated as yellow
asset_list["row_colour_name"] = np.where(
asset_list["row_color"] == "FFFF0000", "red",
np.where(asset_list["row_color"] == "FF00B050", "green", "yellow")
)
# Meaning attached to each colour name
asset_list["row_colour_code"] = np.where(
asset_list["row_colour_name"] == "red", "does not meet criteria",
np.where(asset_list["row_colour_name"] == "green", "identified potential eco", "maybe in the future")
)
# Lower-cased working copy of the address with the word "flat" removed, used to derive the house number
asset_list["address"] = asset_list["T1_Address"].copy().str.lower()
asset_list["address"] = asset_list["address"].str.replace("flat", "")
asset_list["address"] = asset_list["address"].str.strip()
split_addresses = asset_list['address'].str.split(' ', expand=True)
# NOTE(review): exactly 14 names are assigned, so this fails if the split yields any other number of columns
split_addresses.columns = ['HouseNo', 'address2', 'address3', 'address4', 'address5', 'address6', 'address7',
'address8',
'address9', 'address10', 'address11', 'address12', 'address13', 'address14', ]
split_addresses["HouseNo"] = split_addresses["HouseNo"].str.replace(";", "")
# We could re-concatenate but we only care about HouseNo for the moment
asset_list = pd.concat([asset_list, split_addresses[["HouseNo"]]], axis=1)
asset_list["postcode"] = asset_list["postcode"].str.strip()
# We analyse the historical ECO3 survey list
# NOTE(review): eco3_survey_list is built below but is not referenced again in this function or returned
eco3_survey_workbook = openpyxl.load_workbook(f'etl/eligibility/ha_15_32/HESTIA - HA 25 ECO3 SURVEY LIST.xlsx')
eco3_survey_sheet = eco3_survey_workbook["CAVITY"]
eco3_survey_rows = []
eco3_survey_colors = []
for row in eco3_survey_sheet.iter_rows(min_row=2, values_only=False): # Assuming the first row is headers
row_data = [cell.value for cell in row] # This will get you the cell values
row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
# row_color = COLOR_INDEX[row_color]
eco3_survey_rows.append(row_data)
eco3_survey_colors.append(row_color)
# Some adhoc analysis on the eco3 survey list, just to get completion and cancellation rates historically
eco3_survey_list = pd.DataFrame(eco3_survey_rows, columns=[cell.value for cell in eco3_survey_sheet[1]])
eco3_survey_list["row_colour"] = eco3_survey_colors
# Remove rows where street name is missing
eco3_survey_list = eco3_survey_list[~pd.isnull(eco3_survey_list["Street / Block Name"])]
# We need to parse the row colours
# We have the following mappings:
# FF7030A0: purple
# FF92D050: green
# FFFF0000: red
# FFFFFF00: yellow
# FF38FD23: green
eco3_survey_list["row_colour_name"] = np.where(
eco3_survey_list["row_colour"] == "FF7030A0", "purple",
np.where(eco3_survey_list["row_colour"] == "FF92D050", "green",
np.where(eco3_survey_list["row_colour"] == "FFFF0000", "red",
np.where(eco3_survey_list["row_colour"] == "FFFFFF00", "yellow",
np.where(eco3_survey_list["row_colour"] == "FF38FD23", "green", "unknown")
)
)
)
)
# We map the meaning:
# red: cancelled
# green: installer advised install complete
# purple: installer advised install complete + post works EPC
# yellow: filler row - drop
eco3_survey_list["row_colour_code"] = np.where(
eco3_survey_list["row_colour_name"] == "red", "cancelled",
np.where(eco3_survey_list["row_colour_name"] == "green", "installed advised install complete",
np.where(eco3_survey_list["row_colour_name"] == "purple",
"installer advised install complete + post works EPC",
np.where(eco3_survey_list["row_colour_name"] == "yellow", "filler row - drop", "unknown")
)
)
)
# This is good enough for the indicative cancellation rates
# We now read in the indicative survey list which identified prospects for ECO4 works
eco4_survey_workbook = openpyxl.load_workbook(
f'etl/eligibility/ha_15_32/HESTIA - HA 25 ADHOC ISOLATED IDENTIFIED PROPERTIES FOR CWI.xlsx'
)
eco4_prospect_survey_sheet = eco4_survey_workbook["LiveWest"]
eco4_prospects_survey_rows = []
eco4_prospects_survey_colors = []
for row in eco4_prospect_survey_sheet.iter_rows(min_row=2, values_only=False): # Assuming the first row is headers
row_data = [cell.value for cell in row] # This will get you the cell values
row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
# row_color = COLOR_INDEX[row_color]
eco4_prospects_survey_rows.append(row_data)
eco4_prospects_survey_colors.append(row_color)
# Build the ECO4 prospects survey DataFrame, using the first sheet row as the column headers
eco4_prospects_survey_list = pd.DataFrame(
eco4_prospects_survey_rows, columns=[cell.value for cell in eco4_prospect_survey_sheet[1]]
)
eco4_prospects_survey_list["row_colour"] = eco4_prospects_survey_colors
eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.lower()
eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.strip()
# Drop rows without a first address line, then key the remaining rows as survey_0, survey_1, ...
eco4_prospects_survey_list = eco4_prospects_survey_list[~pd.isnull(eco4_prospects_survey_list["ADDRESS 1"])]
eco4_prospects_survey_list["survey_key"] = ["survey_" + str(i) for i in range(0, len(eco4_prospects_survey_list))]
# Correct some errors in the survey list
eco4_prospects_survey_list["POSTCODE"] = np.where(
(eco4_prospects_survey_list["ADDRESS 1"] == "berry park") &
(eco4_prospects_survey_list["POSTCODE"] == "PL12 6HP"),
"PL12 6EN",
eco4_prospects_survey_list["POSTCODE"]
)
# Remove semi colons from address in asset and survey list
asset_list["T1_Address"] = asset_list["T1_Address"].str.replace(";", "")
eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace(";", "")
# In the prospects survey list, we have 6 WALKHAM MEADOWS listed twice, which should be 6a and 6b
# NOTE(review): 838 and 839 are index labels, so this depends on the current row order of the workbook
eco4_prospects_survey_list.loc[838, "NO"] = "6a"
eco4_prospects_survey_list.loc[839, "NO"] = "6b"
# 3, 7, 9 BOLDVENTURE ROAD should be BOLDVENTURE CLOSE
eco4_prospects_survey_list["ADDRESS 1"] = np.where(
(eco4_prospects_survey_list["ADDRESS 1"] == "boldventure road") &
(eco4_prospects_survey_list["NO"].isin([3, 7, 9])),
"boldventure close",
eco4_prospects_survey_list["ADDRESS 1"]
)
# "old farm road" at PL5 1EP is rewritten to "old school road"
eco4_prospects_survey_list["ADDRESS 1"] = np.where(
(eco4_prospects_survey_list["ADDRESS 1"] == "old farm road") & (
eco4_prospects_survey_list["POSTCODE"] == "PL5 1EP"),
"old school road",
eco4_prospects_survey_list["ADDRESS 1"]
)
# 52 "croft orchard" at TQ12 6RP is rewritten to "drum way"
eco4_prospects_survey_list["ADDRESS 1"] = np.where(
(eco4_prospects_survey_list["ADDRESS 1"] == "croft orchard") & (
eco4_prospects_survey_list["POSTCODE"] == "TQ12 6RP") & (
eco4_prospects_survey_list["NO"] == 52),
"drum way",
eco4_prospects_survey_list["ADDRESS 1"]
)
# String replace
eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace(
"the gulls, collaton road", "the gulls collaton road"
)
eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace(
"crows-an-eglose", "crows-an-eglos"
)
# We have a high volume of rows that do not match
# Each survey row ends up either in `matched` (exactly one asset row left after filtering) or in `nomatch`
matched = []
nomatch = []
for _, row in tqdm(eco4_prospects_survey_list.iterrows(), total=len(eco4_prospects_survey_list)):
# Not in the asset list
if (row["ADDRESS 1"] == "berry park") and row["NO"] in [40, 42] and row["POSTCODE"] == "PL12 6EN":
nomatch.append(row.to_dict())
continue
# Not in the asset list
if (row["ADDRESS 1"] == "roberts road") and row["NO"] == 23 and row["POSTCODE"] == "PL5 1DP":
nomatch.append(row.to_dict())
continue
# Not in the asset list
if row["ADDRESS 1"] in [
"kaynton mead", "broadmoor lane", "hoopers barton", "ecos court", "selwood road",
"castle street"
]:
nomatch.append(row.to_dict())
continue
house_number = row["NO"]
# String house numbers are lower-cased and reduced to the part after "flat"
if isinstance(house_number, str):
house_number = house_number.lower()
if "flat" in house_number:
house_number = house_number.split("flat")[1].strip()
# Filter on the first line of the address
# NOTE(review): str.contains treats its argument as a regular expression by default, so characters such as
# "." or "(" in a street name are interpreted as regex syntax - confirm this is intended
df = asset_list[asset_list["T1_Address"].str.lower().str.contains(row["ADDRESS 1"].lower())].copy()
# Narrow further, one filter at a time, only while the candidate set is not exactly one row:
# house number as a substring of the address, then exact HouseNo, then postcode
if house_number is not None:
if df.shape[0] != 1:
df = df[df["T1_Address"].str.lower().str.contains(str(house_number))]
if df.shape[0] != 1:
if house_number is not None:
df = df[df["HouseNo"] == str(house_number)]
if df.shape[0] != 1:
if row["POSTCODE"] is not None:
df = df[df["postcode"].str.lower().str.contains(row["POSTCODE"].lower())]
# Still ambiguous or empty: record as unmatched
if df.shape[0] != 1:
nomatch.append(row.to_dict())
continue
matched.append(
{
"survey_key": row["survey_key"],
"matched_address": df["T1_Address"].values[0],
"survey_house_no": row["NO"],
"survey_street_name": row["ADDRESS 1"],
"survey_postcode": row["POSTCODE"],
}
)
nomatch = pd.DataFrame(nomatch)
matched = pd.DataFrame(matched)
matched["warmfront_identified"] = True
# Combine asset list and surveys
data = asset_list.merge(
matched, how="left", left_on="T1_Address", right_on="matched_address",
)
# Assets without a matched survey row get warmfront_identified = False
data["warmfront_identified"] = data["warmfront_identified"].fillna(False)
# Survey rows that could not be tied to an asset
lost_identified_properties = eco4_prospects_survey_list[
~eco4_prospects_survey_list["survey_key"].isin(matched["survey_key"])
]
return data, eco4_prospects_survey_list, lost_identified_properties
# Last build year covered by each EPC construction age band, in ascending order.
# Years after the final entry fall into the "2012 onwards" band.
_AGE_BAND_LAST_YEARS = (
    (1899, "England and Wales: before 1900"),
    (1929, "England and Wales: 1900-1929"),
    (1949, "England and Wales: 1930-1949"),
    (1966, "England and Wales: 1950-1966"),
    (1975, "England and Wales: 1967-1975"),
    (1982, "England and Wales: 1976-1982"),
    (1990, "England and Wales: 1983-1990"),
    (1995, "England and Wales: 1991-1995"),
    (2002, "England and Wales: 1996-2002"),
    (2006, "England and Wales: 2003-2006"),
    (2011, "England and Wales: 2007-2011"),
)


def map_year_to_age_band(year):
    """
    Map a build year onto the EPC "England and Wales" construction age band label.

    :param year: The build year; anything int() accepts (int, float, numeric string)
    :return: The age band label, or "Invalid Year" when the value cannot be converted to an integer
    """
    try:
        year = int(year)
    except (TypeError, ValueError, OverflowError):
        # TypeError covers None (an empty spreadsheet cell), ValueError covers non-numeric text and NaN,
        # OverflowError covers infinities. All of them mean "no usable year".
        return "Invalid Year"

    for last_year, age_band in _AGE_BAND_LAST_YEARS:
        if year <= last_year:
            return age_band
    # Assuming all remaining years are 2012 onwards
    return "England and Wales: 2012 onwards"
def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
    """
    Look up the EPC for every asset, run the eligibility checks and grade the ECO4-eligible properties
    by their predicted post-install SAP.

    :param data: Asset DataFrame; the columns read here are HouseNo, postcode, address, T1_Address,
        T1_AssetType, "Build Yr" and row_id
    :param cleaned: Passed unchanged to Eligibility, calculate_cavity_age and prepare_model_data_row
    :param cleaning_data: Passed unchanged to prepare_model_data_row and the DataProcessor cleaning steps
    :param created_at: Passed to prepare_model_data_row and used as the ModelApi timestamp
    :param photo_supply_lookup: Passed unchanged to prepare_model_data_row
    :param floor_area_decile_thresholds: Passed unchanged to prepare_model_data_row
    :return: Tuple of (results_df, scoring_data, nodata): the per-property results DataFrame, the list of
        raw model-input rows, and the list of asset rows for which no EPC was found
    :raises KeyError: If an asset's T1_AssetType is not present in the property type lookup
    """
    scoring_data = []
    results = []
    nodata = []

    # Maps the asset list's T1_AssetType onto the property type and built form handed to the searcher.
    # Misspelt and trailing-space variants are listed as separate keys on purpose.
    property_type_lookup = {
        "Flat": {"property-type": "Flat", "built-form": None},
        "Mid Terrace House": {"property-type": "House", "built-form": "Mid-Terrace"},
        "End Terrace House": {"property-type": "House", "built-form": "End-Terrace"},
        "Maisonnette": {"property-type": "Flat", "built-form": None},
        "Semi Detached House": {"property-type": "House", "built-form": "Semi-Detached"},
        "Detached House": {"property-type": "House", "built-form": "Detached"},
        "Coach House": {"property-type": "House", "built-form": "Detached"},
        "Bungalow": {"property-type": "Bungalow", "built-form": None},
        "Detached Bungalow": {"property-type": "Bungalow", "built-form": "Detached"},
        "House": {"property-type": "House", "built-form": None},
        "Semi Detached Bung": {"property-type": "Bungalow", "built-form": "Semi-Detached"},
        "Bedspace": {"property-type": None, "built-form": None},
        "Office Buildings": {"property-type": None, "built-form": None},
        "End Terrace Bungalow": {"property-type": "Bungalow", "built-form": "End-Terrace"},
        "Mid Terrace Bungalow": {"property-type": "Bungalow", "built-form": "Mid-Terrace"},
        "Bedsit": {"property-type": "Flat", "built-form": None},
        "Mid Terrace Housekeeping": {"property-type": "House", "built-form": "Mid-Terrace"},
        "Mid Terrace Housekeeping ": {"property-type": "House", "built-form": "Mid-Terrace"},
        "End Terrace Housex": {"property-type": "House", "built-form": "End-Terrace"},
        "Guest Room": {"property-type": None, "built-form": None}
    }

    # iterrows() yields (index, row) pairs. Iterating the DataFrame itself yields only its column labels,
    # which cannot be unpacked into (index, row).
    for _, property_meta in tqdm(data.iterrows(), total=len(data)):
        searcher = SearchEpc(
            address1=property_meta["HouseNo"],
            postcode=property_meta["postcode"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            full_address=property_meta["address"]
        )
        asset_type = property_type_lookup[property_meta["T1_AssetType"]]
        searcher.ordnance_survey_client.property_type = asset_type["property-type"]
        searcher.ordnance_survey_client.built_form = asset_type["built-form"]

        searcher.find_property(skip_os=True)

        # Properties without any EPC are collected separately and take no further part in the analysis
        if searcher.newest_epc is None:
            nodata.append(property_meta)
            continue

        if searcher.newest_epc.get("estimated"):
            # We insert the row ID as our proxy for UPRN
            proxy_uprn = int(property_meta["row_id"].split("_")[1])
            searcher.newest_epc["uprn"] = proxy_uprn

        newest_epc = searcher.newest_epc
        older_epcs = searcher.older_epcs
        full_sap_epc = searcher.full_sap_epc

        # We also want to get the penultimate epc
        # penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
        # if not penultimate_epc:
        #     penultimate_epc = newest_epc

        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
        eligibility.check_gbis_warmfront()
        eligibility.check_eco4_warmfront()

        # if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
        #     eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
        #     eligibility.check_gbis_warmfront()
        #     eligibility.check_eco4_warmfront()
        #     # If this is the case, we need to update the older epcs
        #     # We don't update just to make data cleaning easier
        #     if penultimate_epc.get("estimated") is None:
        #         older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]

        # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
        # Loft MUST be suitable
        cavity_age = None
        if (
            eligibility.walls["is_cavity_wall"] and
            eligibility.walls["is_filled_cavity"] and
            eligibility.loft["suitability"] and
            eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
        ):
            # We check the age of the cavity and if it's particularly old, we flag it
            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)

        # Full checks
        eligibility.check_gbis()
        eligibility.check_eco4()

        if eligibility.eco4_warmfront["eligible"]:
            # Fill the gaps the model cannot cope with before building the scoring rows
            if eligibility.epc["uprn"] in ["", None]:
                eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[1])

            if eligibility.epc["construction-age-band"] in ["", None]:
                eligibility.epc["construction-age-band"] = map_year_to_age_band(property_meta["Build Yr"])

            # This is not the right place to do this but this is temp
            if eligibility.epc["extension-count"] in ["", None]:
                eligibility.epc["extension-count"] = 0

            # Not in the right place but temp
            if eligibility.epc["built-form"] in ["", None]:
                if not older_epcs:
                    eligibility.epc["built-form"] = "Mid-Terrace"

            scoring_dictionary = prepare_model_data_row(
                property_id=property_meta["row_id"],
                modelling_epc=eligibility.epc,
                cleaned=cleaned,
                cleaning_data=cleaning_data,
                created_at=created_at,
                old_data=older_epcs,
                full_sap_epc=full_sap_epc,
                photo_supply_lookup=photo_supply_lookup,
                floor_area_decile_thresholds=floor_area_decile_thresholds,
            )
            scoring_data.extend(scoring_dictionary)

        # One result record per property; the walls and roof dictionaries are flattened into the record last
        results.append(
            {
                "row_id": property_meta["row_id"],
                "uprn": eligibility.epc["uprn"],
                "Address": property_meta["T1_Address"],
                "Postcode": property_meta["postcode"],
                "property_type": eligibility.epc["property-type"],
                "gbis_eligible": eligibility.gbis_warmfront,
                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                "eco4_message": eligibility.eco4_warmfront["message"],
                "sap": float(eligibility.epc["current-energy-efficiency"]),
                "gbis_eligible_future": eligibility.gbis["eligible"],
                "gbis_eligible_future_message": eligibility.gbis["message"],
                "eco4_eligible_future": eligibility.eco4["eligible"],
                "eco4_eligible_future_message": eligibility.eco4["message"],
                # Property components
                "roof": eligibility.roof["clean_description"],
                "walls": eligibility.walls["clean_description"],
                "cavity_type": eligibility.cavity["type"],
                "heating": eligibility.epc["mainheat-description"],
                "tenure": eligibility.tenure,
                "date_epc": eligibility.epc["lodgement-date"],
                "cavity_age": cavity_age,
                **eligibility.walls,
                **eligibility.roof,
            }
        )

    scoring_df = pd.DataFrame(scoring_data)

    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )

    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])

    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )

    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)

    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )

    # Only the SAP change predictions are used from here on
    predictions = all_predictions["sap_change_predictions"].copy()
    results_df = pd.DataFrame(results)

    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
        results_df[["row_id", "sap"]], how="left", on="row_id"
    )
    # Uplift is the predicted value minus the current SAP, summed over all prediction rows of a property
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()

    results_df = results_df.merge(
        predictions[["sap_uplift", "row_id"]],
        how="left",
        on="row_id"
    )
    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]

    # Grade only the ECO4-eligible rows; the left merge leaves the classification missing for the rest
    eligibility_assessment = []
    for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
        eligibility_assessment.append(
            {
                "row_id": row["row_id"],
                "eligibility_classification": _classify_post_install_sap(row["sap"], row["post_install_sap"])
            }
        )
    eligibility_assessment = pd.DataFrame(eligibility_assessment)

    results_df = results_df.merge(
        eligibility_assessment, how="left", on="row_id"
    )

    return results_df, scoring_data, nodata


def _classify_post_install_sap(current_sap, post_install_sap):
    """
    Grade how comfortably the predicted post-install SAP clears the required upgrade level.

    :param current_sap: The property's current SAP score
    :param post_install_sap: The predicted SAP score after the install
    :return: One of "highest confidence", "high confidence", "medium confidence" or "unlikely"
    """
    # The upgrade requirements are dependent on the current SAP: a current SAP of 38 or below is graded
    # against 57 / 55 / 53, anything higher against 71 / 69 / 67.
    # (55 and 69 look like the lower bounds of EPC bands D and C - TODO confirm)
    if current_sap <= 38:
        highest, high, medium = 57, 55, 53
    else:
        highest, high, medium = 71, 69, 67

    if post_install_sap >= highest:
        return "highest confidence"
    if post_install_sap >= high:
        return "high confidence"
    if post_install_sap >= medium:
        return "medium confidence"
    return "unlikely"
def _classify_eco4_confidence(current_sap, post_install_sap):
    """
    Grade how comfortably the predicted post-install SAP clears its target.

    Properties with a current SAP of 38 or below are graded against a target of 55, all other properties against
    a target of 69 (presumably the band D / band C boundaries - confirm against the scheme rules).

    :param current_sap: The SAP score on the EPC used for eligibility
    :param post_install_sap: The predicted SAP score after installation (may be NaN when nothing was scored)
    :return: One of "highest confidence", "high confidence", "medium confidence" or "unlikely"
    """
    target = 55 if current_sap <= 38 else 69
    # A NaN prediction fails every comparison and therefore falls through to "unlikely"
    if post_install_sap >= target + 2:
        return "highest confidence"
    if post_install_sap >= target:
        return "high confidence"
    if post_install_sap >= target - 2:
        return "medium confidence"
    return "unlikely"


def get_epc_data_for_lost_surveys(
        lost_identified_properties, cleaned, cleaning_data, created_at, photo_supply_lookup,
        floor_area_decile_thresholds
):
    """
    Find EPCs for the lost survey properties, check their eligibility and score the ECO4 eligible ones.

    A "row_id" column of the form "lost_surveys_ha25_<i>" is written onto lost_identified_properties in place.

    :param lost_identified_properties: Dataframe with NO, ADDRESS 1-3, POSTCODE and PROPERTY TYPE columns
    :param cleaned: Cleaned EPC lookup data, passed through to Eligibility and prepare_model_data_row
    :param cleaning_data: Dataset used for the averages cleaning of the scoring data
    :param created_at: Timestamp passed to prepare_model_data_row and ModelApi
    :param photo_supply_lookup: Passed through to prepare_model_data_row
    :param floor_area_decile_thresholds: Passed through to prepare_model_data_row
    :return: Tuple of (results_df, scoring_data, nodata), where nodata holds the rows with no EPC found
    """
    lost_identified_properties["row_id"] = [
        "lost_surveys_ha25_" + str(i) for i in range(0, len(lost_identified_properties))
    ]

    scoring_data = []
    results = []
    nodata = []

    property_type_lookup = {
        "MID-TERRACE": {"property-type": "House", "built-form": "Mid-Terrace"},
        "N/A": {"property-type": "House", "built-form": None},
        "END-TERRACE": {"property-type": "House", "built-form": "End-Terrace"},
        "GROUND-FLOOR": {"property-type": "House", "built-form": None},
        "TOP-FLOOR": {"property-type": "House", "built-form": None},
        "SEMI-DETACHED": {"property-type": "House", "built-form": "Semi-Detached"},
        "MID-FLOOR": {"property-type": "House", "built-form": None},
        "TOP-FLOOR FLAT": {"property-type": "House", "built-form": None},
        "DETACHED": {"property-type": "House", "built-form": "Detached"},
        "MID-FLOOR FLAT": {"property-type": "House", "built-form": None},
        "SEMI- DETACHED": {"property-type": "House", "built-form": "Semi-Detached"},
        "NO EPC ON GOV": {"property-type": "House", "built-form": None},
        "Top-floor flat": {"property-type": "House", "built-form": None},
        "GROUND-FLOOR FLAT": {"property-type": "House", "built-form": None},
        "NOT ON GOV SITE": {"property-type": "House", "built-form": None}
    }

    for _, property_meta in tqdm(lost_identified_properties.iterrows(), total=len(lost_identified_properties)):
        if property_meta["POSTCODE"] is None:
            continue

        # The trailing integer of the row id is our proxy for UPRN. The id has the form
        # "lost_surveys_ha25_<i>", so only the LAST underscore-separated token is numeric
        proxy_uprn = int(property_meta["row_id"].split("_")[-1])

        full_address = ", ".join(
            [str(x) for x in [
                property_meta["NO"], property_meta["ADDRESS 1"], property_meta["ADDRESS 2"], property_meta["ADDRESS 3"]
            ] if x is not None]
        )

        searcher = SearchEpc(
            address1=str(property_meta["NO"]),
            postcode=property_meta["POSTCODE"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            full_address=full_address
        )

        # The Ordnance Survey lookup is skipped, so we seed the property type and built form from the asset list.
        # An unrecognised property type raises a KeyError rather than being silently ignored
        property_type_key = property_meta["PROPERTY TYPE"]
        if property_type_key is not None:
            known_type = property_type_lookup[property_type_key.strip()]
            searcher.ordnance_survey_client.property_type = known_type["property-type"]
            searcher.ordnance_survey_client.built_form = known_type["built-form"]

        searcher.find_property(skip_os=True)

        if searcher.newest_epc is None:
            nodata.append(property_meta)
            continue

        if searcher.newest_epc.get("estimated"):
            # We insert the row ID as our proxy for UPRN
            searcher.newest_epc["uprn"] = proxy_uprn

        newest_epc = searcher.newest_epc
        older_epcs = searcher.older_epcs
        full_sap_epc = searcher.full_sap_epc

        # We also want to get the penultimate epc
        penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
        if not penultimate_epc:
            penultimate_epc = newest_epc

        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
        eligibility.check_gbis_warmfront()
        eligibility.check_eco4_warmfront()

        if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
            # Nothing was identified on the newest EPC, so we fall back to the penultimate EPC
            eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
            eligibility.check_gbis_warmfront()
            eligibility.check_eco4_warmfront()

            # If this is the case, we need to update the older epcs
            # We don't update just to make data cleaning easier
            if penultimate_epc.get("estimated") is None:
                older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]

        # Full checks
        eligibility.check_gbis()
        eligibility.check_eco4()

        if eligibility.eco4_warmfront["eligible"] and (eligibility.epc["construction-age-band"] not in ["", None]):
            if eligibility.epc["uprn"] in ["", None]:
                # Previously this used split("_")[1], which is the string "surveys" and cannot be cast to int
                eligibility.epc["uprn"] = proxy_uprn

            scoring_dictionary = prepare_model_data_row(
                property_id=property_meta["row_id"],
                modelling_epc=eligibility.epc,
                cleaned=cleaned,
                cleaning_data=cleaning_data,
                created_at=created_at,
                old_data=older_epcs,
                full_sap_epc=full_sap_epc,
                photo_supply_lookup=photo_supply_lookup,
                floor_area_decile_thresholds=floor_area_decile_thresholds,
            )
            scoring_data.extend(scoring_dictionary)

        # Every property with an EPC gets a results record, whether or not it was scored
        results.append(
            {
                "row_id": property_meta["row_id"],
                "uprn": eligibility.epc["uprn"],
                "Address": property_meta["ADDRESS 1"],
                "Postcode": property_meta["POSTCODE"],
                "property_type": eligibility.epc["property-type"],
                "gbis_eligible": eligibility.gbis_warmfront,
                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                "eco4_message": eligibility.eco4_warmfront["message"],
                "sap": float(eligibility.epc["current-energy-efficiency"]),
                "gbis_eligible_future": eligibility.gbis["eligible"],
                "gbis_eligible_future_message": eligibility.gbis["message"],
                "eco4_eligible_future": eligibility.eco4["eligible"],
                "eco4_eligible_future_message": eligibility.eco4["message"],
                # Property components
                "roof": eligibility.roof["clean_description"],
                "walls": eligibility.walls["clean_description"],
                "cavity_type": eligibility.cavity["type"],
                "heating": eligibility.epc["mainheat-description"],
                "tenure": eligibility.tenure,
                "date_epc": eligibility.epc["lodgement-date"],
                **eligibility.walls,
                **eligibility.roof,
            }
        )

    scoring_df = pd.DataFrame(scoring_data)

    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])
    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )
    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)

    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )

    predictions = all_predictions["sap_change_predictions"].copy()
    results_df = pd.DataFrame(results)

    # The uplift of each prediction row is measured against the current SAP, then summed per property
    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
        results_df[["row_id", "sap"]], how="left", on="row_id"
    )
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()

    results_df = results_df.merge(
        predictions[["sap_uplift", "row_id"]],
        how="left",
        on="row_id"
    )
    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]

    # The upgrade requirements are dependent on the current SAP, see _classify_eco4_confidence
    eco4_rows = results_df[results_df["eco4_eligible"] == True]
    eligibility_assessment = pd.DataFrame(
        [
            {
                "row_id": row["row_id"],
                "eligibility_classification": _classify_eco4_confidence(row["sap"], row["post_install_sap"])
            }
            for _, row in eco4_rows.iterrows()
        ],
        # Explicit columns keep the merge below valid even when no property is ECO4 eligible
        columns=["row_id", "eligibility_classification"]
    )

    results_df = results_df.merge(
        eligibility_assessment, how="left", on="row_id"
    )

    return results_df, scoring_data, nodata
def analyse_results(results_df, data, eco4_prospects_survey_list):
    """
    Exploratory breakdown of the eligibility results against the surveyed properties.

    Nothing is returned and the inputs are not modified; the intermediate frames exist to be inspected
    interactively.

    :param results_df: Eligibility results, keyed on row_id
    :param data: Asset data containing row_id, survey_key and warmfront_identified
    :param eco4_prospects_survey_list: Survey list containing survey_key, ADDRESS 1, NO and POSTCODE
    """

    def _pitched_roof_thickness(raw_thickness):
        # Every roof is treated as pitched (not flat) for the purposes of this analysis
        return convert_thickness_to_numeric(raw_thickness, is_flat=False, is_pitched=True)

    identified = data[["row_id", "survey_key", "warmfront_identified"]]
    analysis_data = identified.merge(results_df, how="left", on="row_id")

    survey_addresses = eco4_prospects_survey_list[["survey_key", "ADDRESS 1", "NO", "POSTCODE"]]
    analysis_data = analysis_data.merge(survey_addresses, how="left", on="survey_key")

    # NEW
    # Null thicknesses are normalised to None before conversion
    raw_thickness = analysis_data["roof_insulation_thickness"]
    analysis_data["roof_insulation_thickness"] = np.where(pd.isnull(raw_thickness), None, raw_thickness)
    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
        _pitched_roof_thickness
    )

    identified_mask = analysis_data["warmfront_identified"] == True
    warmfront_identified = analysis_data[identified_mask]  # 2204

    # Because we don't know which property is for which scheme, we'll just look at what we found
    eco4_mask = analysis_data["eco4_eligible"] == True
    thin_roof_mask = analysis_data["roof_insulation_thickness_numeric"] <= 100
    low_sap_mask = analysis_data["sap"] <= 54
    ideal_eco4 = analysis_data[eco4_mask & thin_roof_mask & low_sap_mask]  # 335

    gbis_mask = analysis_data["gbis_eligible"] == True
    already_ideal_mask = analysis_data["row_id"].isin(ideal_eco4["row_id"].values)
    gbis = analysis_data[gbis_mask & ~already_ideal_mask]

    # This repeats the SAP filter already applied above, so it leaves the selection unchanged
    ideal_eco4 = ideal_eco4[ideal_eco4["sap"] <= 54]
def analyse_lost_surveys(results_df):
    """
    Exploratory breakdown of the lost survey results into ideal ECO4 and remaining GBIS properties.

    Writes the cleaned "roof_insulation_thickness" and a new "roof_insulation_thickness_numeric" column onto
    results_df in place. Nothing is returned.

    :param results_df: Results containing row_id, sap, eco4_eligible, gbis_eligible and roof_insulation_thickness
    """

    def _pitched_roof_thickness(raw_thickness):
        # Every roof is treated as pitched (not flat) for the purposes of this analysis
        return convert_thickness_to_numeric(raw_thickness, is_flat=False, is_pitched=True)

    # Null thicknesses are normalised to None before conversion
    raw_thickness = results_df["roof_insulation_thickness"]
    results_df["roof_insulation_thickness"] = np.where(pd.isnull(raw_thickness), None, raw_thickness)
    results_df["roof_insulation_thickness_numeric"] = results_df["roof_insulation_thickness"].apply(
        _pitched_roof_thickness
    )

    eco4_mask = results_df["eco4_eligible"] == True
    thin_roof_mask = results_df["roof_insulation_thickness_numeric"] <= 100
    low_sap_mask = results_df["sap"] <= 54
    ideal_eco4 = results_df[eco4_mask & thin_roof_mask & low_sap_mask]  # 25

    gbis_mask = results_df["gbis_eligible"] == True
    already_ideal_mask = results_df["row_id"].isin(ideal_eco4["row_id"].values)
    gbis = results_df[gbis_mask & ~already_ideal_mask]  # 82
def app():
    """
    Load the inputs from disk and S3, run the EPC eligibility pipeline, then reload a saved run from a pickle.

    NOTE(review): the freshly computed results are replaced by the contents of "ha25_10_jan.pickle" at the end of
    this function - confirm whether that is still intended.
    """
    data, eco4_prospects_survey_list, lost_identified_properties = load_data()
    data["row_id"] = [f"ha25_{i}" for i in range(len(data))]

    # The cleaned EPC data is stored on S3 as a msgpack payload
    cleaned_payload = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name="retrofit-data-dev"
    )
    cleaned = msgpack.unpackb(cleaned_payload, raw=False)

    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
    )

    created_at = datetime.now().isoformat()

    solar_inputs = SolarPhotoSupply.load(bucket="retrofit-data-dev")
    photo_supply_lookup, floor_area_decile_thresholds = solar_inputs

    results_df, scoring_data, nodata = get_epc_data(
        data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds
    )

    # The pickle read below was written by dumping a dictionary with the keys "results_df", "scoring_data" and
    # "nodata" to "ha25_10_jan.pickle" (the old data was ha25.pickle)

    # Load in pickle
    # NOTE: pickle can execute arbitrary code on load, so only ever read files produced by this script
    import pickle
    with open("ha25_10_jan.pickle", "rb") as pickle_file:
        saved_run = pickle.load(pickle_file)

    results_df = saved_run["results_df"]
    scoring_data = saved_run["scoring_data"]
    nodata = saved_run["nodata"]

View file

@ -264,21 +264,21 @@ def get_ha_33data(data, cleaned, cleaning_data, created_at):
def analyse_ha_33(results_df, data): def analyse_ha_33(results_df, data):
results_df_social = results_df[results_df["tenure"] == "Rented (social)"] # results_df_social = results_df[results_df["tenure"] == "Rented (social)"]
#
# results_df_social["tenure"].value_counts()
results_df_social["tenure"].value_counts() data[data["row_id"].isin(results_df["row_id"].values)]["PROPERTY TYPE"].value_counts()
data[data["row_id"].isin(results_df_social["row_id"].values)]["PROPERTY TYPE"].value_counts() n_identified = (results_df["gbis_eligible"] | results_df["eco4_eligible"]).sum()
n_eco4 = results_df["eco4_eligible"].sum()
n_gbis = results_df[~results_df["eco4_eligible"]]["gbis_eligible"].sum()
n_identified = (results_df_social["gbis_eligible"] | results_df_social["eco4_eligible"]).sum() eco_eligibile = results_df[results_df["eco4_eligible"]]
n_eco4 = results_df_social["eco4_eligible"].sum()
n_gbis = results_df_social[~results_df_social["eco4_eligible"]]["gbis_eligible"].sum()
eco_eligibile = results_df_social[results_df_social["eco4_eligible"]]
eco_eligibile["walls"].value_counts() eco_eligibile["walls"].value_counts()
eco_eligibile["roof"].value_counts() eco_eligibile["roof"].value_counts()
results_df_social[results_df_social["gbis_eligible"] | results_df_social["eco4_eligible"]]["tenure"].value_counts() results_df[results_df["gbis_eligible"] | results_df["eco4_eligible"]]["tenure"].value_counts()
results_df_social["eligibility_classification"].value_counts() results_df_social["eligibility_classification"].value_counts()
@ -316,3 +316,11 @@ def app():
created_at = datetime.now().isoformat() created_at = datetime.now().isoformat()
results_df, _, _ = get_ha_33data(data, cleaned, cleaning_data, created_at) results_df, _, _ = get_ha_33data(data, cleaned, cleaning_data, created_at)
# Read in
import pickle
with open("ha33_results.pickle", "rb") as f:
data = pickle.load(f)
results_df = pd.DataFrame(data["results"])
scoring_data = data["scoring_data"]
nodata = data["nodata"]

View file

@ -0,0 +1,328 @@
import os
import msgpack
from pathlib import Path
from datetime import datetime
import numpy as np
import pandas as pd
from utils.s3 import read_from_s3
from utils.logger import setup_logger
from dotenv import load_dotenv
from utils.s3 import read_dataframe_from_s3_parquet
from tqdm import tqdm
from backend.SearchEpc import SearchEpc
from etl.eligibility.Eligibility import Eligibility
from etl.eligibility.ha_15_32.app import prepare_model_data_row
from etl.epc.DataProcessor import DataProcessor
from etl.epc.settings import COLUMNS_TO_MERGE_ON
from backend.ml_models.api import ModelApi
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from recommendations.recommendation_utils import calculate_cavity_age
from recommendation_utils import convert_thickness_to_numeric
import re
ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
logger = setup_logger()
# The .env file must be loaded BEFORE the environment is read, otherwise a token that is only defined in the
# .env file is read as None
load_dotenv(ENV_FILE)
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
def load_ha_4():
    """
    Read the HA 4 asset list from its CSV file.

    As a side effect, the pandas display limits (rows, columns and width) are widened for the session.

    :return: Dataframe with the contents of the asset list
    """
    display_options = {
        'display.max_rows': 500,
        'display.max_columns': 500,
        'display.width': 1000,
    }
    for option_name, option_value in display_options.items():
        pd.set_option(option_name, option_value)

    # The path is relative to the current working directory
    return pd.read_csv("etl/eligibility/ha_15_32/HA 4 Asset List.csv", low_memory=False)
def standardise_ha_4(data):
    """
    Standardise the HA 4 asset list so that it can be used to search for EPCs.

    The input dataframe is left untouched; a cleaned copy is returned.

    :param data: Dataframe containing "Location Name" and "Post Code" columns
    :return: Copy of data with cleaned location names and the unusable postcodes removed
    """
    # Work on a copy so that the caller's dataframe is never modified
    data = data.copy()

    # Location name contains some strings like {0664} which we remove, then we trim whitespace from either end.
    # A raw string is used so that the backslashes reach the regex engine without invalid escape warnings
    data["Location Name"] = data["Location Name"].str.replace(r'\{.*?\}', '', regex=True).str.strip()

    # Remove any unusable postcodes (entries that consist of two backslashes)
    data = data[data["Post Code"] != '\\\\'].copy()

    # Some specific replacements
    data["Location Name"] = np.where(
        data["Location Name"] == "Calderbrook Pl & Cog La",
        "Calderbrook Place",
        data["Location Name"]
    )

    return data
def _classify_eco4_confidence(current_sap, post_install_sap):
    """
    Grade how comfortably the predicted post-install SAP clears its target.

    Properties with a current SAP of 38 or below are graded against a target of 55, all other properties against
    a target of 69 (presumably the band D / band C boundaries - confirm against the scheme rules).

    :param current_sap: The SAP score on the EPC used for eligibility
    :param post_install_sap: The predicted SAP score after installation (may be NaN when nothing was scored)
    :return: One of "highest confidence", "high confidence", "medium confidence" or "unlikely"
    """
    target = 55 if current_sap <= 38 else 69
    # A NaN prediction fails every comparison and therefore falls through to "unlikely"
    if post_install_sap >= target + 2:
        return "highest confidence"
    if post_install_sap >= target:
        return "high confidence"
    if post_install_sap >= target - 2:
        return "medium confidence"
    return "unlikely"


def get_ha_4_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
    """
    Find the EPCs for each HA 4 asset, check eligibility per UPRN and score the ECO4 eligible properties.

    :param data: Asset list with "Address Line 1", "Location Name", "Post Code" and "row_id" columns
    :param cleaned: Cleaned EPC lookup data, passed through to Eligibility and prepare_model_data_row
    :param cleaning_data: Dataset used for the averages cleaning of the scoring data
    :param created_at: Timestamp passed to prepare_model_data_row and ModelApi
    :param photo_supply_lookup: Passed through to prepare_model_data_row
    :param floor_area_decile_thresholds: Passed through to prepare_model_data_row
    :return: Tuple of (results_df, scoring_data, nodata). nodata holds a row_id when no property was found
        and the full asset record when the search returned no data
    """
    scoring_data = []
    results = []
    nodata = []

    for _, property_meta in tqdm(data.iterrows(), total=len(data)):
        # For many of the entries in this dataset, we're actually given an entire building, so we need EPCs for
        # every dwelling in the building
        # NOTE: this previously passed property_type=property_type_lookup.get(house["Archetype"]), but neither
        # property_type_lookup nor house exists in this function, so it raised a NameError on the first row.
        # No property type is passed until a mapping for this asset list is defined
        searcher = SearchEpc(
            address1=property_meta["Address Line 1"],
            postcode=property_meta["Post Code"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
        )
        searcher.find_property(skip_os=True)

        if searcher.newest_epc is None:
            # Fall back to searching on the location name
            searcher = SearchEpc(
                address1=property_meta["Location Name"],
                postcode=property_meta["Post Code"],
                auth_token=EPC_AUTH_TOKEN,
                os_api_key=None,
            )
            searcher.search()

            if searcher.newest_epc is None:
                nodata.append(property_meta["row_id"])
                continue

        searcher.search()
        if searcher.data is None:
            nodata.append(property_meta.to_dict())
            continue

        epcs = pd.DataFrame(searcher.data["rows"])

        # Take the newest EPC by UPRN
        epcs = epcs.sort_values(by=["lodgement-date"], ascending=False)
        newest_epcs = epcs.drop_duplicates(subset=["uprn"], keep="first")

        # For each EPC, we now check eligibility
        for _, epc in newest_epcs.iterrows():
            eligibility = Eligibility(epc=epc.to_dict(), cleaned=cleaned)
            eligibility.check_gbis_warmfront()
            eligibility.check_eco4_warmfront()

            # We always run the full gbis and eco4 checks as well
            eligibility.check_gbis()
            eligibility.check_eco4()

            if eligibility.eco4_warmfront["eligible"]:
                same_uprn = epcs["uprn"] == epc["uprn"]

                # The older EPCs are every other certificate lodged against the same UPRN
                old_data = epcs[same_uprn & (epcs["lmk-key"] != epc["lmk-key"])].to_dict("records")
                full_sap_epc = epcs[same_uprn & (epcs["transaction-type"] == "new dwelling")].to_dict("records")

                scoring_dictionary = prepare_model_data_row(
                    property_id=eligibility.epc["uprn"],
                    modelling_epc=eligibility.epc,
                    cleaned=cleaned,
                    cleaning_data=cleaning_data,
                    created_at=created_at,
                    old_data=old_data,
                    full_sap_epc=full_sap_epc,
                    # Forwarded in the same way as the other callers of prepare_model_data_row
                    photo_supply_lookup=photo_supply_lookup,
                    floor_area_decile_thresholds=floor_area_decile_thresholds,
                )
                scoring_data.extend(scoring_dictionary)

            # Every UPRN gets a results record, whether or not it was scored
            results.append(
                {
                    "uprn": epc["uprn"],
                    "Location Name": property_meta["Location Name"],
                    "Post Code": property_meta["Post Code"],
                    "property_type": eligibility.epc["property-type"],
                    "gbis_eligible": eligibility.gbis_warmfront,
                    "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                    "eco4_message": eligibility.eco4_warmfront["message"],
                    "sap": float(eligibility.epc["current-energy-efficiency"]),
                    "gbis_eligible_future": eligibility.gbis["eligible"],
                    "gbis_eligible_future_message": eligibility.gbis["message"],
                    "eco4_eligible_future": eligibility.eco4["eligible"],
                    "eco4_eligible_future_message": eligibility.eco4["message"],
                    # Property components
                    "roof": eligibility.roof["clean_description"],
                    "walls": eligibility.walls["clean_description"],
                    "cavity_type": eligibility.cavity["type"],
                    "heating": eligibility.epc["mainheat-description"],
                    "tenure": eligibility.tenure,
                    "date_epc": eligibility.epc["lodgement-date"],
                }
            )

    scoring_df = pd.DataFrame(scoring_data)

    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])
    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )
    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)

    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )

    predictions = all_predictions["sap_change_predictions"].copy()
    results_df = pd.DataFrame(results)

    # The uplift of each prediction row is measured against the current SAP, then summed per UPRN
    predictions = predictions.rename(columns={"property_id": "uprn"}).merge(
        results_df[["uprn", "sap"]], how="left", on="uprn"
    )
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("uprn")["sap_uplift"].sum().reset_index()

    results_df = results_df.merge(
        predictions[["sap_uplift", "uprn"]],
        how="left",
        on="uprn"
    )
    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
    results_df = results_df[~pd.isnull(results_df["uprn"])]

    # The upgrade requirements are dependent on the current SAP, see _classify_eco4_confidence
    eco4_rows = results_df[results_df["eco4_eligible"] == True]
    eligibility_assessment = pd.DataFrame(
        [
            {
                "uprn": row["uprn"],
                "eligibility_classification": _classify_eco4_confidence(row["sap"], row["post_install_sap"])
            }
            for _, row in eco4_rows.iterrows()
        ],
        # Explicit columns keep the merge below valid even when no property is ECO4 eligible
        columns=["uprn", "eligibility_classification"]
    )

    results_df = results_df.merge(
        eligibility_assessment, how="left", on="uprn"
    )

    # We have some properties that are duplicated so we take just one instance
    results_df = results_df.drop_duplicates(subset=["uprn"])

    return results_df, scoring_data, nodata
def analyse_ha_4(results_df, data):
    """
    Exploratory counts of the current and future eligibility of the HA 4 properties.

    Nothing is returned and results_df is not modified; the intermediate values exist to be inspected
    interactively.

    :param results_df: Results with boolean gbis_eligible / eco4_eligible columns, their *_future counterparts
        and an eligibility_classification column
    :param data: The asset list (unused here)
    """
    gbis_now = results_df["gbis_eligible"]
    eco4_now = results_df["eco4_eligible"]

    n_identified = (gbis_now | eco4_now).sum()
    n_eco4 = eco4_now.sum()
    # GBIS is only counted where the property is not already ECO4 eligible
    n_gbis = results_df[~eco4_now]["gbis_eligible"].sum()

    eco_eligible = results_df[eco4_now]
    eco_eligible["eligibility_classification"].value_counts()

    # Future possibilities are the properties that are not identified under either scheme today
    eco4_in_future = results_df["eco4_eligible_future"] == True
    not_identified_now = ~(results_df["gbis_eligible"] | results_df["eco4_eligible"])
    future_possibilities_eco = results_df[eco4_in_future & not_identified_now].copy()

    gbis_in_future = results_df["gbis_eligible_future"] == True
    no_eco4_in_future = results_df["eco4_eligible_future"] == False
    future_possibilities_gbis = results_df[gbis_in_future & no_eco4_in_future & not_identified_now].copy()

    total_future_possibilities = len(future_possibilities_eco.index) + len(future_possibilities_gbis.index)
def app():
    """
    Load and standardise the HA 4 asset list, fetch the supporting datasets from S3 and run the eligibility
    pipeline.
    """
    data = standardise_ha_4(load_ha_4())
    data["row_id"] = [f"h4{i}" for i in range(len(data))]

    # The cleaned EPC data is stored on S3 as a msgpack payload
    cleaned_payload = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name="retrofit-data-dev"
    )
    cleaned = msgpack.unpackb(cleaned_payload, raw=False)

    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
    )

    created_at = datetime.now().isoformat()

    solar_inputs = SolarPhotoSupply.load(bucket="retrofit-data-dev")
    photo_supply_lookup, floor_area_decile_thresholds = solar_inputs

    results_df, scoring_data, nodata = get_ha_4_data(
        data=data,
        cleaned=cleaned,
        cleaning_data=cleaning_data,
        created_at=created_at,
        photo_supply_lookup=photo_supply_lookup,
        floor_area_decile_thresholds=floor_area_decile_thresholds
    )

    # During development the three outputs were stored locally in "ha_4.pickle" as a dictionary with the keys
    # "results_df", "scoring_data" and "nodata", and read back from that file instead of re-running the pipeline

View file

@ -0,0 +1,383 @@
import os
import msgpack
import openpyxl
from openpyxl.styles.colors import COLOR_INDEX
from pathlib import Path
from datetime import datetime
import pandas as pd
import numpy as np
from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet
from utils.logger import setup_logger
from dotenv import load_dotenv
from tqdm import tqdm
from backend.SearchEpc import SearchEpc
from etl.eligibility.Eligibility import Eligibility
from etl.eligibility.ha_15_32.app import prepare_model_data_row
from etl.epc.DataProcessor import DataProcessor
from etl.epc.settings import COLUMNS_TO_MERGE_ON
from backend.ml_models.api import ModelApi
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from recommendations.recommendation_utils import calculate_cavity_age
from recommendation_utils import convert_thickness_to_numeric
# Path of the .env file handed to load_dotenv below, built relative to the folder containing this module
ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
logger = setup_logger()
# The .env file is loaded before the environment is read so that the keys below can come from it
load_dotenv(ENV_FILE)
# Both keys are None when the corresponding environment variable is not set
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
OS_API_KEY = os.getenv("ORDNANCE_SURVEY_API_KEY")
def _resolve_fill_colour(cell):
    """
    Return the fill colour of a cell as a colour string, or None when the cell has no fill.

    :param cell: An openpyxl cell
    :return: The colour string, or None for an unfilled cell
    """
    colour_index = cell.fill.start_color.index
    if colour_index == '00000000':
        return None
    if isinstance(colour_index, int):
        # Indexed colours are positions in the openpyxl palette
        return COLOR_INDEX[colour_index]
    # Anything else is already a colour string, which cannot be used to index the palette
    return colour_index


def load_data():
    """
    Load the data from the excel

    The fill colour of the first cell of each row is captured alongside the values and translated into a
    "row_colour_name" and a "row_code" column.

    :return: Dataframe with one row per asset, plus the row_color, row_colour_name and row_code columns
    """
    workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 7 ASSET LIST.xlsx')
    sheet = workbook.active

    # Prepare lists to collect rows data and their colors
    rows_data = []
    rows_colors = []

    for row in sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
        rows_data.append([cell.value for cell in row])
        # Unfilled rows are recorded as None. Previously None was used to index COLOR_INDEX, which raised a
        # TypeError
        rows_colors.append(_resolve_fill_colour(row[0]))

    df = pd.DataFrame(rows_data, columns=[cell.value for cell in sheet[1]])

    # Add the row colors as a new column
    df['row_color'] = rows_colors

    # The ninth column is renamed by position. The labels are rebuilt rather than mutating df.columns.values in
    # place, because a pandas Index is meant to be immutable
    column_names = list(df.columns)
    column_names[8] = "is_active"
    df.columns = column_names

    # Remove None columns
    df = df.dropna(axis=1, how='all')

    # We now parse the colours - anything that is neither red nor green (including unfilled rows) is yellow
    df["row_colour_name"] = np.where(
        df["row_color"] == "0000FFFF", "red",
        np.where(df["row_color"] == "00FF00FF", "green", "yellow")
    )
    df["row_code"] = np.where(
        df["row_colour_name"] == "red", "invalid",
        np.where(df["row_colour_name"] == "green", "potential ECO4", "needs criteria change")
    )

    return df
def _classify_eco4_confidence(current_sap, post_install_sap):
    """
    Grade how comfortably the predicted post-install SAP clears its target.

    Properties with a current SAP of 38 or below are graded against a target of 55, all other properties against
    a target of 69 (presumably the band D / band C boundaries - confirm against the scheme rules).

    :param current_sap: The SAP score on the EPC used for eligibility
    :param post_install_sap: The predicted SAP score after installation (may be NaN when nothing was scored)
    :return: One of "highest confidence", "high confidence", "medium confidence" or "unlikely"
    """
    target = 55 if current_sap <= 38 else 69
    # A NaN prediction fails every comparison and therefore falls through to "unlikely"
    if post_install_sap >= target + 2:
        return "highest confidence"
    if post_install_sap >= target:
        return "high confidence"
    if post_install_sap >= target - 2:
        return "medium confidence"
    return "unlikely"


def get_ha7_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
    """
    Find the newest EPC for each HA 7 asset, check eligibility and score the ECO4 eligible properties.

    :param data: Asset list with "Address", "Address2", "Postcode", "Archetype" and "row_id" columns
    :param cleaned: Cleaned EPC lookup data, passed through to Eligibility and prepare_model_data_row
    :param cleaning_data: Dataset used for the averages cleaning of the scoring data
    :param created_at: Timestamp passed to prepare_model_data_row and ModelApi
    :param photo_supply_lookup: Passed through to prepare_model_data_row
    :param floor_area_decile_thresholds: Passed through to prepare_model_data_row
    :return: Tuple of (results_df, scoring_data, nodata), where nodata holds the row ids with no EPC found
    """
    # Only the property types are mapped. Any other archetype (e.g. built forms such as "Mid Terrace") is not
    # listed and therefore resolves to None via .get
    property_type_lookup = {
        "House": "House",
        "Flat": "Flat",
        "Bungalow": "Bungalow",
        "Maisonette": "Maisonette",
    }

    scoring_data = []
    results = []
    nodata = []

    for _, house in tqdm(data.iterrows(), total=len(data)):
        # Fall back to the second address field when the first one is empty
        address = house["Address"] if house["Address"] else house["Address2"]

        searcher = SearchEpc(
            address1=address,
            postcode=house["Postcode"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            property_type=property_type_lookup.get(house["Archetype"]),
        )
        searcher.find_property(skip_os=True)

        if searcher.newest_epc is None:
            nodata.append(house["row_id"])
            continue

        newest_epc = searcher.newest_epc
        older_epcs = searcher.older_epcs
        full_sap_epc = searcher.full_sap_epc

        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
        eligibility.check_gbis_warmfront()
        eligibility.check_eco4_warmfront()

        # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
        # Loft MUST be suitable
        cavity_age = None
        if (
            eligibility.walls["is_cavity_wall"] and
            eligibility.walls["is_filled_cavity"] and
            eligibility.loft["suitability"] and
            eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
        ):
            # We check the age of the cavity and if it's particularly old, we flag it
            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)

        # We always run the full gbis and eco4 checks as well
        eligibility.check_gbis()
        eligibility.check_eco4()

        if eligibility.eco4_warmfront["eligible"]:
            scoring_dictionary = prepare_model_data_row(
                property_id=house["row_id"],
                modelling_epc=eligibility.epc,
                cleaned=cleaned,
                cleaning_data=cleaning_data,
                created_at=created_at,
                old_data=older_epcs,
                full_sap_epc=full_sap_epc,
                photo_supply_lookup=photo_supply_lookup,
                floor_area_decile_thresholds=floor_area_decile_thresholds
            )
            scoring_data.extend(scoring_dictionary)

        # Every property with an EPC gets a results record, whether or not it was scored.
        # Later entries win on key clashes, so the unpacked EPC, walls and roof dictionaries override any
        # identically named key set above them
        results.append(
            {
                "row_id": house["row_id"],
                "address": house["Address"],
                "postcode": house["Postcode"],
                "gbis_eligible": eligibility.gbis_warmfront,
                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                "eco4_message": eligibility.eco4_warmfront["message"],
                "sap": float(eligibility.epc["current-energy-efficiency"]),
                "gbis_eligible_future": eligibility.gbis["eligible"],
                "gbis_eligible_future_message": eligibility.gbis["message"],
                "eco4_eligible_future": eligibility.eco4["eligible"],
                "eco4_eligible_future_message": eligibility.eco4["message"],
                # Property components
                "roof": eligibility.roof["clean_description"],
                "walls": eligibility.walls["clean_description"],
                "heating": eligibility.epc["mainheat-description"],
                "tenure": eligibility.tenure,
                "date_epc": eligibility.epc["lodgement-date"],
                **newest_epc,
                "cavity_age": cavity_age,
                **eligibility.walls,
                **eligibility.roof,
            }
        )

    scoring_df = pd.DataFrame(scoring_data)

    # Implement the same process that is being used in the recommendation engine to clean scoring_df
    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])
    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )
    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)

    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )

    predictions = all_predictions["sap_change_predictions"].copy()
    results_df = pd.DataFrame(results)

    # The uplift of each prediction row is measured against the current SAP, then summed per property
    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
        results_df[["row_id", "sap"]], how="left", on="row_id"
    )
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()

    results_df = results_df.merge(
        predictions[["sap_uplift", "row_id"]],
        how="left",
        on="row_id"
    )
    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]

    # The upgrade requirements are dependent on the current SAP, see _classify_eco4_confidence
    eco4_rows = results_df[results_df["eco4_eligible"] == True]
    eligibility_assessment = pd.DataFrame(
        [
            {
                "row_id": row["row_id"],
                "eligibility_classification": _classify_eco4_confidence(row["sap"], row["post_install_sap"])
            }
            for _, row in eco4_rows.iterrows()
        ],
        # Explicit columns keep the merge below valid even when no property is ECO4 eligible
        columns=["row_id", "eligibility_classification"]
    )

    results_df = results_df.merge(
        eligibility_assessment, how="left", on="row_id"
    )

    return results_df, scoring_data, nodata
def analyse_ha_7(results_df, data):
    """
    Exploratory analysis of the HA7 eligibility results.

    The function joins the results onto a handful of descriptive columns from the input data and then derives a
    series of filtered views and counts. Nothing is returned or persisted: every intermediate value is a local, so
    this is only useful when stepped through interactively.

    :param results_df: Results dataframe. The code reads the columns row_id, roof_insulation_thickness,
        eco4_eligible, eco4_message, cavity_age, gbis_eligible, walls, date_epc, eligibility_classification,
        eco4_eligible_future and gbis_eligible_future from it.
    :param data: Input dataframe. The code reads the columns row_id, row_code, "Property Type" and
        "Construction Year Band" from it.
    :return: None
    """
    # Attach the descriptive columns from the input data to each result row
    analysis_data = results_df.merge(
        data[["row_id", "row_code", "Property Type", "Construction Year Band"]], how="left", on="row_id"
    )
    # Result is discarded - only meaningful when inspected interactively
    analysis_data["row_code"].value_counts()

    # NEW
    # Replace any null-like thickness with None before it is passed to the converter
    analysis_data["roof_insulation_thickness"] = np.where(
        pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"]
    )
    # Numeric thickness, always converted as a pitched (not flat) roof
    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
        lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True)
    )

    # ECO4 eligible with a numeric roof insulation thickness of at most 100
    ideal_eco4 = analysis_data[
        (analysis_data["eco4_eligible"] == True) & (
            analysis_data["roof_insulation_thickness_numeric"] <= 100)
    ]

    # ECO4 eligible with a numeric roof insulation thickness above 100
    secondary_eco4_warmfront_not_sold = analysis_data[
        (analysis_data["eco4_eligible"] == True) & (
            analysis_data["roof_insulation_thickness_numeric"] > 100)
    ]

    # Underperforming cavities: failed ECO4 because of a full cavity, the cavity age exceeds 9 * 365
    # (presumably days, i.e. roughly nine years - TODO confirm the unit of cavity_age) and the roof
    # insulation thickness is at most 100
    underperforming_cavities = analysis_data[
        (analysis_data["eco4_message"] == "Failed due to full cavity - check cavity age") & (
            analysis_data["cavity_age"] > 9 * 365
        ) & (analysis_data["roof_insulation_thickness_numeric"] <= 100)
    ]

    # GBIS eligible but not ECO4 eligible
    identified_gbis_not_sold = analysis_data[
        (analysis_data["gbis_eligible"] == True) & (
            analysis_data["eco4_eligible"] == False
        )
    ]

    # Rows whose row_code in the input data is "potential ECO4"
    wf_identified = analysis_data[
        (analysis_data["row_code"] == "potential ECO4")
    ]
    # END NEW

    warmfront_identification = analysis_data["row_code"].value_counts()

    # Same filter as wf_identified above
    warmfront_identified = analysis_data[analysis_data["row_code"] == "potential ECO4"]
    # The next two results are discarded - only meaningful when inspected interactively
    warmfront_identified["walls"].value_counts(normalize=True)

    analysis_data["Construction Year Band"].value_counts(normalize=True)

    # Number of days between each EPC date and now (mean is computed but discarded)
    days_to_today = (datetime.now() - pd.to_datetime(warmfront_identified["date_epc"])).dt.days
    days_to_today.mean()

    property_types = analysis_data["Property Type"].value_counts()

    # Number of properties eligible for either scheme
    n_identified = (results_df["gbis_eligible"] | results_df["eco4_eligible"]).sum()

    eco_identified = results_df[results_df["eco4_eligible"]]
    n_eco4 = eco_identified["eco4_eligible"].sum()

    # GBIS counts exclude anything already eligible for ECO4
    gbis_identified = results_df[~results_df["eco4_eligible"] & results_df["gbis_eligible"]]
    n_gbis = results_df[~results_df["eco4_eligible"]]["gbis_eligible"].sum()

    eco_eligibile = results_df[results_df["eco4_eligible"]]
    # Result is discarded - only meaningful when inspected interactively
    eco_eligibile["eligibility_classification"].value_counts()

    # Not eligible for either scheme today, but flagged as ECO4 eligible in future
    future_possibilities_eco = results_df[
        (results_df["eco4_eligible_future"] == True) & (~(results_df["gbis_eligible"] | results_df["eco4_eligible"]))
    ].copy()

    # Not eligible for either scheme today, flagged as GBIS (but not ECO4) eligible in future
    future_possibilities_gbis = results_df[
        (results_df["gbis_eligible_future"] == True) & (results_df["eco4_eligible_future"] == False) & (
            ~(results_df["gbis_eligible"] | results_df["eco4_eligible"]))
    ].copy()

    total_future_possibilities = future_possibilities_eco.shape[0] + future_possibilities_gbis.shape[0]
def app():
    """
    Run the HA7 workflow: load the input data, fetch the supporting datasets from S3 and build the results.

    The results are held in local variables only; the commented snippets at the bottom show how they can be
    pickled, or how a previous run can be read back in.
    """
    dev_bucket = "retrofit-data-dev"

    data = load_data()
    # Every input row gets an identifier made of "ha7" followed by its position
    data["row_id"] = [f"ha7{position}" for position in range(len(data))]

    # The cleaned EPC data is stored msgpack-encoded, so decode it straight after download
    cleaned = msgpack.unpackb(
        read_from_s3(s3_file_name="cleaned_epc_data/cleaned.bson", bucket_name=dev_bucket),
        raw=False,
    )

    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name=dev_bucket,
        file_key="sap_change_model/cleaning_dataset.parquet",
    )

    solar_lookups = SolarPhotoSupply.load(bucket=dev_bucket)
    photo_supply_lookup, floor_area_decile_thresholds = solar_lookups

    created_at = datetime.now().isoformat()

    results_df, scoring_data, nodata = get_ha7_data(
        data,
        cleaned,
        cleaning_data,
        created_at,
        photo_supply_lookup,
        floor_area_decile_thresholds,
    )

    # To pickle the results:
    # import pickle
    # with open("ha7_results_jan_10.pkl", "wb") as f:
    #     pickle.dump({"results_df": results_df, "scoring_data": scoring_data, "nodata": nodata}, f)

    # To read a previous run back in:
    # import pickle
    # with open("ha7_results_jan_10.pkl", "rb") as f:
    #     old_data = pickle.load(f)
    # results_df = old_data["results_df"]
    # scoring_data = old_data["scoring_data"]
    # nodata = old_data["nodata"]

View file

@ -766,12 +766,16 @@ class EPCDataProcessor:
how='left' how='left'
) )
global_averages = cleaning_data[cols_to_clean].mean()
# Fill NaN values with averages # Fill NaN values with averages
for col in cols_to_clean: for col in cols_to_clean:
data_to_clean[col].fillna(data_to_clean[f"{col}_AVERAGE"], inplace=True) data_to_clean[col].fillna(data_to_clean[f"{col}_AVERAGE"], inplace=True)
data_to_clean.drop(columns=[f"{col}_AVERAGE"], inplace=True) data_to_clean.drop(columns=[f"{col}_AVERAGE"], inplace=True)
# If we still have missings # If we still have missings
data_to_clean[col].fillna(data_to_clean[col].mean(), inplace=True) data_to_clean[col].fillna(data_to_clean[col].mean(), inplace=True)
# Final step if we still have missings - use global mean
data_to_clean[col].fillna(global_averages[col], inplace=True)
return data_to_clean return data_to_clean

View file

@ -23,6 +23,12 @@ def main():
pd.DataFrame(epc_pipeline.compiled_all_equal_rows).to_parquet("refactor_datasets/all_equal_rows.parquet") pd.DataFrame(epc_pipeline.compiled_all_equal_rows).to_parquet("refactor_datasets/all_equal_rows.parquet")
pd.concat(epc_pipeline.compiled_cleaning_averages).to_parquet("refactor_datasets/cleaning_averages.parquet") pd.concat(epc_pipeline.compiled_cleaning_averages).to_parquet("refactor_datasets/cleaning_averages.parquet")
from utils.s3 import read_dataframe_from_s3_parquet
dataset = read_dataframe_from_s3_parquet(
bucket_name="retrofit-data-dev",
file_key="sap_change_model/dataset_test.parquet",
)
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View file

@ -16,6 +16,7 @@ class MainHeatAttributes(Definitions):
"solar assisted heat pump", "solar assisted heat pump",
"exhaust source heat pump", "exhaust source heat pump",
"community heat pump", "community heat pump",
"portable electric heating"
] ]
FUEL_TYPES = ["electric", "mains gas", "wood logs", "coal", "oil", "wood pellets", "anthracite", FUEL_TYPES = ["electric", "mains gas", "wood logs", "coal", "oil", "wood pellets", "anthracite",
"dual fuel mineral and wood", "smokeless fuel", "lpg", "b30k"] "dual fuel mineral and wood", "smokeless fuel", "lpg", "b30k"]

View file

@ -152,4 +152,7 @@ class WallAttributes(Definitions):
else: else:
result["insulation_thickness"] = "average" result["insulation_thickness"] = "average"
if result["is_cavity_wall"] & result["is_as_built"] & (result["insulation_thickness"] == "average"):
result["is_filled_cavity"] = True
return result return result

View file

@ -52,7 +52,7 @@ class WindowAttributes(Definitions):
raise ValueError('Invalid description') raise ValueError('Invalid description')
def process(self) -> Dict[str, Union[str, bool]]: def process(self) -> Dict[str, Union[str, bool]]:
result: Dict[str, Union[str, bool]] = { result: Dict[str, Union[str, bool, None]] = {
"has_glazing": False, "has_glazing": False,
"glazing_coverage": None, "glazing_coverage": None,
"glazing_type": None, "glazing_type": None,
@ -80,7 +80,11 @@ class WindowAttributes(Definitions):
break break
# If we didn't find any coverage or type, we assume full coverage # If we didn't find any coverage or type, we assume full coverage
if not result["glazing_coverage"]: if (not result["glazing_coverage"]) & (result["glazing_type"] != "single"):
result["glazing_coverage"] = "full" result["glazing_coverage"] = "full"
# We reset some values if the glazing is single
if result["glazing_type"] == "single":
result["has_glazing"] = False
return result return result

View file

@ -1652,4 +1652,17 @@ mainheat_cases = [
'has_electricaire': False, 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False, 'has_electricaire': False, 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False,
"has_electric_heat_pumps": False, "has_electric_heat_pumps": False,
"has_micro-cogeneration": False}, "has_micro-cogeneration": False},
{'original_description': 'Portable electric heating assumed for most rooms', 'has_radiators': False,
'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False,
'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': False,
'has_air_source_heat_pump': False, 'has_room_heaters': False, 'has_electric_storage_heaters': False,
'has_warm_air': False, 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False,
'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False,
'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False, 'has_electric_heat_pump': False,
'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False, 'has_exhaust_source_heat_pump': False,
'has_community_heat_pump': False, 'has_portable_electric_heating': True, 'has_electric': True,
'has_mains_gas': False, 'has_wood_logs': False, 'has_coal': False, 'has_oil': False, 'has_wood_pellets': False,
'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False, 'has_lpg': False,
'has_b30k': False, 'has_assumed': True, 'has_electricaire': False, 'has_assumed_for_most_rooms': True,
'has_underfloor_heating': False}
] ]

View file

@ -550,7 +550,7 @@ wall_cases = [
'is_as_built': False, 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False, 'is_as_built': False, 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False,
'insulation_thickness': None, 'external_insulation': False, 'internal_insulation': False}, 'insulation_thickness': None, 'external_insulation': False, 'internal_insulation': False},
{'original_description': 'Cavity wall, as built, insulated (assumed)', 'thermal_transmittance': None, {'original_description': 'Cavity wall, as built, insulated (assumed)', 'thermal_transmittance': None,
'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': False, 'is_solid_brick': False, 'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': True, 'is_solid_brick': False,
'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True, 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True,
'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average', 'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average',
'external_insulation': False, 'internal_insulation': False}, 'external_insulation': False, 'internal_insulation': False},
@ -727,7 +727,7 @@ wall_cases = [
'external_insulation': False, 'internal_insulation': False}, 'external_insulation': False, 'internal_insulation': False},
{'original_description': 'Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)', {'original_description': 'Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)',
'thermal_transmittance': None, 'thermal_transmittance': None,
'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': False, 'is_solid_brick': False, 'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': True, 'is_solid_brick': False,
'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True, 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True,
'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average', 'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average',
'external_insulation': False, 'internal_insulation': False}, 'external_insulation': False, 'internal_insulation': False},

View file

@ -30,7 +30,8 @@ windows_cases = [
'glazing_type': 'triple', 'no_data': False}, 'glazing_type': 'triple', 'no_data': False},
{'original_description': 'Gwydrau triphlyg rhannol', 'has_glazing': True, 'glazing_coverage': 'partial', {'original_description': 'Gwydrau triphlyg rhannol', 'has_glazing': True, 'glazing_coverage': 'partial',
'glazing_type': 'triple', 'no_data': False}, 'glazing_type': 'triple', 'no_data': False},
{'original_description': 'Single glazed', 'has_glazing': True, 'glazing_coverage': 'full', 'glazing_type': 'single', {'original_description': 'Single glazed', 'has_glazing': False, 'glazing_coverage': None,
'glazing_type': 'single',
'no_data': False}, 'no_data': False},
{'original_description': 'Some double glazing', 'has_glazing': True, 'glazing_coverage': 'partial', {'original_description': 'Some double glazing', 'has_glazing': True, 'glazing_coverage': 'partial',
'glazing_type': 'double', 'no_data': False}, 'glazing_type': 'double', 'no_data': False},
@ -46,7 +47,8 @@ windows_cases = [
'glazing_type': 'double', 'no_data': False}, 'glazing_type': 'double', 'no_data': False},
{'original_description': 'Gwydrau dwbl gan mwyaf', 'has_glazing': True, 'glazing_coverage': 'most', {'original_description': 'Gwydrau dwbl gan mwyaf', 'has_glazing': True, 'glazing_coverage': 'most',
'glazing_type': 'double', 'no_data': False}, 'glazing_type': 'double', 'no_data': False},
{'original_description': 'Gwydrau sengl', 'has_glazing': True, 'glazing_coverage': 'full', 'glazing_type': 'single', {'original_description': 'Gwydrau sengl', 'has_glazing': False, 'glazing_coverage': None,
'glazing_type': 'single',
'no_data': False}, 'no_data': False},
{'original_description': 'Ffenestri perfformiad uchel', 'has_glazing': True, 'glazing_coverage': 'full', {'original_description': 'Ffenestri perfformiad uchel', 'has_glazing': True, 'glazing_coverage': 'full',
'glazing_type': 'high performance', 'no_data': False}, 'glazing_type': 'high performance', 'no_data': False},

View file

@ -3,12 +3,13 @@ from pathlib import Path
from etl.epc_clean.tests.test_data.test_roof_attributes_cases import clean_roof_test_cases from etl.epc_clean.tests.test_data.test_roof_attributes_cases import clean_roof_test_cases
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
# For local testing # For local testing
if __file__ == "<input>": # if __file__ == "<input>":
input_data_path = Path("./model_data/tests/test_data/EpcClean_inputs.obj") # input_data_path = Path("./model_data/tests/test_data/EpcClean_inputs.obj")
else: # else:
current_file_path = Path(__file__) # current_file_path = Path(__file__)
input_data_path = current_file_path.parent / 'test_data' / 'EpcClean_inputs.obj' # input_data_path = current_file_path.parent / 'test_data' / 'EpcClean_inputs.obj'
class TestRoofAttributes: class TestRoofAttributes:
@ -88,7 +89,12 @@ class TestRoofAttributes:
def test_clean_roof_no_description(self): def test_clean_roof_no_description(self):
roof = RoofAttributes('').process() roof = RoofAttributes('').process()
assert roof == {} assert roof == {
'thermal_transmittance': False, 'thermal_transmittance_unit': False, 'is_pitched': False,
'is_roof_room': False, 'is_loft': False, 'is_flat': False, 'is_thatched': False,
'is_at_rafters': False, 'is_assumed': False, 'has_dwelling_above': False, 'is_valid': False,
'insulation_thickness': False
}
def test_clean_roof_edge_cases(self): def test_clean_roof_edge_cases(self):
# Insulation thickness edge case # Insulation thickness edge case

View file

@ -0,0 +1,244 @@
import pandas as pd
from tqdm import tqdm
from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
from utils.logger import setup_logger
logger = setup_logger()
class SolarPhotoSupply:
    """
    Builds, stores and queries a lookup of observed PHOTO_SUPPLY values taken from EPC certificate files.

    The lookup is aggregated by property type, built form, tenure, roof shape, construction age band and floor area
    decile, and can be filtered to find the most appropriate PHOTO_SUPPLY figures for a given property.
    """

    DATASET_COLUMNS = [
        "UPRN", "PROPERTY_TYPE", "TENURE", "BUILT_FORM", "ROOF_DESCRIPTION", "PHOTO_SUPPLY", "TOTAL_FLOOR_AREA",
        "CONSTRUCTION_AGE_BAND", "SOLAR_WATER_HEATING_FLAG"
    ]

    # S3 keys shared by save() and load() so the two can never drift apart
    _PHOTO_SUPPLY_LOOKUP_KEY = "solar_pv_supply/photo_supply_lookup.parquet"
    _DECILE_THRESHOLDS_KEY = "solar_pv_supply/floor_area_decile_thresholds.parquet"

    def __init__(self, file_directories, cleaned_lookup):
        """
        Initialize the SolarPhotoSupply class with file directories and a cleaned lookup. Currently, this class
        just works with locally stored data, but this could be extended to work with data stored in S3.

        :param file_directories: A list of directories where files are stored.
        :param cleaned_lookup: A dictionary containing cleaned lookup data.
        """
        self.file_directories = file_directories
        # Starts as an empty list and is replaced by a dataframe in create_dataset()
        self.results = []
        self.decile_thresholds = None
        self.roof_lookup = pd.DataFrame(cleaned_lookup.get("roof-description"))

        self.photo_supply_lookup = pd.DataFrame()
        self.floor_area_decile_thresholds = pd.DataFrame()

    def create_dataset(self):
        """
        Create a dataset from the provided file directories. This method processes the data files,
        applies transformations, and aggregates data into a useful format.

        :raises ValueError: If there is no roof lookup data, or if no certificate data was read.
        """
        if self.roof_lookup.empty:
            raise ValueError("No roof lookup data")

        results = []
        logger.info("Creating solar photo supply dataset")
        for directory in tqdm(self.file_directories):
            filepath = directory / "certificates.csv"
            df = pd.read_csv(filepath, low_memory=False)

            # A UPRN is required; dropna returns a new frame, so the assignment below is not made on a slice
            df = df.dropna(subset=["UPRN"])
            df["UPRN"] = df["UPRN"].astype(int).astype(str)

            # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA
            df = df.dropna(subset=["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"])

            # Take newest LODGEMENT_DATE per UPRN
            df = df.sort_values(by="LODGEMENT_DATE", ascending=False).drop_duplicates(subset=["UPRN"])

            data = df[self.DATASET_COLUMNS].copy()
            # Only keep properties that actually report a photo supply
            data["PHOTO_SUPPLY"] = data["PHOTO_SUPPLY"].fillna(0)
            data = data[data["PHOTO_SUPPLY"] != 0]

            results.append(data)

        if not results:
            # pd.concat would raise a ValueError with a far less helpful message
            raise ValueError("No certificate data found in the provided file directories")

        self.results = pd.concat(results)

        # Convert total floor area to deciles
        self.decile_thresholds = self.results["TOTAL_FLOOR_AREA"].quantile(
            [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
        ).values

        # Bins are closed on the right, which is the same rule classify_floor_area applies to new values
        self.results["floor_area_decile"] = pd.cut(
            self.results["TOTAL_FLOOR_AREA"],
            bins=[0] + list(self.decile_thresholds) + [float('inf')],
            labels=False,
            include_lowest=True
        )

        # Convert tenure to lower
        self.results["TENURE"] = self.results["TENURE"].str.lower()

        self.results = self.results.merge(
            self.roof_lookup.drop(
                columns=[
                    "clean_description", "thermal_transmittance", "thermal_transmittance_unit", "insulation_thickness",
                    "is_assumed"
                ]
            ),
            left_on="ROOF_DESCRIPTION",
            right_on="original_description",
            how="left"
        )

        self.photo_supply_lookup = self.results.groupby(
            [
                "PROPERTY_TYPE", "BUILT_FORM", "TENURE", "is_pitched", "is_roof_room", "is_flat",
                "CONSTRUCTION_AGE_BAND", "floor_area_decile"
            ],
            observed=True
        ).agg(
            {
                "PHOTO_SUPPLY": ["median", "mean"],
            }
        ).reset_index()

        # Flatten the two-level column index produced by agg
        self.photo_supply_lookup.columns = ['_'.join(col).strip() for col in self.photo_supply_lookup.columns.values]
        # Remove trailing underscore from columns
        self.photo_supply_lookup.columns = [
            col[:-1] if col.endswith("_") else col for col in self.photo_supply_lookup.columns.values
        ]
        # Convert columns to lowercase
        self.photo_supply_lookup.columns = [col.lower() for col in self.photo_supply_lookup.columns.values]

        self.floor_area_decile_thresholds = pd.DataFrame(
            self.decile_thresholds,
            columns=["floor_area_decile_thresholds"]
        )

    @staticmethod
    def classify_floor_area(new_area, thresholds):
        """
        Classify a given floor area into a decile based on provided thresholds.

        :param new_area: The new floor area to be classified.
        :param thresholds: A list of thresholds used for classification.
        :return: An integer representing the decile index.
        """
        for i, threshold in enumerate(thresholds):
            if new_area <= threshold:
                return i  # Index of the first threshold that is not exceeded
        # Larger than every threshold, so it falls in the top band
        return len(thresholds)

    def save(self, bucket="retrofit-data-dev"):
        """
        Save the processed data to an S3 bucket in the parquet format. This method also handles
        logging and validation to ensure data is present before saving.

        :param bucket: The name of the S3 bucket to save to. Defaults to the bucket that was previously hard-coded.
        :raises ValueError: If the photo supply lookup is empty.
        """
        if self.photo_supply_lookup.empty:
            raise ValueError("No data to save")

        logger.info("Storing outputs to S3")
        # Store this data in s3 as a parquet file
        save_dataframe_to_s3_parquet(
            df=self.photo_supply_lookup,
            bucket_name=bucket,
            file_key=self._PHOTO_SUPPLY_LOOKUP_KEY,
        )

        save_dataframe_to_s3_parquet(
            df=self.floor_area_decile_thresholds,
            bucket_name=bucket,
            file_key=self._DECILE_THRESHOLDS_KEY,
        )

    @staticmethod
    def load(bucket):
        """
        Load datasets from an S3 bucket.

        :param bucket: The name of the S3 bucket to load data from.
        :return: A tuple containing photo supply lookup and floor area decile thresholds dataframes.
        """
        photo_supply_lookup = read_dataframe_from_s3_parquet(
            bucket_name=bucket, file_key=SolarPhotoSupply._PHOTO_SUPPLY_LOOKUP_KEY,
        )

        floor_area_decile_thresholds = read_dataframe_from_s3_parquet(
            bucket_name=bucket, file_key=SolarPhotoSupply._DECILE_THRESHOLDS_KEY,
        )

        return photo_supply_lookup, floor_area_decile_thresholds

    @classmethod
    def filter_photo_supply_lookup(
        cls,
        photo_supply_lookup: pd.DataFrame,
        floor_area_decile_thresholds: pd.DataFrame,
        tenure: str,
        built_form: str,
        property_type: str,
        construction_age_band: str,
        is_flat: bool,
        is_pitched: bool,
        is_roof_room: bool,
        floor_area: float
    ) -> pd.DataFrame:
        """
        Filter the photo supply lookup to find the most appropriate photo supply for a given property.

        :param photo_supply_lookup: The photo supply lookup dataframe.
        :param floor_area_decile_thresholds: The floor area decile thresholds dataframe.
        :param tenure: The tenure of the property.
        :param built_form: The built form of the property.
        :param property_type: The property type of the property.
        :param construction_age_band: The construction age band of the property.
        :param is_flat: Whether the property has a flat roof.
        :param is_pitched: Whether the property has a pitched roof.
        :param is_roof_room: Whether the property has a roof room.
        :param floor_area: The floor area of the property.
        :return: The matching rows of the lookup, narrowed to the property's floor area decile when that decile is
            present among the matches.
        :raises ValueError: If nothing matches on tenure, built form and property type.
        """
        # Convert the tenure to lower case, as is done in the creation of the dataset
        tenure = tenure.lower()
        # We remap the "not defined"
        tenure = {
            "not defined - use in the case of a new dwelling for which the intended tenure in not known. it is not to "
            "be used for an existing dwelling":
                "not defined - use in the case of a new dwelling for which the intended tenure in not known. it is no"
        }.get(tenure, tenure)

        # The coarse match is reused as the fallback when the full match finds nothing
        coarse_match = (
            (photo_supply_lookup["tenure"] == tenure) &
            (photo_supply_lookup["built_form"] == built_form) &
            (photo_supply_lookup["property_type"] == property_type)
        )

        photo_supply_matched = photo_supply_lookup[
            coarse_match &
            (photo_supply_lookup["construction_age_band"] == construction_age_band) &
            (photo_supply_lookup["is_flat"] == is_flat) &
            (photo_supply_lookup["is_pitched"] == is_pitched) &
            (photo_supply_lookup["is_roof_room"] == is_roof_room)
        ]

        if photo_supply_matched.empty:
            # There are a small number of cases where we don't get a full match so try again with a more aggregated
            # average
            photo_supply_matched = photo_supply_lookup[coarse_match]

            if construction_age_band in photo_supply_matched["construction_age_band"].values:
                photo_supply_matched = photo_supply_matched[
                    photo_supply_matched["construction_age_band"] == construction_age_band
                ]

        if photo_supply_matched.empty:
            raise ValueError("No photo supply matches")

        floor_area_decile = cls.classify_floor_area(
            floor_area, floor_area_decile_thresholds["floor_area_decile_thresholds"].values
        )

        # Only narrow by decile when that does not throw every match away
        if floor_area_decile in photo_supply_matched["floor_area_decile"].values:
            photo_supply_matched = photo_supply_matched[
                photo_supply_matched["floor_area_decile"] == floor_area_decile
            ]

        return photo_supply_matched

31
etl/solar/app.py Normal file
View file

@ -0,0 +1,31 @@
from pathlib import Path
from etl.epc.property_change_app import get_cleaned
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
def app():
    """
    Read in the EPC data and attempt to produce a reasonable figure for the photo-supply variable, which is
    described as:
    "Percentage of photovoltaic area as a percentage of total roof area. 0% indicates that a Photovoltaic Supply
    is not present in the property."

    When recommending solar, we want to simulate the retrofit by increasing this value from 0, so we need a sensible
    figure to increase it to. This script builds the dataset used to deduce that figure and stores it.
    :return:
    """
    # Each sub-directory of the data directory is handed to the client as one source of certificates
    certificate_directories = [path for path in DATA_DIRECTORY.iterdir() if path.is_dir()]

    solar_data_client = SolarPhotoSupply(
        file_directories=certificate_directories,
        cleaned_lookup=get_cleaned(),
    )

    solar_data_client.create_dataset()
    solar_data_client.save()

View file

@ -0,0 +1,109 @@
import unittest
import pandas as pd
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
class TestSolarPhotoSupply(unittest.TestCase):
    """Tests for the lookup filtering and floor area classification of SolarPhotoSupply."""

    def setUp(self):
        # Two-row mock lookup, one row per distinct property profile
        self.photo_supply_lookup = pd.DataFrame(
            [
                {
                    "tenure": "leasehold",
                    "built_form": "detached",
                    "property_type": "house",
                    "construction_age_band": "pre-1900",
                    "is_flat": False,
                    "is_pitched": True,
                    "is_roof_room": False,
                    "floor_area_decile": 0,
                    "photo_supply": 100,
                },
                {
                    "tenure": "freehold",
                    "built_form": "semi-detached",
                    "property_type": "flat",
                    "construction_age_band": "1900-1929",
                    "is_flat": True,
                    "is_pitched": False,
                    "is_roof_room": True,
                    "floor_area_decile": 1,
                    "photo_supply": 200,
                },
            ]
        )
        self.floor_area_decile_thresholds = pd.DataFrame({"floor_area_decile_thresholds": [50, 100]})
        self.solar_photo_supply = SolarPhotoSupply([], {})

    def _filter(self, *property_details):
        # Run the filter against the mock lookup; property_details are the positional arguments that follow
        # the two dataframes (tenure through to floor area)
        return self.solar_photo_supply.filter_photo_supply_lookup(
            self.photo_supply_lookup,
            self.floor_area_decile_thresholds,
            *property_details
        )

    def test_correct_filtering(self):
        matched = self._filter("leasehold", "detached", "house", "pre-1900", False, True, False, 45)
        self.assertEqual(len(matched), 1)
        self.assertEqual(matched.iloc[0]["photo_supply"], 100)

    def test_no_matches(self):
        # An unknown built form leaves nothing to fall back on
        with self.assertRaises(ValueError):
            self._filter("leasehold", "unknown", "house", "pre-1900", False, True, False, 45)

    def test_floor_area_decile_matching(self):
        matched = self._filter("freehold", "semi-detached", "flat", "1900-1929", True, False, True, 60)
        self.assertEqual(len(matched), 1)
        self.assertEqual(matched.iloc[0]["photo_supply"], 200)

    def test_invalid_parameters(self):
        # Invalid type for tenure
        with self.assertRaises(AttributeError):
            self._filter(123, "detached", "house", "pre-1900", False, True, False, 45)

    def test_classify_floor_area(self):
        thresholds = [10, 20, 30, 40, 50]
        classifier = SolarPhotoSupply([], {})

        # Test Case 1: Valid floor area
        self.assertEqual(
            classifier.classify_floor_area(25, thresholds),
            2,
            "Decile classification did not match expected result"
        )

        # Test Case 2: Out of range floor area
        self.assertEqual(
            classifier.classify_floor_area(60, thresholds),
            len(thresholds),
            "Decile classification for out of range value is incorrect"
        )
if __name__ == '__main__':
unittest.main()

View file

@ -0,0 +1,194 @@
from pathlib import Path
from random import choices, sample
import os
import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv
from utils.logger import setup_logger
from backend.SearchEpc import SearchEpc, vartypes
from BaseUtility import Definitions
from etl.epc.settings import BUILT_FORM_REMAP
ENV_FILE = Path(__file__).parent / "backend" / ".env"

logger = setup_logger()

DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
# Number of UPRNs taken from each sampled directory
DIR_SAMPLE_SIZE = 500
# Number of directories sampled per run
N_DIRECTORIES = 50

# The .env file has to be loaded before the token is read, otherwise a token that is only defined in the .env file
# comes back as None
load_dotenv(ENV_FILE)
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")

# String variables that are excluded from the categorical comparison
CATETORICALS_TO_IGNORE = [
    "postcode", "constituency", "local-authority", "built-form", "property-type", "address1", "constituency-label",
    "building-reference-number", "address2", "posttown", "transaction-type", "lmk-key", "address3",
    "local-authority-label", "county",
]
def check_numeric_performance(estimated_value, actual_value):
    """
    Score how far an estimated numeric value is from the actual value, as a relative error.

    :param estimated_value: The value produced by the estimation process.
    :param actual_value: The value recorded on the real EPC.
    :return: None if there is no actual value to compare against; 1 if the estimate is missing, or if the actual
        value is zero and the estimate is not; 0 if both are zero; otherwise the absolute difference divided by the
        magnitude of the actual value.
    """
    # If we don't have anything to compare against, return None
    if pd.isnull(actual_value):
        return None

    # A missing estimate counts as a complete miss
    if pd.isnull(estimated_value):
        return 1

    # A relative error is undefined for a zero actual, so score it as either exact or a complete miss
    if actual_value == 0:
        return 0 if estimated_value == 0 else 1

    # Divide by the magnitude of the actual value: dividing by a negative actual would produce a negative error
    return abs(estimated_value - actual_value) / abs(actual_value)
def app():
    """
    This script is used to test the EPC estimation process.

    For a sample of certificate directories it takes the newest EPC of a fixed set of UPRNs, estimates that EPC with
    all of the property's own certificates withheld, and scores the estimate against the real record. Numeric
    variables are scored with check_numeric_performance, string variables by exact match. The aggregated figures
    are held in local variables only, so the script is intended to be stepped through interactively.

    :raises ValueError: If a variable type is neither numeric nor string.
    """
    numerical_vartypes = {key: value for key, value in vartypes.items() if value in ["float", "Int64"]}
    str_var_types = {key: value for key, value in vartypes.items() if value == "str"}
    # Make sure we haven't missed any keys
    if len(numerical_vartypes) + len(str_var_types) != len(vartypes):
        raise ValueError("Not all vartypes have been accounted for")

    # Drop some keys that aren't important
    for k in CATETORICALS_TO_IGNORE:
        str_var_types.pop(k, None)

    directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()]
    # Sample without replacement so the same directory is never scored twice; cap the size so this still works
    # when fewer directories are available
    directory_sample = sample(directories, k=min(N_DIRECTORIES, len(directories)))

    results = []
    for directory in tqdm(directory_sample):
        filepath = directory / "certificates.csv"
        df = pd.read_csv(filepath, low_memory=False)
        # Drop missing UPRNs before the string conversion: once converted, a missing value becomes the string
        # "<NA>" and is no longer detected as null
        df = df[~pd.isnull(df["UPRN"])].copy()
        df["UPRN"] = df["UPRN"].astype("Int64").astype("str")

        # uprn_sample = sample(df["UPRN"].unique().tolist(), DIR_SAMPLE_SIZE)
        # Take a fixed sample based on the first DIR_SAMPLE_SIZE uprns
        uprn_sample = sorted(df["UPRN"].unique().tolist())[:DIR_SAMPLE_SIZE]
        df_sample = df[df["UPRN"].isin(uprn_sample)]
        # Take the record with the newest LODGEMENT_DATETIME by uprn
        df_sample = df_sample.sort_values("LODGEMENT_DATETIME", ascending=False).drop_duplicates("UPRN")

        # Convert the columns to lower case and replace underscores with hyphens, the same as the api
        df_sample.columns = df_sample.columns.str.lower().str.replace("_", "-")

        # For each epc, we test the estimation process
        for _, epc in df_sample.iterrows():
            epc = epc.to_dict()
            address1 = epc["address1"]
            postcode = epc["postcode"]

            # Get all EPCs for this uprn and we make sure they get dropped from the estimate_epc function
            epcs_for_uprn = df[df["UPRN"] == epc["uprn"]]
            lmks_to_drop = epcs_for_uprn["LMK_KEY"].tolist()

            searcher = SearchEpc(address1, postcode, auth_token=EPC_AUTH_TOKEN, os_api_key="")
            searcher.uprn = epc["uprn"]

            # Perform the same remapping for built-form as in the Property class for this test, in case we get (e.g.)
            # Enclosed End-Terrace
            built_form = BUILT_FORM_REMAP.get(epc["built-form"], epc["built-form"])
            if ((epc["property-type"] == "Maisonette") & (built_form == "Detached")) or (
                built_form in Definitions.DATA_ANOMALY_MATCHES
            ):
                built_form = ""

            estimated_epc = searcher.estimate_epc(
                property_type=epc["property-type"], built_form=built_form, lmks_to_drop=lmks_to_drop
            )

            # We now compare the difference between the estimated and original
            # TODO: We can convert windows and lighting to numeric versions and estimate how close we are
            numeric_performance = {
                key: check_numeric_performance(estimated_epc[key], epc[key]) for key in numerical_vartypes
            }
            # Remove Nones
            numeric_performance = {key: value for key, value in numeric_performance.items() if value is not None}
            if numeric_performance:
                # Get an average
                numeric_success = 1 - sum(numeric_performance.values()) / len(numeric_performance)
            else:
                # Nothing was comparable for this EPC; NaN is skipped by the medians taken below
                numeric_success = float("nan")

            # categorical performance
            categorical_performance = {
                key: 0 if estimated_epc[key] != epc[key] else 1 for key in str_var_types
            }
            if categorical_performance:
                # Get an average
                categorical_success = sum(categorical_performance.values()) / len(categorical_performance)
            else:
                categorical_success = float("nan")

            results.append(
                {
                    "uprn": epc["uprn"],
                    "numeric_success": numeric_success,
                    "categorical_success": categorical_success,
                    "property_type": epc["property-type"],
                    "built_form": epc["built-form"],
                    "tenure": epc["tenure"],
                }
            )

    # Get aggregate performance figures
    results_df = pd.DataFrame(results)
    results_df["tenure"] = results_df["tenure"].replace("Rented (social)", "rental (social)")

    avg_numeric_succes = results_df["numeric_success"].median()
    avg_categorical_sucess = results_df["categorical_success"].median()
    # With 20 nearest homes
    # 0.7718100840549558
    # 0.5116279069767442

    # 100 nearest homes
    # 0.7859617377809409
    # 0.5348837209302325

    # Fixed sample, sqrt weights

    # Group by tenure
    by_tenure = results_df.groupby("tenure").agg(
        {"numeric_success": "median", "categorical_success": "median", "uprn": "count"}
    )
    pd.set_option('display.max_rows', 500)
    pd.set_option('display.max_columns', 500)
    pd.set_option('display.width', 1000)
    # With 20 nearest homes
    # numeric_success categorical_success uprn
    # tenure
    # NO DATA! 0.847840 0.581395 278
    # Not defined - use in the case of a new dwelling... 0.930282 0.651163 617
    # Owner-occupied 0.770330 0.511628 2588
    # Rented (private) 0.791885 0.558140 1232
    # owner-occupied 0.741088 0.488372 10912
    # rental (private) 0.749064 0.488372 3252
    # rental (social) 0.822109 0.581395 3878
    # unknown 0.895840 0.627907 1820

    # 100 nearest homes
    # tenure
    # NO DATA! 0.899566 0.604651 233
    # Not defined - use in the case of a new dwelling... 0.927518 0.674419 608
    # Owner-occupied 0.777026 0.511628 3167
    # Rented (private) 0.805646 0.534884 1316
    # owner-occupied 0.762180 0.488372 10835
    # rental (private) 0.760503 0.511628 3181
    # rental (social) 0.830057 0.604651 3705
    # unknown 0.899948 0.627907 1571

    # By property type - we also want to see how many properties we have for each property type
    by_property_type = results_df.groupby("property_type").agg(
        {"numeric_success": "median", "categorical_success": "median", "uprn": "count"}
    )

    # By property_type & built form
    by_property_type_built_form = results_df.groupby(["property_type", "built_form"]).agg(
        {"numeric_success": "median", "categorical_success": "median", "uprn": "count"}
    )

View file

@ -0,0 +1,42 @@
"""
This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
testing
"""
import pandas as pd
from utils.s3 import save_csv_to_s3
USER_ID = 8
PORTFOLIO_ID = 57
def app():
"""
Build a four-row test portfolio (address, postcode, Notes), upload it to S3 as a CSV under
"<USER_ID>/<PORTFOLIO_ID>/no_epc.csv" and print a dict describing the portfolio run.
NOTE(review): the previous docstring said this portfolio is for testing windows recommendations, but the
uploaded file is named "no_epc.csv" - presumably these addresses exercise the no-EPC path; confirm.
:return: no value is returned explicitly
"""
# One row per property in the test portfolio
test_file = pd.DataFrame(
[
{"address": "21 Butler House", "postcode": "E2 0PN", "Notes": None},
{"address": "22 Butler House", "postcode": "E2 0PN", "Notes": None},
{"address": "23 Butler House", "postcode": "E2 0PN", "Notes": None},
{"address": "24 Butler House", "postcode": "E2 0PN", "Notes": None},
]
)
# Store the data in s3
filename = f"{USER_ID}/{PORTFOLIO_ID}/no_epc.csv"
save_csv_to_s3(
dataframe=test_file,
bucket_name="retrofit-plan-inputs-dev",
file_name=filename
)
# The printed dict points at the uploaded file via "trigger_file_path"; presumably it is copied by hand into a
# request to the recommendation engine - TODO confirm
body = {
"portfolio_id": str(PORTFOLIO_ID),
"housing_type": "Social",
"goal": "Increase EPC",
"goal_value": "A",
"trigger_file_path": filename
}
print(body)

View file

@ -0,0 +1,43 @@
"""
This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
testing
"""
import pandas as pd
from utils.s3 import save_csv_to_s3
USER_ID = 8
PORTFOLIO_ID = 56
def app():
"""
Build a five-row test portfolio (address, postcode, Notes) for testing windows recommendations, upload it to
S3 as a CSV under "<USER_ID>/<PORTFOLIO_ID>/windows_portfolio_inputs.csv" and print a dict describing the
portfolio run.
:return: no value is returned explicitly
"""
# One row per property in the test portfolio
test_file = pd.DataFrame(
[
{"address": "3 Church Terrace", "postcode": "LE13 0PW", "Notes": None},
{"address": "3, Main Street, Redmile", "postcode": "NG13 0GA", "Notes": None},
{"address": "Manor House, Kennel Lane, Reepham", "postcode": "LN3 4DZ", "Notes": None},
{"address": "13 Main Street", "postcode": "LE14 2JU", "Notes": None},
{"address": "8 The Crescent, Coston Road, Buckminster", "postcode": "NG33 5SF", "Notes": None},
]
)
# Store the data in s3
filename = f"{USER_ID}/{PORTFOLIO_ID}/windows_portfolio_inputs.csv"
save_csv_to_s3(
dataframe=test_file,
bucket_name="retrofit-plan-inputs-dev",
file_name=filename
)
# The printed dict points at the uploaded file via "trigger_file_path"; presumably it is copied by hand into a
# request to the recommendation engine - TODO confirm
body = {
"portfolio_id": str(PORTFOLIO_ID),
"housing_type": "Social",
"goal": "Increase EPC",
"goal_value": "A",
"trigger_file_path": filename
}
print(body)

View file

@ -18,6 +18,25 @@ regional_labour_variations = [
{"Region": "Northern Ireland", "Adjustment_Factor": 0.76} {"Region": "Northern Ireland", "Adjustment_Factor": 0.76}
] ]
# This data is based on the MCS database
# Regional keys have the form "average_cost_per_kwh-<region>". Costs.solar_pv builds the key from its region and
# multiplies the value by the system size in kilowatts (wattage / 1000), so each figure is applied as a cost per
# kilowatt of capacity even though the key name says "per_kwh".
# NOTE(review): the un-suffixed "average_cost_per_kwh" entry is presumably the all-region average - confirm.
MCS_SOLAR_PV_COST_DATA = {
"last_updated": "2024-01-04",
"average_cost_per_kwh": 2013.94,
"average_cost_per_kwh-Outer London": 2618.75,
"average_cost_per_kwh-Inner London": 2618.75,
"average_cost_per_kwh-South East England": 2083.33,
"average_cost_per_kwh-South West England": 2113,
"average_cost_per_kwh-East of England": 1973.86,
"average_cost_per_kwh-East Midlands": 1981.86,
"average_cost_per_kwh-West Midlands": 1926.55,
"average_cost_per_kwh-North East England": 2028.49,
"average_cost_per_kwh-North West England": 1620.42,
"average_cost_per_kwh-Yorkshire and the Humber": 2060.9,
"average_cost_per_kwh-Wales": 1898.83,
"average_cost_per_kwh-Scotland": 1967.97,
"average_cost_per_kwh-Northern Ireland": 2126.09,
}
class Costs: class Costs:
""" """
@ -42,7 +61,7 @@ class Costs:
# We use a higher contingency rate for internal wall insulation because of the potential for issues with moving # We use a higher contingency rate for internal wall insulation because of the potential for issues with moving
# fittings and trimming doors, as well as scope for damage to the existing wall during preparation. # fittings and trimming doors, as well as scope for damage to the existing wall during preparation.
IWI_CONTINGENCY = 0.15 IWI_CONTINGENCY = 0.2
# Where there is more uncertainty, a higher contingency rate is used # Where there is more uncertainty, a higher contingency rate is used
HIGH_RISK_CONTINGENCY = 0.2 HIGH_RISK_CONTINGENCY = 0.2
@ -58,12 +77,22 @@ class Costs:
# have a preliminaries of 12-14% so we use 12% as the median for the preliminaries rate. # have a preliminaries of 12-14% so we use 12% as the median for the preliminaries rate.
# For External wall insulation (EWI), we use 15% as the preliminaries rate if we think the property might # For External wall insulation (EWI), we use 15% as the preliminaries rate if we think the property might
# need scaffolding, otherwise we use 12%. This is to account for any site preparation that might be required # need scaffolding, otherwise we use 12%. This is to account for any site preparation that might be required
EWI_NO_SCAFFOLDING_PRELIMINARIES = 0.15 EWI_NO_SCAFFOLDING_PRELIMINARIES = 0.2
EWI_SCAFFOLDING_PRELIMINARIES = 0.20 EWI_SCAFFOLDING_PRELIMINARIES = 0.25
VAT_RATE = 0.2 VAT_RATE = 0.2
PROFIT_MARGIN = 0.2 PROFIT_MARGIN = 0.2
# Based on this greenmatch article, on average, a Sash window is around 50% more expensive than a casement window.
# Therefore, for a conservative cost estimate, and allowance for a more premium window type, we inflate the material
# cost of the windows to allow for a sash window type
# https://www.greenmatch.co.uk/windows/double-glazing/cost
SASH_WINDOW_INFLATION_FACTOR = 1.5
# Typically, secondary glazing can be installed for 25% of the cost of double glazed windows - to be conservative,
# we scale the cost by half
SECONDARY_GLAZING_SCALING_FACTOR = 0.5
def __init__(self, property_instance): def __init__(self, property_instance):
""" """
Initializes the Costs class with a property instance. Initializes the Costs class with a property instance.
@ -147,12 +176,16 @@ class Costs:
""" """
material_cost_per_m2 = material["material_cost"] material_cost_per_m2 = material["material_cost"]
# We inflate material costs due to recent price increases
material_cost_per_m2 = material_cost_per_m2 * 1.5
base_material_cost = material_cost_per_m2 * floor_area base_material_cost = material_cost_per_m2 * floor_area
labour_cost = material["labour_cost"] * floor_area * self.labour_adjustment_factor labour_cost = material["labour_cost"] * floor_area * self.labour_adjustment_factor
subtotal_before_profit = base_material_cost + labour_cost subtotal_before_profit = base_material_cost + labour_cost
contingency_cost = subtotal_before_profit * self.CONTINGENCY # We use high risk contingency because of the possibility of access issues and clearing existing insulation
contingency_cost = subtotal_before_profit * self.HIGH_RISK_CONTINGENCY
preliminaries_cost = subtotal_before_profit * self.PRELIMINARIES preliminaries_cost = subtotal_before_profit * self.PRELIMINARIES
profit_cost = subtotal_before_profit * self.PROFIT_MARGIN profit_cost = subtotal_before_profit * self.PROFIT_MARGIN
@ -719,3 +752,121 @@ class Costs:
"labour_days": labour_days, "labour_days": labour_days,
"labour_cost": labour_costs "labour_cost": labour_costs
} }
def window_glazing(self, number_of_windows, material, is_secondary_glazing=False):
    """
    Price a window glazing job for the given number of windows.

    We characterise the work behind the per-window rates as the following stages:
    1) Initial assessment and measurements of the existing frame and opening, so the new units fit.
    2) Removal of the existing window without damaging the surrounding wall (or the frame, if reused).
    3) Responsible disposal of the old window, recycling glass and other materials where possible.
    4) Surface preparation: repairing the frame, sealing gaps and making the opening level and square.
    5) Installing the window frame, where the new units come with their own frames.
    6) Installing a new window sill if one is required.
    7) Installing the glazed units into the frame without stressing the glass.
    8) Sealing and weatherproofing around the frame and between glass and frame.
    9) Finishing touches such as trimming, painting or staining, and cleaning up.
    10) Inspection and testing that the windows open, close, lock and are properly sealed.
    The initial assessment is factored into the preliminaries rather than priced separately.

    The combined material and labour cost is scaled by SECONDARY_GLAZING_SCALING_FACTOR for secondary
    glazing and by SASH_WINDOW_INFLATION_FACTOR otherwise. Contingency, preliminaries and profit are each a
    percentage of that scaled figure, and VAT is charged on the sum. The "material" and "labour_cost"
    entries of the result are the figures before this scaling is applied.

    :param number_of_windows: Number of windows to be glazed
    :param material: Dict with per-window "material_cost", "labour_cost" and "labour_hours_per_unit"
    :param is_secondary_glazing: True to price secondary glazing instead of replacement windows
    :return: Dict with the cost breakdown and the labour hours/days for the job
    """
    materials_total = material["material_cost"] * number_of_windows
    labour_total = material["labour_cost"] * number_of_windows * self.labour_adjustment_factor

    if is_secondary_glazing:
        scaling = self.SECONDARY_GLAZING_SCALING_FACTOR
    else:
        scaling = self.SASH_WINDOW_INFLATION_FACTOR
    scaled_base = (materials_total + labour_total) * scaling

    # Each add-on is a percentage of the scaled material + labour figure
    contingency = scaled_base * self.CONTINGENCY
    preliminaries = scaled_base * self.PRELIMINARIES
    profit = scaled_base * self.PROFIT_MARGIN

    net_of_vat = scaled_base + contingency + preliminaries + profit
    vat = net_of_vat * self.VAT_RATE

    hours = material["labour_hours_per_unit"] * number_of_windows
    if is_secondary_glazing:
        hours = hours * self.SECONDARY_GLAZING_SCALING_FACTOR

    # Assume a team of 2 working 8 hour days
    days = (hours / 8) / 2

    return {
        "total": net_of_vat + vat,
        "subtotal": net_of_vat,
        "vat": vat,
        "contingency": contingency,
        "preliminaries": preliminaries,
        "material": materials_total,
        "profit": profit,
        "labour_hours": hours,
        "labour_cost": labour_total,
        "labour_days": days
    }
def solar_pv(self, wattage: float):
    """
    Calculates the total cost for solar PV based on data provided by the MCS dashboard, which contains
    costing data for installations of renewable and clean energy measures.

    The data in the dashboard is filtered on domestic building installations and then the data across the
    various regions is manually collected. There is currently no automated way to get the data from the MCS
    dashboard

    Price can also be benchmarked against this checkatrade article:
    https://www.checkatrade.com/blog/cost-guides/cost-of-solar-panel-installation/

    The looked-up price is treated as VAT-inclusive: the subtotal is derived by dividing out VAT_RATE.

    :param wattage: Peak wattage of the solar PV system
    :return: Dict with "total", "subtotal" (total excluding VAT), "vat", "labour_hours" and "labour_days"
    """
    # Get the cost data relevant to the region. If the region has no entry of its own (unknown or missing
    # region), fall back to the un-suffixed average rather than failing with a KeyError.
    average_cost = MCS_SOLAR_PV_COST_DATA["average_cost_per_kwh"]
    regional_cost = MCS_SOLAR_PV_COST_DATA.get(f"average_cost_per_kwh-{self.region}", average_cost)

    # The cost figures are applied per kilowatt of capacity
    kw = wattage / 1000
    total_cost = kw * regional_cost

    subtotal_before_vat = total_cost / (1 + self.VAT_RATE)
    vat = total_cost - subtotal_before_vat

    # Labour hours are based on estimates from online research but an average team seems to consist of 3 people
    # and most jobs take around 2 days. Assuming an 8 hour day for 3 people across 2 days, gives us 48 hours of
    # labour
    team_size = 3
    hours_per_day = 8
    labour_days = 2
    labour_hours = team_size * hours_per_day * labour_days

    return {
        "total": total_cost,
        "subtotal": subtotal_before_vat,
        "vat": vat,
        "labour_hours": labour_hours,
        "labour_days": labour_days,
    }

View file

@ -6,6 +6,8 @@ from recommendations.RoofRecommendations import RoofRecommendations
from recommendations.VentilationRecommendations import VentilationRecommendations from recommendations.VentilationRecommendations import VentilationRecommendations
from recommendations.FireplaceRecommendations import FireplaceRecommendations from recommendations.FireplaceRecommendations import FireplaceRecommendations
from recommendations.LightingRecommendations import LightingRecommendations from recommendations.LightingRecommendations import LightingRecommendations
from recommendations.SolarPvRecommendations import SolarPvRecommendations
from recommendations.WindowsRecommendations import WindowsRecommendations
from backend.ml_models.AnnualBillSavings import AnnualBillSavings from backend.ml_models.AnnualBillSavings import AnnualBillSavings
@ -35,6 +37,8 @@ class Recommendations:
) )
self.fireplace_recommender = FireplaceRecommendations(property_instance=property_instance) self.fireplace_recommender = FireplaceRecommendations(property_instance=property_instance)
self.lighting_recommender = LightingRecommendations(property_instance=property_instance, materials=materials) self.lighting_recommender = LightingRecommendations(property_instance=property_instance, materials=materials)
self.windows_recommender = WindowsRecommendations(property_instance=property_instance, materials=materials)
self.solar_recommender = SolarPvRecommendations(property_instance=property_instance)
def recommend(self): def recommend(self):
@ -77,6 +81,16 @@ class Recommendations:
if self.lighting_recommender.recommendation: if self.lighting_recommender.recommendation:
property_recommendations.append(self.lighting_recommender.recommendation) property_recommendations.append(self.lighting_recommender.recommendation)
# Windows recommendations
self.windows_recommender.recommend()
if self.windows_recommender.recommendation:
property_recommendations.append(self.windows_recommender.recommendation)
# Solar recommendations
self.solar_recommender.recommend()
if self.solar_recommender.recommendation:
property_recommendations.append(self.solar_recommender.recommendation)
# We insert temporary ids into the recommendations which is important for the optimiser later # We insert temporary ids into the recommendations which is important for the optimiser later
property_recommendations = self.insert_temp_recommendation_id(property_recommendations) property_recommendations = self.insert_temp_recommendation_id(property_recommendations)
@ -148,6 +162,8 @@ class Recommendations:
# For the moment, we cap the number of SAP points that can be achieved by ventilation at 2 # For the moment, we cap the number of SAP points that can be achieved by ventilation at 2
rec["sap_points"] = min(rec["sap_points"], VentilationRecommendations.SAP_LIMIT) rec["sap_points"] = min(rec["sap_points"], VentilationRecommendations.SAP_LIMIT)
# Round to 2 decimal places
rec["sap_points"] = round(rec["sap_points"], 2)
rec["co2_equivalent_savings"] = float(property_instance.data["co2-emissions-current"]) - new_carbon rec["co2_equivalent_savings"] = float(property_instance.data["co2-emissions-current"]) - new_carbon
# Energy consumption current is per meter squared, so we need to multiply by the floor area to get # Energy consumption current is per meter squared, so we need to multiply by the floor area to get

View file

@ -0,0 +1,65 @@
import numpy as np
from recommendations.Costs import Costs
class SolarPvRecommendations:
    """
    Recommends a roof-mounted solar PV system for a property, sized from the roof area available for panels.
    """

    # Approximate area of the solar panels (presumably square metres per panel, matching the units of
    # property.solar_pv_roof_area - TODO confirm)
    SOLAR_PANEL_AREA = 1.6
    # Wattage per panel
    SOLAR_PANEL_WATTAGE = 360

    def __init__(self, property_instance):
        """
        :param property_instance: Instance of the Property class, for the home associated to property_id
        """
        self.property = property_instance
        self.costs = Costs(self.property)

        self.recommendation = []

    def recommend(self):
        """
        We check if a property is potentially suitable for solar PV based on the following criteria:
        - The property is a house or bungalow
        - The property has a flat roof, a pitched roof or a roof room
        - The property does not have existing solar pv
        - The usable roof area fits at least one panel

        When all criteria hold, self.recommendation is set to a single-element list describing the system;
        otherwise it is left untouched.
        :return:
        """
        is_valid_property_type = self.property.data["property-type"] in ["House", "Bungalow"]
        is_valid_roof_type = (
            self.property.roof["is_flat"] or self.property.roof["is_pitched"] or self.property.roof["is_roof_room"]
        )

        # If there is no existing solar PV, the photo-supply field will be None or a missing value.
        # DATA_ANOMALY_MATCHES may be a collection of anomaly markers, in which case the field has to be
        # tested against its members and not only against the collection as a whole.
        photo_supply = self.property.data["photo-supply"]
        anomaly_matches = self.property.DATA_ANOMALY_MATCHES
        has_no_existing_solar_pv = photo_supply in [None, 0, anomaly_matches] or (
            isinstance(anomaly_matches, (list, tuple, set, frozenset)) and photo_supply in anomaly_matches
        )

        if not is_valid_property_type or not is_valid_roof_type or not has_no_existing_solar_pv:
            return

        # We now have a property which is potentially suitable for solar PV
        number_solar_panels = np.floor(self.property.solar_pv_roof_area / self.SOLAR_PANEL_AREA)
        if number_solar_panels < 1:
            # The roof cannot fit a single panel, so there is nothing sensible to recommend
            return

        solar_panel_wattage = number_solar_panels * self.SOLAR_PANEL_WATTAGE

        # Given the wattage, we estimate the cost of the solar PV system. This is based on the MCS database
        # of solar PV installations
        cost_result = self.costs.solar_pv(wattage=solar_panel_wattage)

        # The description quotes the system size rounded to the nearest whole kilowatt
        kw = int(np.round(solar_panel_wattage / 1000))

        self.recommendation = [
            {
                "parts": [],
                "type": "solar_pv",
                "description": f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) panel system on the roof",
                "starting_u_value": None,
                "new_u_value": None,
                "sap_points": None,
                **cost_result,
                # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale
                # back up here
                "photo_supply": 100 * self.property.solar_pv_percentage
            }
        ]

View file

@ -0,0 +1,97 @@
from typing import List
import numpy as np
from backend.Property import Property
from recommendations.Costs import Costs
class WindowsRecommendations:
    """
    Recommends double or secondary glazing for the windows of a property that are not yet glazed, and prices
    the job from the single "windows_glazing" material.
    """

    # If the property has existing glazing, we scale down the number of windows that need to be glazed
    COVERAGE_MAP = {
        # If most of the windows have already been glazed, we assume that 2/3 are glazed and 1/3 are remaining to be
        # glazed
        "most": 0.33,
        # If glazing is partial, we assume 50/50 split between glazed and unglazed
        "partial": 0.5
    }

    def __init__(self, property_instance: "Property", materials: List):
        """
        :param property_instance: Instance of the Property class for the home being assessed
        :param materials: List of material dicts; exactly one must have type "windows_glazing"
        :raises ValueError: If there is not exactly one window glazing material
        """
        self.property = property_instance
        self.costs = Costs(self.property)

        self.recommendation = []

        glazing_materials = [
            material for material in materials if material["type"] == "windows_glazing"
        ]
        if len(glazing_materials) != 1:
            raise ValueError("There should only be one window glazing material")

        self.glazing_material = glazing_materials[0]

    def recommend(self):
        """
        This method will recommend the best possible glazing options for a property.

        In order to do this, we need to estimate the number of windows that the home has. This information will be
        stored in the property object, under property.number_of_windows

        Nothing is recommended when the property is already fully glazed or when no windows remain to be glazed.
        :raises ValueError: If the property has no window count
        :return:
        """

        # If the property is in a conservation area or is a listed building, it becomes more difficult to install
        # double glazing. It is still possible but is not practical as it requires planning permission and might
        # require a more expensive window type, such as timber. Therefore, when the property has restricted
        # measures (or already uses secondary glazing) we recommend secondary glazing instead.
        number_of_windows = self.property.number_of_windows
        is_secondary_glazing = self.property.restricted_measures or (
            self.property.windows["glazing_type"] == "secondary"
        )

        if not number_of_windows:
            raise ValueError("Number of windows not specified")

        # Logical `and` (not bitwise `&`) so a missing/None has_glazing flag is simply treated as "not glazed"
        if self.property.windows["has_glazing"] and self.property.windows["glazing_coverage"] == "full":
            return

        # We scale the number of windows based on the proportion of existing glazing
        multi_glaze_proportion = self.property.data["multi-glaze-proportion"]
        if multi_glaze_proportion not in ("", None):
            n_windows_scalar = 1 - (int(multi_glaze_proportion) / 100)
        else:
            n_windows_scalar = self.COVERAGE_MAP.get(self.property.windows["glazing_coverage"], 1)

        number_of_windows *= n_windows_scalar
        number_of_windows = np.ceil(number_of_windows)

        if number_of_windows < 1:
            # Every window is already glazed, so there is no job to price
            return

        # We then price the job based on the number of windows that there are
        cost_result = self.costs.window_glazing(
            number_of_windows=number_of_windows,
            material=self.glazing_material,
            is_secondary_glazing=is_secondary_glazing
        )

        glazing_type = "secondary glazing" if is_secondary_glazing else "double glazing"

        if self.property.windows["glazing_coverage"] in ["partial", "most"]:
            description = f"Install {glazing_type} to the remaining windows"
        else:
            description = f"Install {glazing_type} to all windows"

        # Explain why secondary glazing is suggested; only the first matching status is mentioned
        if self.property.is_listed:
            description += ". Secondary glazing recommended due to listed building status"
        elif self.property.is_heritage:
            description += ". Secondary glazing recommended due to heritage building status"
        elif self.property.in_conservation_area:
            description += ". Secondary glazing recommended due to conservation area status"

        self.recommendation = [
            {
                "parts": [],
                "type": "windows_glazing",
                "description": description,
                "starting_u_value": None,
                "new_u_value": None,
                "sap_points": None,
                **cost_result,
                "is_secondary_glazing": is_secondary_glazing
            }
        ]

View file

@ -1,4 +1,5 @@
import math import math
from datetime import datetime
from copy import deepcopy from copy import deepcopy
from typing import Union from typing import Union
@ -565,7 +566,7 @@ def estimate_external_wall_area(num_floors, floor_height, perimeter, built_form)
'Detached': 4, 'Detached': 4,
} }
exposed_wall_area = total_wall_area * (number_exposed_walls[built_form] / 4) exposed_wall_area = total_wall_area * (number_exposed_walls.get(built_form, 3) / 4)
return exposed_wall_area return exposed_wall_area
@ -669,3 +670,87 @@ def esimtate_pitched_roof_area(floor_area: float, floor_height: float) -> float:
area = 2 * (slope * wall_width) area = 2 * (slope * wall_width)
return area return area
def estimate_windows(
        property_type, built_form, construction_age_band, floor_area, number_habitable_rooms, extension_count
):
    """
    Heuristically estimate how many windows a property has.

    The estimate starts at one window per habitable room and is then adjusted for non-habitable rooms, built
    form, floor area, construction age, extensions and property type.

    :param property_type: Property type, e.g. "House", "Bungalow", "Flat" or "Maisonette"
    :param built_form: Built form, e.g. "Mid-Terrace", "End-Terrace", "Semi-Detached" or "Detached"
    :param construction_age_band: Construction age band string of the property
    :param floor_area: Total floor area of the property
    :param number_habitable_rooms: Number of habitable rooms
    :param extension_count: Number of extensions
    :raises ValueError: If the resulting window count is negative
    :return: Estimated number of windows
    """
    # Base window count based on habitable rooms
    window_count = number_habitable_rooms

    # Additional windows for non-habitable rooms (e.g., kitchen, bathroom)
    # Assuming most houses will have at least one kitchen and one bathroom
    # Scale non-habitable windows with the number of habitable rooms
    non_habitable_base = 2  # Base for kitchen and bathroom
    extra_non_habitable = max(0, (number_habitable_rooms - 3) // 2)  # Extra for large houses
    window_count += non_habitable_base + extra_non_habitable

    # Adjustments based on built form and property type
    if property_type in ["House", "Bungalow"] and built_form in ["Semi-Detached", "Detached"]:
        built_form_lookup = {
            "Semi-Detached": 3,
            "Detached": 4,
        }
    else:
        # Everything else (flats, maisonettes and terraced houses/bungalows) gets a smaller adjustment
        built_form_lookup = {
            "Mid-Terrace": 0,
            "End-Terrace": 1,
            "Semi-Detached": 1,
            "Detached": 2,
        }

    window_count += built_form_lookup.get(built_form, 0)

    # Adjust for floor area (larger floor area might indicate more rooms/windows)
    if floor_area > 120:  # Very large properties
        # Likely to have significantly more or larger rooms
        window_count += 2
    elif floor_area >= 85:  # Medium to large properties
        # More rooms or larger rooms likely, potentially more windows
        window_count += 1
    # Below 85 (small to medium properties) the standard window count is likely sufficient

    # Adjust for construction age band
    if construction_age_band in ["England and Wales: before 1900", "England and Wales: 1900-1929"]:
        # Older houses with smaller, more numerous windows
        window_count += 1

    # Adjust for extensions (each extension might add windows)
    window_count += extension_count

    # Adjustments for specific property types
    if property_type in ["Flat", "Maisonette"]:
        # Flats might have fewer windows due to shared walls
        # Maisonettes might follow a similar pattern to flats or small houses
        window_count -= 1

    # Ensure window count is not negative
    if window_count < 0:
        raise ValueError("Window count cannot be negative.")

    return window_count
def calculate_cavity_age(newest_epc, older_epcs, cleaned):
    """
    Estimate how many days ago a filled cavity wall was last recorded on any of the property's EPCs.

    Each EPC's wall description is matched against the cleaned wall descriptions. The age is measured from the
    most recent lodgement date among the EPCs whose wall is both a cavity wall and a filled cavity.

    :param newest_epc: Most recent EPC record, with "walls-description" and "lodgement-date"
    :param older_epcs: List of earlier EPC records with the same fields
    :param cleaned: Dict whose "walls-description" entry is a list of cleaned descriptions, each with
        "original_description", "is_cavity_wall" and "is_filled_cavity"
    :return: Age in days, or NaN if no EPC records a filled cavity wall
    """
    records = []
    for epc in [newest_epc] + older_epcs:
        # Get the cleaned mapping
        mapped = [y for y in cleaned["walls-description"] if y["original_description"] == epc["walls-description"]]
        if not mapped:
            continue

        records.append(
            {
                **mapped[0],
                "inspection-date": epc["lodgement-date"],
            }
        )

    if not records:
        # No description could be mapped; an empty DataFrame would have no columns to filter on
        return math.nan

    df = pd.DataFrame(records)
    df = df[df["is_cavity_wall"] & df["is_filled_cavity"]]
    if df.empty:
        return math.nan

    # Parse before taking the maximum so the comparison is on dates rather than on strings
    latest_filled = pd.to_datetime(df["inspection-date"]).max()
    cavity_age = (datetime.now() - latest_filled).days
    return cavity_age

View file

@ -1,6 +1,7 @@
from recommendations.Costs import Costs from recommendations.Costs import Costs
from unittest.mock import Mock from unittest.mock import Mock
import datetime import datetime
import pytest
class TestCosts: class TestCosts:
@ -58,9 +59,9 @@ class TestCosts:
) )
assert loft_results == { assert loft_results == {
'total': 430.21445040000003, 'subtotal': 358.512042, 'vat': 71.70240840000001, 'total': 639.4133610000001, 'subtotal': 532.8444675000001, 'vat': 106.56889350000002,
'contingency': 25.608003000000004, 'preliminaries': 25.608003000000004, 'material': 198.29923000000002, 'contingency': 71.045929, 'preliminaries': 35.5229645, 'material': 297.448845, 'profit': 71.045929,
'profit': 51.21600600000001, 'labour_hours': 3.685, 'labour_cost': 57.7808, 'labour_days': 0.460625 'labour_hours': 3.685, 'labour_cost': 57.7808, 'labour_days': 0.460625
} }
def test_internal_wall_insulation(self): def test_internal_wall_insulation(self):
@ -176,11 +177,9 @@ class TestCosts:
) )
assert iwi_results == { assert iwi_results == {
'total': 6650.889456921851, 'subtotal': 5542.407880768209, 'vat': 1108.4815761536418, 'total': 6880.2304726777775, 'subtotal': 5733.525393898148, 'vat': 1146.7050787796295,
'contingency': 573.3525393898148, 'preliminaries': 382.2350262598765, 'contingency': 764.470052519753, 'preliminaries': 382.2350262598765, 'material': 1747.488000615996,
'material': 1747.488000615996, 'profit': 764.470052519753, 'labour_hours': 88.23759388401297, 'labour_days': 2.757424808875405,
'profit': 764.470052519753, 'labour_hours': 88.23759388401297,
'labour_days': 2.757424808875405,
'labour_cost': 1927.1602026551818 'labour_cost': 1927.1602026551818
} }
@ -414,8 +413,8 @@ class TestCosts:
) )
assert ewi_results == { assert ewi_results == {
'total': 14561.688989159393, 'subtotal': 12134.740824299493, 'vat': 2426.948164859899, 'total': 15047.078622131372, 'subtotal': 12539.232185109477, 'vat': 2507.8464370218953,
'contingency': 808.9827216199662, 'preliminaries': 1617.9654432399325, 'material': 4020.565147410677, 'contingency': 808.9827216199662, 'preliminaries': 2022.4568040499155, 'material': 4020.565147410677,
'profit': 1617.9654432399325, 'labour_hours': 187.02533486285358, 'labour_days': 5.8445417144641745, 'profit': 1617.9654432399325, 'labour_hours': 187.02533486285358, 'labour_days': 5.8445417144641745,
'labour_cost': 3921.5600094613983 'labour_cost': 3921.5600094613983
} }
@ -499,3 +498,48 @@ class TestCosts:
'labour_hours': 24.79, 'labour_days': 1.549375, 'labour_cost': 186.9032} 'labour_hours': 24.79, 'labour_days': 1.549375, 'labour_cost': 186.9032}
assert costs.labour_adjustment_factor == 0.88 assert costs.labour_adjustment_factor == 0.88
# Mock property instance for regional tests
@pytest.fixture(params=[
    ("Northamptonshire", "East Midlands", 7927.44),
    ("Greater London Authority", "Inner London", 10475.0),
    ("Adur", "South East England", 8333.32),
    ("Bournemouth", "South West England", 8452),
    ("Basildon", "East of England", 7895.44),
    ("Birmingham", "West Midlands", 7706.2),
    ("County Durham", "North East England", 8113.96),
    ("Allerdale", "North West England", 6481.68),
    ("York", "Yorkshire and the Humber", 8243.6),
    ("Cardiff", "Wales", 7595.32),
    ("Glasgow City", "Scotland", 7871.88),
    ("Belfast", "Northern Ireland", 8504.36)
])
def mock_property_with_region(self, request):
    """
    Parametrised fixture yielding one (mock property, expected region, expected cost) triple per county.

    The mock property only carries the county in its data. The expected cost is the one asserted by the
    regional solar PV test, which prices a 4000 W system.
    """
    county_name, region_name, cost = request.param
    # Keyword arguments to Mock are set as attributes on the mock
    stub = Mock(data={"county": county_name})
    return stub, region_name, cost
# Test for different wattages
@pytest.mark.parametrize("wattage, expected_cost", [
    (3000, 5945.58),
    (4000, 7927.44),
    (5000, 9909.3),
    (6000, 11891.16),
])
def test_solar_pv_different_wattages(self, wattage, expected_cost):
    """
    The solar PV total should scale with the system wattage for a fixed county ("Mansfield").
    """
    property_stub = Mock()
    property_stub.data = {"county": "Mansfield"}

    quote = Costs(property_stub).solar_pv(wattage)

    # Totals are compared with a 1% relative tolerance
    assert quote['total'] == pytest.approx(expected_cost, rel=0.01)
def test_solar_pv_regional_variation(self, mock_property_with_region):
    """
    For each county from the fixture, Costs should resolve the expected region and price a 4000 W system at
    that region's expected total.
    """
    stub, region_name, cost = mock_property_with_region
    costs_under_test = Costs(stub)

    # The county must first map onto the expected region
    assert costs_under_test.region == region_name

    # Testing with a fixed wattage of 4000
    quote = costs_under_test.solar_pv(4000)
    assert quote['total'] == pytest.approx(cost, rel=0.01)

View file

@ -942,8 +942,24 @@ materials = [
'https://www.hamuch.com/cost/led-spot-light#:~:text=It%20costs%20an%20average%20of,' 'https://www.hamuch.com/cost/led-spot-light#:~:text=It%20costs%20an%20average%20of,'
'will%20drive%20up%20the%20cost.', 'will%20drive%20up%20the%20cost.',
'created_at': datetime.datetime(2023, 11, 28, 22, 49, 12, 244907), 'is_active': True, 'prime_material_cost': None, 'created_at': datetime.datetime(2023, 11, 28, 22, 49, 12, 244907), 'is_active': True, 'prime_material_cost': None,
'material_cost': 20.0, 'labour_cost': 46.0, 'labour_hours_per_unit': 0.8, 'plant_cost': 0.0, 'total_cost': 66.0, 'material_cost': 20.0, 'labour_cost': 15.0, 'labour_hours_per_unit': 0.8, 'plant_cost': 0.0, 'total_cost': 66.0,
'notes': 'We estimate the unit economics from the checkatrade article. We assume that the average job consists ' 'notes': 'We estimate the unit economics from the checkatrade article. We assume that the average job consists '
'of installing 6 lights based on the hamuch article. We use the median value of 400 for a job of 6 ' 'of installing 6 lights based on the hamuch article. We use the median value of 400 for a job of 6 '
'lights'} 'lights'},
{'id': 1235, 'type': 'windows_glazing',
'description': 'uPVC windows; Profile 22 or other equal and approved; reinforced where appropriate with '
'aluminium alloy; in refurbishment work, including standard ironmongery; sills and factory glazed '
'with low-e 24 mm double glazing; removing existing windows and fixing new in position; including '
'lugs plugged and screwed to brickwork or blockwork; Casement/fixed light; including vents; '
'e.p.d.m. glazing gaskets and weather seals; 1770 mm × 1200 mm; ref P312WW',
'depth': 0.0, 'depth_unit': None, 'cost': None, 'cost_unit': 'gbp_per_unit', 'r_value_per_mm': None,
'r_value_unit': 'square_meter_kelvin_per_watt', 'thermal_conductivity': None, 'thermal_conductivity_unit': None,
'link': 'SPONs',
'created_at': datetime.datetime(2023, 11, 28, 22, 49, 12, 244907),
'is_active': True, 'prime_material_cost': 176.55,
'material_cost': 182.25, 'labour_cost': 163.36, 'labour_hours_per_unit': 6.5, 'plant_cost': 0.0,
'total_cost': 345.61,
'notes': 'This is the cost of removal of existing windows and installation of new windows. This is a casement '
'style window, which is the most common but also the cheapest style. In the cost estimation framework, '
'we can inflate prices for different finishes, to be conservative on price.'}
] ]

View file

@ -6,7 +6,7 @@ from recommendations.FireplaceRecommendations import FireplaceRecommendations
class TestFirepaceRecommendations: class TestFirepaceRecommendations:
def test_no_fireplaces(self): def test_no_fireplaces(self):
property_instance = Property(id=0, address1="fake", postcode="fake", epc_client=Mock()) property_instance = Property(id=0, address="fake", postcode="fake")
property_instance.data = { property_instance.data = {
"number-open-fireplaces": 0 "number-open-fireplaces": 0
} }
@ -22,7 +22,7 @@ class TestFirepaceRecommendations:
assert recommender.recommendation is None assert recommender.recommendation is None
def test_one_fireplace(self): def test_one_fireplace(self):
property_instance = Property(id=0, address1="fake", postcode="fake", epc_client=Mock()) property_instance = Property(id=0, address="fake", postcode="fake")
property_instance.data = { property_instance.data = {
"number-open-fireplaces": 1 "number-open-fireplaces": 1
} }
@ -40,7 +40,7 @@ class TestFirepaceRecommendations:
assert recommender.recommendation[0]["total"] == 300 assert recommender.recommendation[0]["total"] == 300
def test_multiple_fireplaces(self): def test_multiple_fireplaces(self):
property_instance = Property(id=0, address1="fake", postcode="fake", epc_client=Mock()) property_instance = Property(id=0, address="fake", postcode="fake")
property_instance.data = { property_instance.data = {
"number-open-fireplaces": 3 "number-open-fireplaces": 3
} }

View file

@ -21,16 +21,6 @@ class TestFloorRecommendations:
) as f: ) as f:
return pickle.load(f) return pickle.load(f)
@pytest.fixture
def mock_floor_rec_instance(self):
# Creating a mock instance of WallRecommendations with the necessary attributes
property_mock = Mock()
property_mock.full_sap_epc = {"lodgement-date": "2000-01-01"}
property_mock.data = {"county": "York"}
mock_wall_rec_instance = FloorRecommendations(property_mock, materials)
return mock_wall_rec_instance
def test_init(self, input_properties): def test_init(self, input_properties):
input_properties[0].insulation_floor_area = 50 input_properties[0].insulation_floor_area = 50
input_properties[0].insulation_wall_area = 90 input_properties[0].insulation_wall_area = 90
@ -68,6 +58,7 @@ class TestFloorRecommendations:
input_properties[2].wall_type = "solid brick" input_properties[2].wall_type = "solid brick"
input_properties[2].floor_type = "suspended" input_properties[2].floor_type = "suspended"
input_properties[2].number_of_floors = 1 input_properties[2].number_of_floors = 1
input_properties[2].floor_level = 0
recommender = FloorRecommendations(property_instance=input_properties[2], materials=materials) recommender = FloorRecommendations(property_instance=input_properties[2], materials=materials)
assert recommender.estimated_u_value is None assert recommender.estimated_u_value is None
@ -93,6 +84,8 @@ class TestFloorRecommendations:
input_properties[3].insulation_floor_area = 100 input_properties[3].insulation_floor_area = 100
input_properties[3].insulation_wall_area = 100 input_properties[3].insulation_wall_area = 100
input_properties[3].number_of_floors = 1 input_properties[3].number_of_floors = 1
input_properties[3].floor_level = 0
recommender = FloorRecommendations(property_instance=input_properties[3], materials=materials) recommender = FloorRecommendations(property_instance=input_properties[3], materials=materials)
assert recommender.estimated_u_value is None assert recommender.estimated_u_value is None
recommender.recommend() recommender.recommend()
@ -114,6 +107,7 @@ class TestFloorRecommendations:
input_properties[4].wall_type = "solid brick" input_properties[4].wall_type = "solid brick"
input_properties[4].floor_type = "solid" input_properties[4].floor_type = "solid"
input_properties[4].number_of_floors = 1 input_properties[4].number_of_floors = 1
input_properties[4].floor_level = 0
# In this case, we have no county, so in this case, it should yse the local-authority-label if possible # In this case, we have no county, so in this case, it should yse the local-authority-label if possible
input_properties[4].data["county"] = "" input_properties[4].data["county"] = ""

View file

@ -9,7 +9,7 @@ from recommendations.tests.test_data.materials import materials
class TestLightingRecommendations: class TestLightingRecommendations:
def test_init_invalid_materials(self): def test_init_invalid_materials(self):
input_property0 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock()) input_property0 = Property(id=1, postcode="F4k3 6", address="623 fake street")
input_property0.lighting = {"low_energy_proportion": 0} input_property0.lighting = {"low_energy_proportion": 0}
input_property0.data = {"county": "Greater London Authority"} input_property0.data = {"county": "Greater London Authority"}
# Test for invalid materials # Test for invalid materials
@ -18,7 +18,7 @@ class TestLightingRecommendations:
def test_recommend_no_action_needed(self): def test_recommend_no_action_needed(self):
# Case where no recommendation is needed # Case where no recommendation is needed
input_property1 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock()) input_property1 = Property(id=1, postcode="F4k3 6", address="623 fake street")
input_property1.lighting = {"low_energy_proportion": 100} input_property1.lighting = {"low_energy_proportion": 100}
input_property1.data = {"county": "Greater London Authority"} input_property1.data = {"county": "Greater London Authority"}
@ -28,7 +28,7 @@ class TestLightingRecommendations:
def test_recommend_action_needed(self): def test_recommend_action_needed(self):
# Case where recommendation is needed # Case where recommendation is needed
input_property1 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock()) input_property1 = Property(id=1, postcode="F4k3 6", address="623 fake street")
input_property1.lighting = {"low_energy_proportion": 100} input_property1.lighting = {"low_energy_proportion": 100}
input_property1.data = {"county": "Greater London Authority"} input_property1.data = {"county": "Greater London Authority"}
input_property1.lighting = {"low_energy_proportion": 0.80} input_property1.lighting = {"low_energy_proportion": 0.80}
@ -40,8 +40,7 @@ class TestLightingRecommendations:
assert lr.recommendation == [ assert lr.recommendation == [
{'parts': [], 'type': 'low_energy_lighting', 'description': 'Install low energy lighting in 4 outlets', {'parts': [], 'type': 'low_energy_lighting', 'description': 'Install low energy lighting in 4 outlets',
'starting_u_value': None, 'new_u_value': None, 'sap_points': 0.4, 'total': 458.976, 'subtotal': 382.48, 'starting_u_value': None, 'new_u_value': None, 'sap_points': 0.4, 'total': 240.24,
'vat': 76.49600000000001, 'contingency': 27.320000000000007, 'preliminaries': 27.320000000000007, 'subtotal': 200.20000000000002, 'vat': 40.040000000000006, 'contingency': 14.3, 'preliminaries': 14.3,
'material': 80.0, 'profit': 54.640000000000015, 'labour_hours': 3.2, 'labour_days': 0.4, 'material': 80.0, 'profit': 28.6, 'labour_hours': 3.2, 'labour_days': 0.4, 'labour_cost': 63.0}
'labour_cost': 193.20000000000002}
] ]

View file

@ -427,3 +427,106 @@ def test_external_wall_area():
for num_floors, floor_height, perimeter, built_form, expected in test_cases: for num_floors, floor_height, perimeter, built_form, expected in test_cases:
result = recommendation_utils.estimate_external_wall_area(num_floors, floor_height, perimeter, built_form) result = recommendation_utils.estimate_external_wall_area(num_floors, floor_height, perimeter, built_form)
assert result == expected, f"Test failed for {built_form}: Expected {expected}, got {result}" assert result == expected, f"Test failed for {built_form}: Expected {expected}, got {result}"
def test_estimate_windows():
    """Check ``estimate_windows`` against reference properties.

    Each scenario pairs the keyword arguments handed to
    ``recommendation_utils.estimate_windows`` with the window count the
    estimator is expected to return. Scenarios run in the order listed and
    the first mismatch fails the test.
    """
    scenarios = [
        # EPR for a flat that has 4 windows.
        (
            dict(
                property_type="Flat",
                built_form="Semi-Detached",
                construction_age_band="England and Wales: 1976-1982",
                floor_area=37,
                number_habitable_rooms=2,
                extension_count=0,
            ),
            4,
        ),
        # EPR with 7 windows, two of which are very small (0.21m^2 and 0.3m^2),
        # so 6 is treated as a reasonable estimate.
        (
            dict(
                property_type="House",
                built_form="Mid-Terrace",
                construction_age_band="England and Wales: 1950-1966",
                floor_area=69,
                number_habitable_rooms=4,
                extension_count=0,
            ),
            6,
        ),
        # EPR for a bungalow with 6 windows, two of them small (0.4m^2 each),
        # so 5 is an acceptable estimate.
        (
            dict(
                property_type="Bungalow",
                built_form="Mid-Terrace",
                construction_age_band="England and Wales: 1967-1975",
                floor_area=56,
                number_habitable_rooms=3,
                extension_count=0,
            ),
            5,
        ),
        # EPR for an end-terrace house with 8 windows, one of them very small
        # (0.25m^2), so 7 is an acceptable estimate.
        (
            dict(
                property_type="House",
                built_form="End-Terrace",
                construction_age_band="England and Wales: 1967-1975",
                floor_area=77.28,
                number_habitable_rooms=4,
                extension_count=0,
            ),
            7,
        ),
        # EPR for a semi-detached house with 11 windows according to the
        # associated condition report. Right now we estimate 12 for this property.
        (
            dict(
                property_type="House",
                built_form="Semi-Detached",
                construction_age_band="England and Wales: 1950-1966",
                floor_area=88.4,
                number_habitable_rooms=5,
                extension_count=0,
            ),
            12,
        ),
        # Khalim's flat, which has 3 windows and no construction age band on
        # the EPC. The windows are large, so 5 is a reasonable estimate.
        (
            dict(
                property_type="Flat",
                built_form="",
                construction_age_band="",
                floor_area=100,
                number_habitable_rooms=3,
                extension_count=0,
            ),
            5,
        ),
        # EPR for a semi-detached house where the exact number of windows is
        # not known. We estimate 10.
        (
            dict(
                property_type="House",
                built_form="Semi-Detached",
                construction_age_band="England and Wales: 1967-1975",
                floor_area=85,
                number_habitable_rooms=4,
                extension_count=0,
            ),
            10,
        ),
        # Based on Khalim's parents' flat.
        (
            dict(
                property_type="Flat",
                built_form="End-Terrace",
                construction_age_band="",
                floor_area=50,
                number_habitable_rooms=3,
                extension_count=0,
            ),
            5,
        ),
    ]

    for inputs, expected in scenarios:
        estimated = recommendation_utils.estimate_windows(**inputs)
        assert estimated == expected, f"Expected {expected} windows, got {estimated}"

View file

@ -1,5 +1,4 @@
from backend.Property import Property from backend.Property import Property
from unittest.mock import Mock
from recommendations.RoofRecommendations import RoofRecommendations from recommendations.RoofRecommendations import RoofRecommendations
from recommendations.tests.test_data.materials import materials from recommendations.tests.test_data.materials import materials
@ -7,7 +6,7 @@ from recommendations.tests.test_data.materials import materials
class TestRoofRecommendations: class TestRoofRecommendations:
def test_loft_insulation_recommendation_no_insulation(self): def test_loft_insulation_recommendation_no_insulation(self):
property_instance = Property(id=0, address1="fake", postcode="fake", epc_client=Mock()) property_instance = Property(id=0, address="fake", postcode="fake")
property_instance.age_band = "F" property_instance.age_band = "F"
property_instance.insulation_floor_area = 100 property_instance.insulation_floor_area = 100
property_instance.roof = { property_instance.roof = {
@ -32,7 +31,7 @@ class TestRoofRecommendations:
assert len(roof_recommender.recommendations) assert len(roof_recommender.recommendations)
def test_loft_insulation_recommendation_50mm_insulation(self): def test_loft_insulation_recommendation_50mm_insulation(self):
property_instance2 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock()) property_instance2 = Property(id=0, address="fake", postcode="fake")
property_instance2.age_band = "F" property_instance2.age_band = "F"
property_instance2.insulation_floor_area = 100 property_instance2.insulation_floor_area = 100
property_instance2.roof = { property_instance2.roof = {
@ -54,11 +53,11 @@ class TestRoofRecommendations:
assert len(roof_recommender2.recommendations) == 1 assert len(roof_recommender2.recommendations) == 1
assert roof_recommender2.recommendations[0]["total"] == 1310.56464 assert roof_recommender2.recommendations[0]["total"] == 1936.9206000000004
assert roof_recommender2.recommendations[0]["new_u_value"] == 0.14 assert roof_recommender2.recommendations[0]["new_u_value"] == 0.14
assert roof_recommender2.recommendations[0]["starting_u_value"] == 0.68 assert roof_recommender2.recommendations[0]["starting_u_value"] == 0.68
property_instance3 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock()) property_instance3 = Property(id=0, address="fake", postcode="fake")
property_instance3.age_band = "F" property_instance3.age_band = "F"
property_instance3.insulation_floor_area = 100 property_instance3.insulation_floor_area = 100
property_instance3.roof = { property_instance3.roof = {
@ -83,7 +82,7 @@ class TestRoofRecommendations:
assert roof_recommender3.recommendations[0]["parts"][0]["depth"] == 270 assert roof_recommender3.recommendations[0]["parts"][0]["depth"] == 270
def test_loft_insulation_recommendation_150mm_insulation(self): def test_loft_insulation_recommendation_150mm_insulation(self):
property_instance4 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock()) property_instance4 = Property(id=0, address="fake", postcode="fake")
property_instance4.age_band = "F" property_instance4.age_band = "F"
property_instance4.insulation_floor_area = 100 property_instance4.insulation_floor_area = 100
property_instance4.roof = { property_instance4.roof = {
@ -105,12 +104,12 @@ class TestRoofRecommendations:
assert len(roof_recommender4.recommendations) == 4 assert len(roof_recommender4.recommendations) == 4
assert roof_recommender4.recommendations[0]["total"] == 788.0544 assert roof_recommender4.recommendations[0]["total"] == 1128.744
assert roof_recommender4.recommendations[0]["new_u_value"] == 0.15 assert roof_recommender4.recommendations[0]["new_u_value"] == 0.15
assert roof_recommender4.recommendations[0]["starting_u_value"] == 0.3 assert roof_recommender4.recommendations[0]["starting_u_value"] == 0.3
assert roof_recommender4.recommendations[0]["parts"][0]["depth"] == 150 assert roof_recommender4.recommendations[0]["parts"][0]["depth"] == 150
property_instance5 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock()) property_instance5 = Property(id=0, address="fake", postcode="fake")
property_instance5.age_band = "F" property_instance5.age_band = "F"
property_instance5.insulation_floor_area = 100 property_instance5.insulation_floor_area = 100
property_instance5.roof = { property_instance5.roof = {
@ -137,7 +136,7 @@ class TestRoofRecommendations:
def test_loft_insulation_recommendation_270mm_insulation(self): def test_loft_insulation_recommendation_270mm_insulation(self):
# We shouldn't recommend anything in this case # We shouldn't recommend anything in this case
property_instance6 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock()) property_instance6 = Property(id=0, address="fake", postcode="fake")
property_instance6.age_band = "F" property_instance6.age_band = "F"
property_instance6.insulation_floor_area = 100 property_instance6.insulation_floor_area = 100
property_instance6.roof = { property_instance6.roof = {
@ -278,7 +277,7 @@ class TestRoofRecommendations:
# "Insulate your room roof with 270mm of Example room roof insulation" # "Insulate your room roof with 270mm of Example room roof insulation"
def test_flat_no_insulation(self): def test_flat_no_insulation(self):
property_instance11 = Property(id=11, address1="fake", postcode="fake", epc_client=Mock()) property_instance11 = Property(id=11, address="fake", postcode="fake")
property_instance11.age_band = "D" property_instance11.age_band = "D"
property_instance11.insulation_floor_area = 33.5 property_instance11.insulation_floor_area = 33.5
property_instance11.perimeter = 24 property_instance11.perimeter = 24
@ -307,7 +306,7 @@ class TestRoofRecommendations:
"Insulate the home's flat roof with 150mm of Ecotherm Eco-Versal General Purpose Insulation Board" "Insulate the home's flat roof with 150mm of Ecotherm Eco-Versal General Purpose Insulation Board"
def test_flat_insulated(self): def test_flat_insulated(self):
property_instance12 = Property(id=12, address1="fake", postcode="fake", epc_client=Mock()) property_instance12 = Property(id=12, address="fake", postcode="fake")
property_instance12.age_band = "D" property_instance12.age_band = "D"
property_instance12.insulation_floor_area = 40 property_instance12.insulation_floor_area = 40
property_instance12.perimeter = 30 property_instance12.perimeter = 30
@ -331,7 +330,7 @@ class TestRoofRecommendations:
assert not roof_recommender12.recommendations assert not roof_recommender12.recommendations
def test_flat_limited_insulation(self): def test_flat_limited_insulation(self):
property_instance13 = Property(id=12, address1="fake", postcode="fake", epc_client=Mock()) property_instance13 = Property(id=12, address="fake", postcode="fake")
property_instance13.age_band = "D" property_instance13.age_band = "D"
property_instance13.insulation_floor_area = 40 property_instance13.insulation_floor_area = 40
property_instance13.perimeter = 40 property_instance13.perimeter = 40
@ -363,7 +362,7 @@ class TestRoofRecommendations:
"Insulate the home's flat roof with 150mm of Ecotherm Eco-Versal General Purpose Insulation Board" "Insulate the home's flat roof with 150mm of Ecotherm Eco-Versal General Purpose Insulation Board"
def test_property_above(self): def test_property_above(self):
property_instance14 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock()) property_instance14 = Property(id=0, address="fake", postcode="fake")
property_instance14.age_band = "F" property_instance14.age_band = "F"
property_instance14.insulation_floor_area = 100 property_instance14.insulation_floor_area = 100
property_instance14.roof = { property_instance14.roof = {

View file

@ -0,0 +1,79 @@
import pytest
from recommendations.SolarPvRecommendations import SolarPvRecommendations
from backend.Property import Property
class TestSolarPvRecommendations:
    """Scenarios in which SolarPvRecommendations should or should not recommend a system."""

    @staticmethod
    def _blank_property():
        # Every scenario starts from the same placeholder property; the fixtures
        # then attach the data/roof details relevant to the case under test.
        return Property(id=1, address="", postcode="")

    @staticmethod
    def _recommend(property_instance):
        # Build the recommender, run it, and hand back whatever it stored.
        recommender = SolarPvRecommendations(property_instance)
        recommender.recommend()
        return recommender.recommendation

    @pytest.fixture
    def property_instance_invalid_type(self):
        """Property whose ``property-type`` is set to an invalid value."""
        prop = self._blank_property()
        prop.data = {
            "property-type": "InvalidType",
            "county": "Broxbourne",
            "photo-supply": None,
        }
        prop.roof = {"is_flat": False, "is_pitched": False, "is_roof_room": False}
        return prop

    @pytest.fixture
    def property_instance_invalid_roof(self):
        """House whose roof flags (flat, pitched, roof room) are all False."""
        prop = self._blank_property()
        prop.data = {
            "county": "Huntingdonshire",
            "property-type": "House",
            "photo-supply": None,
        }
        prop.roof = {"is_flat": False, "is_pitched": False, "is_roof_room": False}
        return prop

    @pytest.fixture
    def property_instance_has_solar_pv(self):
        """House whose ``photo-supply`` is already populated (existing solar PV)."""
        prop = self._blank_property()
        prop.data = {
            "photo-supply": "40",
            "county": "Huntingdonshire",
            "property-type": "House",
        }
        prop.roof = {"is_flat": True}
        return prop

    @pytest.fixture
    def property_instance_valid_all(self):
        """Flat-roofed house with no existing PV and roof-area details set."""
        prop = self._blank_property()
        prop.solar_pv_roof_area = 20
        prop.solar_pv_percentage = 40
        prop.data = {
            "property-type": "House",
            "photo-supply": None,
            "county": "Huntingdonshire",
        }
        prop.roof = {"is_flat": True}
        return prop

    def test_invalid_property_type(self, property_instance_invalid_type):
        # An invalid property type must not yield a recommendation.
        assert not self._recommend(property_instance_invalid_type)

    def test_invalid_roof_type(self, property_instance_invalid_roof):
        # With every roof flag False there must be no recommendation.
        assert not self._recommend(property_instance_invalid_roof)

    def test_existing_solar_pv(self, property_instance_has_solar_pv):
        # A property that already reports a photo-supply gets no recommendation.
        assert not self._recommend(property_instance_has_solar_pv)

    def test_valid_all_conditions(self, property_instance_valid_all):
        # The fully valid property must produce exactly this single recommendation.
        expected_recommendation = {
            'parts': [],
            'type': 'solar_pv',
            'description': 'Install a 4 kilowatt-peak (kWp) solar photovoltaic (PV) panel system on the roof',
            'starting_u_value': None,
            'new_u_value': None,
            'sap_points': None,
            'total': 8527.0752,
            'subtotal': 7105.896,
            'vat': 1421.1791999999996,
            'labour_hours': 72,
            'labour_days': 2,
            'photo_supply': 4000,
        }
        assert self._recommend(property_instance_valid_all) == [expected_recommendation]

View file

@ -1,5 +1,4 @@
from backend.Property import Property from backend.Property import Property
from unittest.mock import Mock
from recommendations.VentilationRecommendations import VentilationRecommendations from recommendations.VentilationRecommendations import VentilationRecommendations
from recommendations.tests.test_data.materials import materials from recommendations.tests.test_data.materials import materials
@ -7,7 +6,7 @@ from recommendations.tests.test_data.materials import materials
class TestVentilationRecommendations: class TestVentilationRecommendations:
def test_natural_ventilation(self): def test_natural_ventilation(self):
input_property1 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock()) input_property1 = Property(id=1, postcode="F4k3 6", address="623 fake street")
input_property1.data = {"mechanical-ventilation": "natural"} input_property1.data = {"mechanical-ventilation": "natural"}
recommender = VentilationRecommendations( recommender = VentilationRecommendations(
@ -28,7 +27,7 @@ class TestVentilationRecommendations:
assert recommender.recommendation[0]["parts"][0]["quantity"] == 2 assert recommender.recommendation[0]["parts"][0]["quantity"] == 2
def test_missing_ventilation(self): def test_missing_ventilation(self):
input_property2 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock()) input_property2 = Property(id=1, postcode="F4k3 6", address="623 fake street")
input_property2.data = {"mechanical-ventilation": None} input_property2.data = {"mechanical-ventilation": None}
recommender2 = VentilationRecommendations( recommender2 = VentilationRecommendations(
@ -49,7 +48,7 @@ class TestVentilationRecommendations:
assert recommender2.recommendation[0]["parts"][0]["quantity"] == 2 assert recommender2.recommendation[0]["parts"][0]["quantity"] == 2
def test_nodata_ventilation(self): def test_nodata_ventilation(self):
input_property3 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock()) input_property3 = Property(id=1, postcode="F4k3 6", address="623 fake street")
input_property3.data = {"mechanical-ventilation": "NO DATA!!"} input_property3.data = {"mechanical-ventilation": "NO DATA!!"}
recommender3 = VentilationRecommendations( recommender3 = VentilationRecommendations(
@ -70,7 +69,7 @@ class TestVentilationRecommendations:
assert recommender3.recommendation[0]["parts"][0]["quantity"] == 2 assert recommender3.recommendation[0]["parts"][0]["quantity"] == 2
def test_existing_ventilation_1(self): def test_existing_ventilation_1(self):
input_property4 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock()) input_property4 = Property(id=1, postcode="F4k3 6", address="623 fake street")
input_property4.data = {"mechanical-ventilation": 'mechanical, extract only'} input_property4.data = {"mechanical-ventilation": 'mechanical, extract only'}
recommender4 = VentilationRecommendations( recommender4 = VentilationRecommendations(
@ -86,7 +85,7 @@ class TestVentilationRecommendations:
assert recommender4.has_ventilaion assert recommender4.has_ventilaion
def test_existing_ventilation_2(self): def test_existing_ventilation_2(self):
input_property5 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock()) input_property5 = Property(id=1, postcode="F4k3 6", address="623 fake street")
input_property5.data = {"mechanical-ventilation": 'mechanical, supply and extract'} input_property5.data = {"mechanical-ventilation": 'mechanical, supply and extract'}
recommender5 = VentilationRecommendations( recommender5 = VentilationRecommendations(

View file

@ -231,7 +231,7 @@ class TestWallRecommendationsBase:
class TestCavityWallRecommensations: class TestCavityWallRecommensations:
def test_fill_empty_cavity(self): def test_fill_empty_cavity(self):
input_property = Property(id=1, postcode="F4k3", address1="123 fake street", epc_client=Mock()) input_property = Property(id=1, postcode="F4k3", address="123 fake street")
input_property.walls = { input_property.walls = {
'original_description': 'Cavity wall, as built, no insulation (assumed)', 'original_description': 'Cavity wall, as built, no insulation (assumed)',
'clean_description': 'Cavity wall, as built, no insulation', 'clean_description': 'Cavity wall, as built, no insulation',
@ -265,7 +265,7 @@ class TestCavityWallRecommensations:
assert np.isclose(recommender.recommendations[1]["total"], 2004.6600000000003) assert np.isclose(recommender.recommendations[1]["total"], 2004.6600000000003)
def test_fill_partial_filled_cavity(self): def test_fill_partial_filled_cavity(self):
input_property = Property(id=1, postcode="F4k3", address1="123 fake street", epc_client=Mock()) input_property = Property(id=1, postcode="F4k3", address="123 fake street")
input_property.walls = { input_property.walls = {
'original_description': 'Cavity wall, as built, partial insulation (assumed)', 'original_description': 'Cavity wall, as built, partial insulation (assumed)',
'clean_description': 'Cavity wall, as built, partial insulation', 'clean_description': 'Cavity wall, as built, partial insulation',
@ -299,7 +299,7 @@ class TestCavityWallRecommensations:
assert np.isclose(recommender.recommendations[1]["total"], 1999.9350000000002) assert np.isclose(recommender.recommendations[1]["total"], 1999.9350000000002)
def test_system_built_wall(self): def test_system_built_wall(self):
input_property2 = Property(id=1, postcode="F4k3 2", address1="223 fake street", epc_client=Mock()) input_property2 = Property(id=1, postcode="F4k3 2", address="223 fake street")
input_property2.walls = { input_property2.walls = {
'original_description': 'System built, as built, no insulation (assumed)', 'original_description': 'System built, as built, no insulation (assumed)',
'clean_description': 'System built, as built, no insulation', 'clean_description': 'System built, as built, no insulation',
@ -331,22 +331,22 @@ class TestCavityWallRecommensations:
assert len(recommender2.recommendations) == 9 assert len(recommender2.recommendations) == 9
assert recommender2.estimated_u_value == 1 assert recommender2.estimated_u_value == 1
assert np.isclose(recommender2.recommendations[0]["new_u_value"], 0.19) assert np.isclose(recommender2.recommendations[0]["new_u_value"], 0.19)
assert np.isclose(recommender2.recommendations[0]["total"], 15899.9616) assert np.isclose(recommender2.recommendations[0]["total"], 16429.960320000002)
assert recommender2.recommendations[0]["parts"][0]["type"] == "external_wall_insulation" assert recommender2.recommendations[0]["parts"][0]["type"] == "external_wall_insulation"
assert recommender2.recommendations[0]["parts"][0]["depth"] == 100 assert recommender2.recommendations[0]["parts"][0]["depth"] == 100
assert np.isclose(recommender2.recommendations[8]["new_u_value"], 0.23) assert np.isclose(recommender2.recommendations[8]["new_u_value"], 0.23)
assert np.isclose(recommender2.recommendations[8]["total"], 10916.3424) assert np.isclose(recommender2.recommendations[8]["total"], 11292.768)
assert recommender2.recommendations[8]["parts"][0]["type"] == "internal_wall_insulation" assert recommender2.recommendations[8]["parts"][0]["type"] == "internal_wall_insulation"
assert recommender2.recommendations[8]["parts"][0]["depth"] == 72.5 assert recommender2.recommendations[8]["parts"][0]["depth"] == 72.5
assert np.isclose(recommender2.recommendations[6]["new_u_value"], 0.29) assert np.isclose(recommender2.recommendations[6]["new_u_value"], 0.29)
assert np.isclose(recommender2.recommendations[6]["total"], 10621.934399999998) assert np.isclose(recommender2.recommendations[6]["total"], 10988.208)
assert recommender2.recommendations[6]["parts"][0]["type"] == "internal_wall_insulation" assert recommender2.recommendations[6]["parts"][0]["type"] == "internal_wall_insulation"
assert recommender2.recommendations[6]["parts"][0]["depth"] == 52.5 assert recommender2.recommendations[6]["parts"][0]["depth"] == 52.5
def test_timber_frame_wall(self): def test_timber_frame_wall(self):
input_property3 = Property(id=1, postcode="F4k3 2", address1="223 fake street", epc_client=Mock()) input_property3 = Property(id=1, postcode="F4k3 2", address="223 fake street")
input_property3.walls = { input_property3.walls = {
'original_description': 'Timber frame, as built, no insulation (assumed)', 'original_description': 'Timber frame, as built, no insulation (assumed)',
'clean_description': 'Timber frame, as built, no insulation', 'clean_description': 'Timber frame, as built, no insulation',
@ -378,17 +378,17 @@ class TestCavityWallRecommensations:
assert len(recommender3.recommendations) == 6 assert len(recommender3.recommendations) == 6
assert recommender3.estimated_u_value == 1.9 assert recommender3.estimated_u_value == 1.9
assert np.isclose(recommender3.recommendations[0]["new_u_value"], 0.2) assert np.isclose(recommender3.recommendations[0]["new_u_value"], 0.2)
assert np.isclose(recommender3.recommendations[0]["total"], 13117.46832) assert np.isclose(recommender3.recommendations[0]["total"], 13554.717263999999)
assert recommender3.recommendations[0]["parts"][0]["type"] == "external_wall_insulation" assert recommender3.recommendations[0]["parts"][0]["type"] == "external_wall_insulation"
assert recommender3.recommendations[0]["parts"][0]["depth"] == 100.0 assert recommender3.recommendations[0]["parts"][0]["depth"] == 100.0
assert np.isclose(recommender3.recommendations[1]["new_u_value"], 0.23) assert np.isclose(recommender3.recommendations[1]["new_u_value"], 0.23)
assert np.isclose(recommender3.recommendations[1]["total"], 34070.50944) assert np.isclose(recommender3.recommendations[1]["total"], 35206.19308800001)
assert recommender3.recommendations[1]["parts"][0]["type"] == "external_wall_insulation" assert recommender3.recommendations[1]["parts"][0]["type"] == "external_wall_insulation"
assert recommender3.recommendations[1]["parts"][0]["depth"] == 150.0 assert recommender3.recommendations[1]["parts"][0]["depth"] == 150.0
def test_granite_or_whinstone_wall(self): def test_granite_or_whinstone_wall(self):
input_property4 = Property(id=1, postcode="F4k3 2", address1="223 fake street", epc_client=Mock()) input_property4 = Property(id=1, postcode="F4k3 2", address="223 fake street")
input_property4.walls = { input_property4.walls = {
'original_description': 'Granite or whinstone, as built, no insulation (assumed)', 'original_description': 'Granite or whinstone, as built, no insulation (assumed)',
'clean_description': 'Granite or whinstone, as built, no insulation', 'clean_description': 'Granite or whinstone, as built, no insulation',
@ -420,17 +420,17 @@ class TestCavityWallRecommensations:
assert len(recommender4.recommendations) == 6 assert len(recommender4.recommendations) == 6
assert recommender4.estimated_u_value == 2.3 assert recommender4.estimated_u_value == 2.3
assert np.isclose(recommender4.recommendations[0]["new_u_value"], 0.21) assert np.isclose(recommender4.recommendations[0]["new_u_value"], 0.21)
assert np.isclose(recommender4.recommendations[0]["total"], 28562.514352) assert np.isclose(recommender4.recommendations[0]["total"], 29547.42864)
assert recommender4.recommendations[0]["parts"][0]["type"] == "external_wall_insulation" assert recommender4.recommendations[0]["parts"][0]["type"] == "external_wall_insulation"
assert recommender4.recommendations[0]["parts"][0]["depth"] == 100 assert recommender4.recommendations[0]["parts"][0]["depth"] == 100
assert np.isclose(recommender4.recommendations[1]["new_u_value"], 0.23) assert np.isclose(recommender4.recommendations[1]["new_u_value"], 0.23)
assert np.isclose(recommender4.recommendations[1]["total"], 74186.52678400002) assert np.isclose(recommender4.recommendations[1]["total"], 76744.68288000001)
assert recommender4.recommendations[1]["parts"][0]["type"] == "external_wall_insulation" assert recommender4.recommendations[1]["parts"][0]["type"] == "external_wall_insulation"
assert recommender4.recommendations[1]["parts"][0]["depth"] == 150 assert recommender4.recommendations[1]["parts"][0]["depth"] == 150
def test_cob_wall(self): def test_cob_wall(self):
input_property5 = Property(id=1, postcode="F4k3 2", address1="223 fake street", epc_client=Mock()) input_property5 = Property(id=1, postcode="F4k3 2", address="223 fake street")
input_property5.walls = { input_property5.walls = {
'original_description': 'Cob, as built', 'original_description': 'Cob, as built',
'clean_description': 'Cob, as built', 'clean_description': 'Cob, as built',
@ -462,17 +462,17 @@ class TestCavityWallRecommensations:
assert len(recommender5.recommendations) == 5 assert len(recommender5.recommendations) == 5
assert recommender5.estimated_u_value == 0.8 assert recommender5.estimated_u_value == 0.8
assert np.isclose(recommender5.recommendations[0]["new_u_value"], 0.29) assert np.isclose(recommender5.recommendations[0]["new_u_value"], 0.29)
assert np.isclose(recommender5.recommendations[0]["total"], 8665.040384000002) assert np.isclose(recommender5.recommendations[0]["total"], 8963.834880000002)
assert recommender5.recommendations[0]["parts"][0]["type"] == "external_wall_insulation" assert recommender5.recommendations[0]["parts"][0]["type"] == "external_wall_insulation"
assert recommender5.recommendations[0]["parts"][0]["depth"] == 50 assert recommender5.recommendations[0]["parts"][0]["depth"] == 50
assert np.isclose(recommender5.recommendations[3]["new_u_value"], 0.26) assert np.isclose(recommender5.recommendations[3]["new_u_value"], 0.26)
assert np.isclose(recommender5.recommendations[3]["total"], 20078.742992) assert np.isclose(recommender5.recommendations[3]["total"], 20771.11344)
assert recommender5.recommendations[3]["parts"][0]["type"] == "internal_wall_insulation" assert recommender5.recommendations[3]["parts"][0]["type"] == "internal_wall_insulation"
assert recommender5.recommendations[3]["parts"][0]["depth"] == 100 assert recommender5.recommendations[3]["parts"][0]["depth"] == 100
def test_sandstone_or_limestone_wall(self): def test_sandstone_or_limestone_wall(self):
input_property6 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock()) input_property6 = Property(id=1, postcode="F4k3 6", address="623 fake street")
input_property6.walls = { input_property6.walls = {
'original_description': 'Sandstone or limestone, as built, no insulation (assumed)', 'original_description': 'Sandstone or limestone, as built, no insulation (assumed)',
'clean_description': 'Sandstone or limestone, as built, no insulation', 'clean_description': 'Sandstone or limestone, as built, no insulation',
@ -504,16 +504,16 @@ class TestCavityWallRecommensations:
assert len(recommender6.recommendations) == 9 assert len(recommender6.recommendations) == 9
assert recommender6.estimated_u_value == 1 assert recommender6.estimated_u_value == 1
assert np.isclose(recommender6.recommendations[0]["new_u_value"], 0.19) assert np.isclose(recommender6.recommendations[0]["new_u_value"], 0.19)
assert np.isclose(recommender6.recommendations[0]["total"], 44829.0584) assert np.isclose(recommender6.recommendations[0]["total"], 46374.888000000006)
assert recommender6.recommendations[0]["parts"][0]["type"] == "external_wall_insulation" assert recommender6.recommendations[0]["parts"][0]["type"] == "external_wall_insulation"
assert recommender6.recommendations[0]["parts"][0]["depth"] == 100 assert recommender6.recommendations[0]["parts"][0]["depth"] == 100
assert np.isclose(recommender6.recommendations[2]["new_u_value"], 0.21) assert np.isclose(recommender6.recommendations[2]["new_u_value"], 0.21)
assert np.isclose(recommender6.recommendations[2]["total"], 116436.25280000002) assert np.isclose(recommender6.recommendations[2]["total"], 120451.29600000002)
assert recommender6.recommendations[2]["parts"][0]["type"] == "external_wall_insulation" assert recommender6.recommendations[2]["parts"][0]["type"] == "external_wall_insulation"
assert recommender6.recommendations[2]["parts"][0]["depth"] == 150 assert recommender6.recommendations[2]["parts"][0]["depth"] == 150
assert np.isclose(recommender6.recommendations[4]["new_u_value"], 0.28) assert np.isclose(recommender6.recommendations[4]["new_u_value"], 0.28)
assert np.isclose(recommender6.recommendations[4]["total"], 91267.0136) assert np.isclose(recommender6.recommendations[4]["total"], 94414.15199999999)
assert recommender6.recommendations[4]["parts"][0]["type"] == "internal_wall_insulation" assert recommender6.recommendations[4]["parts"][0]["type"] == "internal_wall_insulation"
assert recommender6.recommendations[4]["parts"][0]["depth"] == 100 assert recommender6.recommendations[4]["parts"][0]["depth"] == 100

View file

@ -0,0 +1,252 @@
from recommendations.WindowsRecommendations import WindowsRecommendations
from backend.Property import Property
from recommendations.tests.test_data.materials import materials
class TestWindowRecommendations:
    """Tests for the glazing measures produced by ``WindowsRecommendations``.

    Every test builds a seven-window ``Property`` located in Wychavon, checks
    that the recommender holds no recommendation before ``recommend()`` runs,
    and then checks the recommendation list afterwards.

    Numeric fields of the expected recommendations are compared with a
    tolerance rather than exact equality, because the costs are products of
    floats and the expected values carry rounding artefacts
    (e.g. ``953.6572080000001``).
    """

    @staticmethod
    def _make_property(multi_glaze_proportion, windows, uprn=0):
        """Build the minimal ``Property`` fixture shared by these tests.

        :param multi_glaze_proportion: value stored under the
            ``"multi-glaze-proportion"`` key of the property data
        :param windows: the windows description dict assigned to the property
        :param uprn: value stored under the ``"uprn"`` key of the property data
        :return: a ``Property`` with ``windows`` set and 7 windows
        """
        property_instance = Property(
            id=1,
            postcode='1',
            address='1',
            data={
                "county": "Wychavon",
                "multi-glaze-proportion": multi_glaze_proportion,
                "uprn": uprn,
            },
        )
        property_instance.windows = windows
        property_instance.number_of_windows = 7
        return property_instance

    @staticmethod
    def _recommend(property_instance):
        """Run the recommender for a property and return its recommendation.

        Also asserts that nothing is recommended before ``recommend()`` is
        called.
        """
        recommender = WindowsRecommendations(property_instance=property_instance, materials=materials)
        assert not recommender.recommendation
        recommender.recommend()
        return recommender.recommendation

    @staticmethod
    def _assert_recommendations_match(actual, expected):
        """Assert that two recommendation lists agree.

        The lists must have the same length and each pair of dicts the same
        keys. Numeric values are compared with ``math.isclose``; every other
        value (strings, ``None``, booleans, lists) must be exactly equal.
        """
        import math

        def is_number(value):
            # bool is a subclass of int, but flags must match exactly
            return isinstance(value, (int, float)) and not isinstance(value, bool)

        assert len(actual) == len(expected)
        for got, want in zip(actual, expected):
            assert set(got) == set(want)
            for key, want_value in want.items():
                got_value = got[key]
                if is_number(want_value) and is_number(got_value):
                    assert math.isclose(got_value, want_value, rel_tol=1e-9, abs_tol=1e-9), (
                        f"{key}: expected {want_value!r}, got {got_value!r}"
                    )
                else:
                    assert got_value == want_value, (
                        f"{key}: expected {want_value!r}, got {got_value!r}"
                    )

    def test_fully_single_glazed(self):
        """
        For this property, we expect all windows to be single glazed and should recommend full double glazing
        """
        property_1 = self._make_property(
            multi_glaze_proportion=0,
            windows={
                'original_description': 'Single glazed', 'has_glazing': False, 'glazing_coverage': 'full',
                'glazing_type': 'single',
                'no_data': False
            },
        )

        recommendation = self._recommend(property_1)

        self._assert_recommendations_match(recommendation, [
            {'parts': [], 'type': 'windows_glazing', 'description': 'Install double glazing to all windows',
             'starting_u_value': None, 'new_u_value': None, 'sap_points': None, 'total': 5721.943248,
             'subtotal': 4768.28604, 'vat': 953.6572080000001, 'contingency': 340.59186, 'preliminaries': 340.59186,
             'material': 1275.75, 'profit': 681.18372, 'labour_hours': 45.5, 'labour_cost': 994.8624,
             'labour_days': 2.84375, 'is_secondary_glazing': False}])

    def test_partial_double_glazed(self):
        """
        For this property, the double glazing is described as partial, therefore we recommend completion of
        double glazing
        """
        property_2 = self._make_property(
            multi_glaze_proportion=33,
            windows={'original_description': 'Mostly double glazing', 'has_glazing': True,
                     'glazing_coverage': 'most',
                     'glazing_type': 'double', 'no_data': False},
        )

        recommendation = self._recommend(property_2)

        self._assert_recommendations_match(recommendation, [
            {'parts': [], 'type': 'windows_glazing', 'description': 'Install double glazing to the remaining windows',
             'starting_u_value': None, 'new_u_value': None, 'sap_points': None, 'total': 4087.10232,
             'subtotal': 3405.9186, 'vat': 681.18372, 'contingency': 243.2799, 'preliminaries': 243.2799,
             'material': 911.25, 'profit': 486.5598, 'labour_hours': 32.5, 'labour_cost': 710.6160000000001,
             'labour_days': 2.03125, 'is_secondary_glazing': False}])

    def test_fully_double_glazed(self):
        """
        This property has full double glazing so we shouldn't recommend anything
        """
        property_3 = self._make_property(
            multi_glaze_proportion=80,
            windows={'original_description': 'Fully double glazed', 'has_glazing': True,
                     'glazing_coverage': 'full',
                     'glazing_type': 'double', 'no_data': False},
        )

        assert not self._recommend(property_3)

    def test_fully_secondary_glazed(self):
        """
        This property has full secondary glazing so we shouldn't recommend anything
        """
        property_4 = self._make_property(
            multi_glaze_proportion=100,
            windows={'original_description': 'Full secondary glazing', 'has_glazing': True,
                     'glazing_coverage': 'full',
                     'glazing_type': 'secondary', 'no_data': False},
        )

        assert not self._recommend(property_4)

    def test_partial_secondary_glazing(self):
        """
        The secondary glazing is partial, so we expect secondary glazing to be recommended for the
        remaining windows
        """
        property_5 = self._make_property(
            multi_glaze_proportion=50,
            windows={'original_description': 'Partial secondary glazing', 'has_glazing': True,
                     'glazing_coverage': 'partial',
                     'glazing_type': 'secondary', 'no_data': False},
        )

        recommendation = self._recommend(property_5)

        self._assert_recommendations_match(recommendation, [
            {'parts': [], 'type': 'windows_glazing',
             'description': 'Install secondary glazing to the remaining windows',
             'starting_u_value': None, 'new_u_value': None, 'sap_points': None, 'total': 1089.893952,
             'subtotal': 908.24496, 'vat': 181.64899200000002, 'contingency': 64.87464, 'preliminaries': 64.87464,
             'material': 729.0, 'profit': 129.74928, 'labour_hours': 13.0, 'labour_cost': 568.4928,
             'labour_days': 0.8125, 'is_secondary_glazing': True}])

    def test_single_glazed_restricted_measures(self):
        """
        A single glazed property flagged as heritage with restricted measures should be recommended
        secondary glazing for all windows
        """
        property_6 = self._make_property(
            multi_glaze_proportion=0,
            windows={'original_description': 'Single glazed', 'has_glazing': False, 'glazing_coverage': None,
                     'glazing_type': 'single',
                     'no_data': False},
        )
        property_6.restricted_measures = True
        property_6.is_heritage = True

        recommendation = self._recommend(property_6)

        # NOTE(review): 'herigate' looks like a typo of 'heritage'; presumably it mirrors the
        # string emitted by the recommender - confirm and correct both together.
        self._assert_recommendations_match(recommendation, [
            {'parts': [], 'type': 'windows_glazing',
             'description': 'Install secondary glazing to all windows. Secondary '
                            'glazing recommended due to herigate building status',
             'starting_u_value': None, 'new_u_value': None, 'sap_points': None,
             'total': 1907.314416, 'subtotal': 1589.42868, 'vat': 317.885736,
             'contingency': 113.53062, 'preliminaries': 113.53062,
             'material': 1275.75, 'profit': 227.06124, 'labour_hours': 22.75,
             'labour_cost': 994.8624, 'labour_days': 1.421875, 'is_secondary_glazing': True}
        ])

    def test_full_triple_glazed(self):
        """
        This property has full triple glazing so we shouldn't recommend anything
        """
        property_7 = self._make_property(
            multi_glaze_proportion=100,
            windows={'original_description': 'Fully triple glazed', 'has_glazing': True,
                     'glazing_coverage': 'full',
                     'glazing_type': 'triple', 'no_data': False},
        )

        assert not self._recommend(property_7)

    def test_partial_triple_glazed(self):
        """
        We should just recommend double glazing to the remaining windows, since it's a cheaper option
        """
        property_8 = self._make_property(
            multi_glaze_proportion=80,
            windows={'original_description': 'Mostly triple glazing', 'has_glazing': True,
                     'glazing_coverage': 'most',
                     'glazing_type': 'triple', 'no_data': False},
            uprn=1,
        )

        recommendation = self._recommend(property_8)

        self._assert_recommendations_match(recommendation, [
            {'parts': [], 'type': 'windows_glazing', 'description': 'Install double glazing to the remaining windows',
             'starting_u_value': None, 'new_u_value': None, 'sap_points': None, 'total': 1634.840928,
             'subtotal': 1362.36744, 'vat': 272.47348800000003, 'contingency': 97.31196, 'preliminaries': 97.31196,
             'material': 364.5, 'profit': 194.62392, 'labour_hours': 13.0, 'labour_cost': 284.2464,
             'labour_days': 0.8125, 'is_secondary_glazing': False}])