Merge pull request #291 from Hestia-Homes/main

Huge dev deployment
This commit is contained in:
KhalimCK 2024-04-15 13:41:58 +01:00 committed by GitHub
commit c23ad48e1b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
129 changed files with 50560 additions and 3785 deletions

8
.gitignore vendored
View file

@ -241,6 +241,7 @@ fabric.properties
# Locally stored data
local_data/*
/local_data/*
etl/epc/local_data/*
*.DS_Store
infrastructure/terraform/.terraform*
@ -255,7 +256,7 @@ open_uprn/.idea/
conservation_areas/.idea/
model_data/.idea/
model_data/simulation_system/.idea/
model_data/simulation_system/
model_data/simulation_system/data*
model_data/simulation_system/model_directory/
model_data/simulation_system/predictions/
@ -264,4 +265,7 @@ model_data/simulation_system/predictions/
.idea/misc.iml
adhoc
adhoc/*
adhoc/*
etl-router-venv/
refactor_datasets/

2
.idea/.gitignore generated vendored
View file

@ -1,3 +1,5 @@
# Default ignored files
/shelf/
/workspace.xml
# GitHub Copilot persisted chat sessions
/copilot/chatSessions

View file

@ -45,7 +45,9 @@ class Definitions:
# contain a null value. A resolution to correct these anomalies will be considered for future data releases.
"NULL",
# We sometimes see fields populated with just an empty string.
""
"",
# An older value which rarely shows up but has been seen in the data.
"UNKNOWN",
}
DATA_ANOMALY_SUBSTRINGS = {

7
backend/DbClient.py Normal file
View file

@ -0,0 +1,7 @@
class DbClient:
    """Placeholder for the class that handles interaction with the database."""

    def __init__(self):
        # Nothing to initialise yet; no connection details are stored here.
        return None

105
backend/OrdnanceSurvey.py Normal file
View file

@ -0,0 +1,105 @@
from functools import lru_cache
import urllib.parse
import requests
from utils.logger import setup_logger
logger = setup_logger()
class OrdnanceSuveyClient:
    """
    Thin client for the Ordnance Survey Places "find" endpoint.

    Given a free-text address and postcode it looks up the best matching property and exposes a
    cleaned address, postcode and an EPC-compatible property type / built form.
    """

    # Seconds to wait for the Places API before giving up, so a stalled connection cannot hang the caller
    REQUEST_TIMEOUT_SECONDS = 30

    # OS classification code -> EPC-compatible description. Only residential codes of interest are listed.
    CLASSIFICATION_MAP = {
        # In the OS api, "RD" is a "Dwelling" however this is not valid property type in the EPC database
        'RD': {},
        'RD02': {'property_type': 'House', 'built_form': 'Detached'},
        'RD03': {'property_type': 'House', 'built_form': 'Semi-Detached'},
        'RD04': {'property_type': 'House', 'built_form': 'Mid-Terrace'},
        'RD06': {'property_type': 'Flat'},
    }

    def __init__(self, address, postcode, api_key):
        """
        :param address: The address for the property to search for
        :param postcode: The postcode for the property to search for
        :param api_key: The Ordnance Survey API key used to authenticate requests
        """
        self.address = address
        self.postcode = postcode
        self.full_address = ", ".join([self.address, self.postcode])
        self.api_key = api_key

        self.results = None
        self.most_relevant_result = None
        self.property_type = None
        self.built_form = None
        # This will be postcode and address, as returned by the ordnance survey
        self.address_os = None
        self.postcode_os = None

        # Per-instance memo of the last get_places_api() outcome. This replaces an lru_cache on the
        # method, which would key on `self` and keep every client instance alive.
        self._places_response = None

    def set_places_address(self):
        """
        Given a response from the places api, set the cleaned address and postcode of the property.

        :raises ValueError: if get_places_api has not produced a result yet
        """
        if self.most_relevant_result is None:
            raise ValueError("No results found - run get_places_api first")

        raw_address = self.most_relevant_result["ADDRESS"]
        raw_postcode = self.most_relevant_result["POSTCODE"]

        # We strip out the postcode from the address as this is already stored separately,
        # then drop the trailing comma that removal leaves behind
        address = raw_address.replace(raw_postcode, "").strip()
        address = address.rstrip(",").strip()

        self.address_os = address.title()
        self.postcode_os = raw_postcode.upper()

    def get_places_api(self):
        """
        Query the Ordnance Survey Places API for this client's address.

        On a successful lookup the best match is stored on the instance (results, most_relevant_result,
        property_type, built_form, address_os, postcode_os). The outcome is memoised per instance, so
        repeated calls do not hit the API again.

        :return: dict with a single "status" key: the HTTP status code, or 204 when the request
            succeeded but no matching address was returned
        :raises ValueError: if no API key was supplied
        """
        if self._places_response is not None:
            return self._places_response

        if not self.api_key:
            raise ValueError("Ordnance Survey API key not specified")

        encoded_address_query = urllib.parse.quote(self.full_address)
        url = (f"https://api.os.uk/search/places/v1/find?query={encoded_address_query}&key="
               f"{self.api_key}")

        response = requests.get(url, timeout=self.REQUEST_TIMEOUT_SECONDS)
        status = response.status_code

        if status == 200:
            # The payload may carry no "results" entry (or an empty one) when nothing matched
            results = response.json().get('results') or []
            self.results = results
            if results:
                # Extract some details about the best match
                self.most_relevant_result = results[0]["DPA"]
                self.parse_classification_code(self.most_relevant_result["CLASSIFICATION_CODE"])
                self.set_places_address()
            else:
                logger.info("Could not find any results for the provided address and postcode")
                status = 204
        else:
            logger.info("Could not find any results for the provided address and postcode")

        self._places_response = {"status": status}
        return self._places_response

    def parse_classification_code(self, classification_code: str):
        """
        Convert the classification code, returned by the OS places api, to a property type and built form
        that are compatible with the EPC database.

        The various classifications can be found here:
        https://osdatahub.os.uk/docs/places/technicalSpecification
        Under LPI Output, CLASSIFICATION_CODE is described, and a link is provided to the full table of
        classifications. We do not need the full classification as this includes non-residential
        properties; only the codes of interest are mapped. Unmapped codes yield empty strings.

        :param classification_code: the CLASSIFICATION_CODE value from the OS response
        :return: None; sets self.property_type and self.built_form
        """
        mapped = self.CLASSIFICATION_MAP.get(classification_code, {})
        self.property_type = mapped.get("property_type", "")
        self.built_form = mapped.get("built_form", "")

File diff suppressed because it is too large Load diff

744
backend/SearchEpc.py Normal file
View file

@ -0,0 +1,744 @@
import os
import time
import re
import usaddress
import pandas as pd
import numpy as np
from epc_api.client import EpcClient
from backend.OrdnanceSurvey import OrdnanceSuveyClient
from BaseUtility import Definitions
from utils.logger import setup_logger
from typing import List
from fuzzywuzzy import process
logger = setup_logger()
# Maps each EPC field that is estimated from neighbouring properties to the dtype it is cast to.
# SearchEpc.estimate_epc iterates over this table and picks the estimator from the dtype:
# "Int64" -> weighted average rounded to an integer, "float" -> weighted average rounded to 2 d.p.,
# "str" -> weighted mode. Commented-out keys are not estimated from this table.
vartypes = {
    'low-energy-fixed-light-count': "Int64",
    # 'address': 'str',
    # 'uprn-source': 'str',
    'floor-height': 'float',
    'heating-cost-potential': 'float',
    'unheated-corridor-length': 'float',
    'hot-water-cost-potential': 'float',
    'construction-age-band': 'str',
    'potential-energy-rating': 'str',
    'mainheat-energy-eff': 'str',
    'windows-env-eff': 'str',
    'lighting-energy-eff': 'str',
    'environment-impact-potential': "Int64",
    'glazed-type': 'str',
    'heating-cost-current': 'float',
    # 'address3': 'str',
    'mainheatcont-description': 'str',
    'sheating-energy-eff': 'str',
    'property-type': 'str',
    'local-authority-label': 'str',
    'fixed-lighting-outlets-count': "Int64",
    'energy-tariff': 'str',
    'mechanical-ventilation': 'str',
    # NOTE(review): every other "*-cost-*" field is 'float'; 'str' here means this cost is estimated
    # as a mode rather than an average - confirm whether that is intended.
    'hot-water-cost-current': 'str',
    'county': 'str',
    # 'postcode': 'str',
    'solar-water-heating-flag': 'str',
    'constituency': 'str',
    'co2-emissions-potential': 'float',
    'number-heated-rooms': 'float',
    'floor-description': 'str',
    'energy-consumption-potential': 'float',
    'local-authority': 'str',
    'built-form': 'str',
    'number-open-fireplaces': "Int64",
    'windows-description': 'str',
    'glazed-area': 'str',
    # 'inspection-date': str,
    'mains-gas-flag': 'str',
    'co2-emiss-curr-per-floor-area': 'float',
    # 'address1': 'str',
    'heat-loss-corridor': 'str',
    'flat-storey-count': "Int64",
    'constituency-label': 'str',
    'roof-energy-eff': 'str',
    'total-floor-area': 'float',
    'building-reference-number': 'str',
    'environment-impact-current': 'float',
    'co2-emissions-current': 'float',
    'roof-description': 'str',
    'floor-energy-eff': 'str',
    'number-habitable-rooms': 'float',
    # 'address2': 'str',
    'hot-water-env-eff': 'str',
    'posttown': 'str',
    'mainheatc-energy-eff': 'str',
    'main-fuel': 'str',
    'lighting-env-eff': 'str',
    'windows-energy-eff': 'str',
    'floor-env-eff': 'str',
    'sheating-env-eff': 'str',
    'lighting-description': 'str',
    'roof-env-eff': 'str',
    'walls-energy-eff': 'str',
    'photo-supply': 'float',
    'lighting-cost-potential': 'float',
    'mainheat-env-eff': 'str',
    'multi-glaze-proportion': 'float',
    'main-heating-controls': 'str',
    # 'lodgement-datetime',
    'flat-top-storey': 'str',
    'current-energy-rating': 'str',
    'secondheat-description': 'str',
    'walls-env-eff': 'str',
    'transaction-type': 'str',
    # 'uprn': "Int64",
    'current-energy-efficiency': 'float',
    'energy-consumption-current': 'float',
    'mainheat-description': 'str',
    'lighting-cost-current': 'float',
    # 'lodgement-date',
    'extension-count': "Int64",
    'mainheatc-env-eff': 'str',
    # 'lmk-key': 'str',
    'wind-turbine-count': "Int64",
    'tenure': 'str',
    'floor-level': 'str',
    'potential-energy-efficiency': "Int64",
    'hot-water-energy-eff': 'str',
    'low-energy-lighting': 'float',
    'walls-description': 'str',
    'hotwater-description': 'str'
}
class SearchEpc:
"""
Given address information about a home, this class is responsible for retrieving the EPC data associated
to the property.
For a home, we might have address lines 1, 2, 3 and 4, as well as a postcode.
Often, simply searching the EPC database with address line 1 and postcode will be enough to find
the property, but there are some cases where this is not true and we might need to utilise other
combinations about the home to find the property
"""
MAX_RETRIES = 5
SUCCESS = {
"status": 200,
"message": "success",
"error": None
}
NODATA = {
"status": 201,
"message": "No data",
"error": None
}
def __init__(
self,
address1: str,
postcode: str,
auth_token: str,
os_api_key: str,
full_address: str | None = None,
max_retries: int = None,
uprn: [int, None] = None,
size=None,
property_type=None,
fast=False
):
"""
Address lines 1 and postcode are mandatory fields. The other address lines are optional
but can be used to find the epc for the home, if address1 and postcode are insufficient
:param address1: string, propery's address line 1
:param postcode: string, propery's postcode
:param full_address: string, optional parameter, the full address of the property
:param max_retries: int, optional, number of retries to make when searching the api
:param uprn: int, optional, the uprn of the property
:param size: int, optional, the number of results to return. If not provided, defaults to 25 which is the api's
default
:param property_type: str, optional, the property type of the property, if known before hand
"""
self.address1 = address1
self.postcode = postcode
self.full_address = full_address
self.uprn = uprn
self.house_number = self.get_house_number(self.address1)
self.numeric_house_number = self.extract_numeric_housenumber_part(self.house_number)
self.max_retries = max_retries if max_retries is not None else self.MAX_RETRIES
self.client = EpcClient(auth_token=auth_token)
self.ordnance_survey_client = OrdnanceSuveyClient(
address=self.address1, postcode=self.postcode, api_key=os_api_key
)
self.data = None
self.newest_epc = None
self.older_epcs = None
self.full_sap_epc = None
# These are the address and postcode values, which we store in the database
self.address_clean = None
self.postcode_clean = None
self.size = size if size is not None else 25
self.property_type = property_type
self.fast = fast
@classmethod
def get_house_number(cls, address: str) -> str | None:
"""
This method will use the usaddress library to parse an address and extract the house number
:return:
"""
parsed = usaddress.parse(address)
parsed_house_number = [x for x in parsed if (x[1] == "AddressNumber")]
parsed_house_number = parsed_house_number[0][0] if parsed_house_number else None
if parsed_house_number is None:
# Because usaddress isn't optimal for parsing addresses with some prefixes such as 'Flat',
# we also add a custom approach
# Pattern to look for 'Flat' or 'Apartment' followed by a number, or just a number at the beginning
pattern = r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)'
match = re.search(pattern, address)
if match:
# Return the first non-None group found
return next(g for g in match.groups() if g is not None)
else:
return None
# Remove training commas
parsed_house_number = parsed_house_number.replace(",", "")
return parsed_house_number
@staticmethod
def extract_numeric_housenumber_part(house_number: str | None) -> int | None:
# Regular expression to find the first occurrence of one or more digits
if house_number is None:
return None
match = re.search(r'\d+', house_number)
if match:
return int(match.group())
else:
return None
def get_epc(self, params=None, size=None):
# Get the EPC data with retries
size = size if size is not None else self.size
if params is None:
if self.uprn:
params = {"uprn": self.uprn}
else:
params = {"address": self.address1, "postcode": self.postcode}
for retry in range(self.max_retries):
try:
if "uprn" in params:
# We use the direct call method inside, since we need to implement uprn as a valid
# parameter for the search function
url = os.path.join(self.client.domestic.host, "search")
response = self.client.domestic.call(method="get", url=url, params=params)
else:
response = self.client.domestic.search(params=params, size=size)
if response:
self.data = response
return self.SUCCESS
if retry > 0:
logger.info("Failed previous attempt but retry successful")
# If we got nothing, final try
if not response:
return {
"status": 204,
"message": "no data",
"error": None
}
return {
"status": 200,
"message": "success",
"error": None
}
except Exception as e:
if retry < self.max_retries - 1:
# If not the last retry, wait for 3 seconds before retrying
time.sleep(3)
else:
# If it's the last retry, we continue
return {
"status": 500,
"message": "Could not retrieve EPC data",
"error": str(e)
}
@staticmethod
def filter_rows(rows, property_type=None, address=None):
"""
This method should not be used when property_type and address are both not None
:param rows:
:param property_type:
:param address:
:return:
"""
# Given the results from the EPC api, attempts to reduce the number of rows
uprns = {r["uprn"] for r in rows}
if (property_type is None) and (address is None):
return rows
if len(uprns) == 1:
return rows
if property_type is not None:
# We can do a filter on the property type
rows_filtered = [r for r in rows if r["property-type"] == property_type]
if rows_filtered:
return rows_filtered
return rows
if address is not None:
# We can do a filter on the property type
best_match = process.extractOne(address, [r["address"] for r in rows], score_cutoff=0)
rows_filtered = [r for r in rows if r["address"] == best_match[0]]
if rows_filtered:
return rows_filtered
return rows
@staticmethod
def format_address(newest_epc):
"""
Format address and postcode for storage in the database
"""
postcode = newest_epc["postcode"]
address = newest_epc["address"]
# Format them
address = address.replace(postcode, "").strip()
address = address.rstrip(",").strip()
address = address.title()
postcode = postcode.upper()
return address, postcode
def extract_epc_data(self, address=None):
"""
Given a successful search, this method will format the data and return it
:return:
"""
if self.data is None:
raise ValueError("data is missing, run search first")
rows = self.data["rows"]
# We perform some checks on the rows
# Firstly, we should only have 1 urpn so if we have multiple, we'll need to filter down the
# property further
rows = self.filter_rows(rows, property_type=self.property_type, address=None)
rows = self.filter_rows(rows, property_type=None, address=address)
# We now check for a full sap epc:
full_sap_epc = [r for r in rows if r["transaction-type"] == "new dwelling"]
full_sap_epc = full_sap_epc[0] if full_sap_epc else {}
# Finally, we identify the newest epc and the rest, and then return
newest_epc, older_epcs = self.filter_newest_epc(list_of_epcs=rows)
# Ge the uprn from the newest record for this home
uprns = {r["uprn"] for r in rows if r["uprn"]}
# We can sometimes have no uprn for a property
if (len(uprns) == 0) and len(rows) > 0:
logger.warning("Found data but missing uprn")
elif len(uprns) != 1:
# There is a possibility that we have multiple UPRNs for a single property, which is an error
addresses = {r["address"] for r in rows}
if len(addresses) == 1:
# Take the uprn from the most recent
uprns = {newest_epc["uprn"]}
else:
raise ValueError("Multiple UPRNs found - investigate me")
uprn = uprns.pop() if uprns else None
if self.fast:
return newest_epc, [], {}, "", "", None
# Retrieve postcode and address
address_epc, postcode_epc = self.format_address(newest_epc=newest_epc)
return newest_epc, older_epcs, full_sap_epc, address_epc, postcode_epc, uprn
@staticmethod
def filter_newest_epc(list_of_epcs: List):
newest_response = [
r for r in list_of_epcs if
r["lodgement-datetime"] == max([x["lodgement-datetime"] for x in list_of_epcs])
]
if not newest_response:
return {}, []
if len(newest_response) != 1:
# It is possible (but rare, and likely an error on EPC lodgement) that we have multiple EPCs that
# were lodged at the exact same time. In this case, we will take the first one
newest_response = [newest_response[0]]
older_epcs = [epc for epc in list_of_epcs if epc["lmk-key"] != newest_response[0]["lmk-key"]]
return newest_response[0], older_epcs
@staticmethod
def _get_epc_mode(col: str, epc_data: pd.DataFrame):
"""
Simple method to extract the mode value from the EPC data
:param col: name of the column to take the mode of
:param epc_data: pandas dataframe of epc data
"""
mode_value = epc_data[[col]].mode(dropna=True)
if len(mode_value) != 1:
raise NotImplementedError("TODO: Handle multiple modes")
mode_value = mode_value.iloc[0][col]
return mode_value
def fetch_nearby_epcs(
self, initial_postcode: str,
lmks_to_drop: list[str] | None = None,
built_form: str = "",
property_type: str = ""
):
"""
Fetches and processes EPC data for a given initial postcode, applying successive trimming
to the postcode and filtering the data until a non-empty result set is found.
The function queries the EPC API with the provided postcode, and if no data is found or
if the data doesn't meet certain criteria, it progressively shortens the postcode by
removing the last character and retries the query. This process continues until a valid
set of EPC data is obtained or the postcode is exhausted.
Additional filtering is applied to the obtained EPC data based on 'lmk-key', 'built-form',
and 'property-type'. The data is also processed to extract and numerically interpret house
numbers, calculate house number distances, and apply weights based on these distances.
:param initial_postcode: The initial full postcode for the EPC data query.
:param lmks_to_drop: List of 'lmk-key' values to be excluded from the EPC data.
:param built_form: The 'built-form' value to be used for filtering the EPC data.
:param property_type: The 'property-type' value to be used for filtering the EPC data.
:return:
"""
property_type_api_map = {
"Bungalow": "bungalow",
"Flat": "flat",
"House": "house",
"Maisonette": "maisonette",
"Park home": "park home",
}
postcode = initial_postcode
while postcode:
# Fetch data from EPC API
params = {"postcode": postcode}
if property_type:
params["property-type"] = property_type_api_map[property_type]
# We take the 20 nearest homes of the relevant type, so not to pull in too many irrelevant homes
epc_response = self.get_epc(params=params, size=100)
if epc_response["status"] == 200:
epc_data = pd.DataFrame(self.data["rows"])
if lmks_to_drop is not None:
epc_data = epc_data[~epc_data["lmk-key"].isin(lmks_to_drop)]
if not epc_data.empty:
# Further processing of the EPC data
epc_data['lodgement-datetime'] = pd.to_datetime(epc_data['lodgement-datetime'], errors='coerce')
epc_data = epc_data.sort_values("lodgement-datetime", ascending=False).groupby("uprn").head(1)
epc_data["house_number"] = epc_data["address"].apply(lambda add1: self.get_house_number(add1))
epc_data["numeric_house_number"] = epc_data["house_number"].apply(
lambda house_num: self.extract_numeric_housenumber_part(house_num)
)
if self.numeric_house_number is None:
# If we don't have a house number, we treat all weights as equal
epc_data["weight"] = 1
else:
epc_data["house_number_distance"] = abs(
epc_data["numeric_house_number"] - self.numeric_house_number
)
# # We add 1, just in case we have a 0 weight (e.g. comparing house number 7a to 7b, or 9A to 9)
# epc_data["weight"] = 1 / (epc_data["house_number_distance"] + 1)
# # If we have a home without a house number, fill that weight with average
# epc_data["weight"] = epc_data["weight"].fillna(epc_data["weight"].mean())
# # Finally, we might not have any house numbers whatsoever so everything could be
# # missing, so we fill with 1
# epc_data["weight"] = epc_data["weight"].fillna(1)
# TODO: Testing
# If the postcode is different from the initial postcode, it doesn't make sense to have
# any weightings
if all(pd.isnull(epc_data["house_number_distance"])) or (postcode != initial_postcode):
epc_data["weight"] = 1
else:
epc_data["weight"] = 1 / np.sqrt(epc_data["house_number_distance"] + 1)
epc_data["weight"] = epc_data["weight"].fillna(epc_data["weight"].mean())
estimation_property_type = self._estimate_str(
key="property-type", estimation_data=epc_data
) if property_type == "" else property_type
epc_built_form = self._estimate_str(
key="built-form",
estimation_data=epc_data[epc_data["property-type"] == estimation_property_type]
)
if built_form == "Semi-Detached" and epc_built_form in ["End-Terraced", "Mid-Terraced"]:
estimation_built_form = "End-Terraced"
elif (built_form == "") or (pd.isnull(built_form)):
estimation_built_form = epc_built_form
else:
estimation_built_form = built_form
# We handle some edge cases experiences with maisonettes - if built form is detatched, just filter
# on maisonette
# We also add some additional logic for Park homes, because they are far less common than other
# property types
is_maisonette_with_bad_built_form = (estimation_property_type == "Maisonette") & (
estimation_built_form in ["Detached", "Semi-Detached"]
)
is_park_home_without_built_form = (estimation_property_type == "Park home") & (
sum(epc_data["built-form"] == estimation_built_form) == 0
)
has_missing_built_form = not estimation_built_form
if is_maisonette_with_bad_built_form or is_park_home_without_built_form or has_missing_built_form:
epc_data = epc_data[epc_data["property-type"] == estimation_property_type]
else:
epc_data = epc_data[
(epc_data["built-form"] == estimation_built_form) & (
epc_data["property-type"] == estimation_property_type)
]
if not epc_data.empty:
return epc_data # Return the filtered data if it's not empty
# Shorten the postcode by one character for the next iteration
postcode = postcode[:-1].rstrip()
# If loop finishes without a valid response, raise an exception
raise Exception("Unable to find postcode data after trimming - investigate me")
def estimate_epc(self, property_type, built_form, lmks_to_drop=None):
"""
For a property that does not have an EPC, we retrieve the EPC data for the closest properties
and estimate the EPC for the property in question.
Note - do we have postcodes with just a single address? We would need to use a different approach
to find the closest homes
:param property_type: This is the property type of the property we are estimating, that can be retrieved from
the ordnance survey api
:param built_form: This is the built form of the property we are estimating, that can be retrieved from
the ordnance survey api
:param lmks_to_drop: This is a list of LMK keys that should be dropped from the estimation process. This
is used as an override for testing, to drop EPCs for the property we are testing
:return:
"""
# From the ordnance survey data, we want to determine the property type and then use only similar property
# types for the estimation process
epc_data = self.fetch_nearby_epcs(
initial_postcode=self.postcode,
lmks_to_drop=lmks_to_drop,
built_form=built_form,
property_type=property_type
)
# If we have missing lodgment date, we fill it with inspection-date
epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["inspection-date"])
# If we still have missing dates, we set it to the mean of the non NA dates
epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["lodgement-datetime"].mean())
# For each attribute, we need to determine the datatype and use an appropriate method
# to estimate.
estimated_epc = {}
for key, vartype in vartypes.items():
epc_data[key] = np.where(pd.isnull(epc_data[key]), None, epc_data[key])
epc_data[key] = np.where(epc_data[key] == "", None, epc_data[key])
estimation_data = epc_data[[key, "weight", "lodgement-datetime"]].copy()
estimation_data = estimation_data[~pd.isnull(estimation_data[key])]
estimation_data = estimation_data[~estimation_data[key].isin(Definitions.DATA_ANOMALY_MATCHES)]
if vartype == "Int64":
# We have some edge cases where we get the error "invalid literal for int() with base 10: '1.0'"
# so this handles this
estimation_data[key] = estimation_data[key].astype(float).astype(vartype)
else:
estimation_data[key] = estimation_data[key].astype(vartype)
if estimation_data.shape[0] == 0:
estimated_epc[key] = None
continue
if vartype == "Int64":
estimated_value = self._estimate_int(estimation_data, key)
elif vartype == "float":
estimated_value = self._estimate_float(estimation_data, key)
elif vartype == "str":
estimated_value = self._estimate_str(estimation_data, key)
else:
raise NotImplementedError("estimation method not implemented for type")
estimated_epc[key] = estimated_value
# Insert an estimated lodgement datetime, with a weighted average
estimated_epc["lodgement-datetime"] = self.calculate_weighted_lodgement_datetime(epc_data=epc_data)
# Extract logement date
# It is possible that there is still no lodgement date, so we need to handle this
if pd.isnull(estimated_epc["lodgement-datetime"]):
estimated_epc["lodgement-date"] = None
else:
estimated_epc["lodgement-date"] = estimated_epc["lodgement-datetime"].strftime("%Y-%m-%d")
estimated_epc["postcode"] = self.postcode
estimated_epc["uprn"] = self.uprn
estimated_epc["address"] = self.full_address
# Indicate that this epc was estimated
estimated_epc["estimated"] = True
return estimated_epc
@staticmethod
def calculate_weighted_lodgement_datetime(epc_data):
numeric_dates = pd.to_datetime(epc_data['lodgement-datetime']).view('int64')
# Calculate the weighted sum of dates
weighted_sum = (numeric_dates * epc_data['weight']).sum()
# Calculate the sum of weights
total_weights = epc_data['weight'].sum()
# Calculate the weighted mean in numeric format
weighted_mean_numeric = weighted_sum / total_weights
# Convert the numeric weighted mean back to datetime
weighted_mean_datetime = pd.to_datetime(weighted_mean_numeric)
return weighted_mean_datetime
@staticmethod
def _estimate_int(estimation_data, key):
return round(np.average(a=estimation_data[key], weights=estimation_data["weight"]))
@staticmethod
def _estimate_float(estimation_data, key):
return round(np.average(a=estimation_data[key], weights=estimation_data["weight"]), 2)
@staticmethod
def _estimate_str(estimation_data, key):
agg = estimation_data.groupby(key)["weight"].sum().reset_index()
agg = agg[agg["weight"] == agg["weight"].max()]
if agg.shape[0] != 1:
# If we have multiple modes, we take the more recent data on average
recent_grouped = estimation_data[
estimation_data[key].isin(agg[key].values)
].groupby(key)["lodgement-datetime"].mean()
newest_group = recent_grouped.idxmax()
return newest_group
return agg[key].values[0]
def find_property(self, skip_os=False):
"""
This method will attempt to identify a property. It will, at first, use the EPC api to try and
find the EPC for the property and the associated UPRN. If this fails, it will use the Ordnance Survey API to
find the UPRN of the address.
Because no result may have been provided by the EPC api because of formatting issues with the address,
if the ordnance survey api is used and the uprn retrieved, the EPC api is queried again with the UPRN, just
as a final check to see if there is any EPC data.
If there is no EPC data, the epc data will be estimated based on the surrounding properties
"""
# Step 1: use the epc api to find the property and uprn
response = self.get_epc()
if response["status"] == 200:
(
self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn
) = self.extract_epc_data(address=self.full_address)
return
# Step 2: If we don't have an EPC, we use the ordnance survey api to find the uprn
if skip_os:
if self.ordnance_survey_client.property_type is not None:
# We can try and estimate
estimated_epc = self.estimate_epc(
property_type=self.ordnance_survey_client.property_type,
built_form=self.ordnance_survey_client.built_form
)
self.newest_epc = estimated_epc
self.older_epcs = []
self.full_sap_epc = {}
# Finally, set a standardised address 1 and postcode
self.address_clean = self.ordnance_survey_client.address_os
self.postcode_clean = self.ordnance_survey_client.postcode_os
return
os_response = self.ordnance_survey_client.get_places_api()
if os_response["status"] != 200:
# Investigate this if it happens
raise Exception("Unable to find property - investigate me")
# Step 3: Now that we have a urpn, do another check against the epc api, this time searching with the uprn
self.uprn = self.ordnance_survey_client.most_relevant_result["UPRN"]
response = self.get_epc()
if response["status"] == 200:
(
self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn
) = self.extract_epc_data()
return
# Step 4: If we still don't have an EPC, we estimate the EPC data
self.full_address = self.ordnance_survey_client.most_relevant_result["ADDRESS"]
estimated_epc = self.estimate_epc(
property_type=self.ordnance_survey_client.property_type,
built_form=self.ordnance_survey_client.built_form
)
self.newest_epc = estimated_epc
self.older_epcs = []
self.full_sap_epc = {}
# Finally, set a standardised address 1 and postcode
self.address_clean = self.ordnance_survey_client.address_os
self.postcode_clean = self.ordnance_survey_client.postcode_os
return

View file

@ -8,9 +8,12 @@ class Settings(BaseSettings):
SECRET_KEY: str
ENVIRONMENT: str
DATA_BUCKET: str
PREDICTIONS_BUCKET: str
SAP_PREDICTIONS_BUCKET: str
CARBON_PREDICTIONS_BUCKET: str
HEAT_PREDICTIONS_BUCKET: str
PLAN_TRIGGER_BUCKET: str
EPC_AUTH_TOKEN: str
ORDNANCE_SURVEY_API_KEY: str
DB_HOST: str
DB_PASSWORD: str
DB_USERNAME: str

View file

@ -0,0 +1,50 @@
from sqlalchemy.orm import Session
from backend.app.db.models.non_intrusive_surveys import NonIntrusiveSurvey, NonIntrusiveSurveyNotes
def upload_non_intrusive_survey_notes(session: Session, non_invasive_notes, batch_size=500):
    """
    Store non-intrusive surveys and their notes in the database, one batch at a time.

    Every dictionary in the input describes a single survey. The 'uprn', 'survey_date' and 'surveyor'
    entries populate the survey record; each remaining key-value pair becomes a note attached to that
    survey, with the key as the note title. Each batch is committed before the next one starts.

    :param session: SQLAlchemy Session object through which all database transactions are handled.
    :param non_invasive_notes: List of dictionaries, one per survey, as described above.
    :param batch_size: The number of surveys to write per batch (default is 500).
    :return: None
    """
    survey_fields = ['uprn', 'survey_date', 'surveyor']

    def process_batch(batch):
        # Create the survey rows first and flush, so that each survey has an id to attach notes to
        surveys = [
            NonIntrusiveSurvey(
                uprn=entry['uprn'],
                survey_date=entry['survey_date'],
                surveyor=entry['surveyor']
            )
            for entry in batch
        ]
        session.add_all(surveys)
        session.flush()

        # Everything that is not a survey field is stored as a titled note
        notes = [
            NonIntrusiveSurveyNotes(
                survey_id=survey.id,
                title=title,
                note=text
            )
            for entry, survey in zip(batch, surveys)
            for title, text in entry.items()
            if title not in survey_fields
        ]
        session.bulk_save_objects(notes)
        session.commit()

    # Walk the input in consecutive slices of at most batch_size surveys
    for offset in range(0, len(non_invasive_notes), batch_size):
        process_batch(non_invasive_notes[offset:offset + batch_size])

View file

@ -3,15 +3,17 @@ from backend.app.db.models.recommendations import Plan, PlanRecommendations, Rec
from backend.app.db.models.portfolio import Portfolio
def aggregate_portfolio_recommendations(session, portfolio_id: int):
def aggregate_portfolio_recommendations(
session, portfolio_id: int, total_valuation_increase: float, labour_days: float
):
# Aggregate multiple fields
aggregates = (
session.query(
func.sum(Recommendation.estimated_cost).label("cost"),
func.sum(Recommendation.total_work_hours).label("total_work_hours"),
# For future usage we will aggregate multiple fields in this step
# func.sum(Recommendation.heat_demand).label("total_heat_demand"),
# func.sum(Recommendation.energy_savings).label("total_energy_savings")
func.sum(Recommendation.adjusted_heat_demand).label("energy_savings"),
func.sum(Recommendation.co2_equivalent_savings).label("co2_equivalent_savings"),
func.sum(Recommendation.energy_cost_savings).label("energy_cost_savings"),
)
.join(PlanRecommendations, PlanRecommendations.recommendation_id == Recommendation.id)
.join(Plan, Plan.id == PlanRecommendations.plan_id)
@ -22,8 +24,9 @@ def aggregate_portfolio_recommendations(session, portfolio_id: int):
aggregates_dict = {
"cost": aggregates.cost or 0,
"total_work_hours": aggregates.total_work_hours or 0,
# "total_heat_demand": aggregates.total_heat_demand or 0,
# "total_energy_savings": aggregates.total_energy_savings or 0
"energy_savings": aggregates.energy_savings or 0,
"co2_equivalent_savings": aggregates.co2_equivalent_savings or 0,
"energy_cost_savings": aggregates.energy_cost_savings or 0,
}
# Get the portfolio and update the fields
@ -32,6 +35,10 @@ def aggregate_portfolio_recommendations(session, portfolio_id: int):
for key, value in aggregates_dict.items():
setattr(portfolio, key, value)
# Insert total valuation increase and labour days
portfolio.property_valuation_increase = total_valuation_increase
portfolio.labour_days = labour_days
# Merge the updated portfolio back into the session
session.merge(portfolio)
session.flush()

View file

@ -3,13 +3,15 @@
###
import datetime
import pytz
from sqlalchemy.orm import Session
from backend.app.db.models.portfolio import (
PropertyModel, PropertyCreationStatus, PortfolioStatus, PropertyTargetsModel, PropertyDetailsEpcModel
PropertyModel, PropertyCreationStatus, PortfolioStatus, PropertyTargetsModel, PropertyDetailsEpcModel,
PropertyDetailsSpatial
)
from sqlalchemy.orm.exc import NoResultFound
def create_property(session, portfolio_id: int, address: str, postcode: str) -> (int, bool):
def create_property(session: Session, portfolio_id: int, address: str, postcode: str, uprn: str) -> (int, bool):
"""
This function will create a record for the property in the database if it does not exist.
If it does exist, it will just update the updated_at field.
@ -23,7 +25,7 @@ def create_property(session, portfolio_id: int, address: str, postcode: str) ->
try:
# Attempt to fetch the existing property
existing_property = session.query(PropertyModel).filter_by(
address=address, postcode=postcode, portfolio_id=portfolio_id
uprn=uprn, portfolio_id=portfolio_id
).one()
# Update the 'updated_at' field
@ -41,6 +43,7 @@ def create_property(session, portfolio_id: int, address: str, postcode: str) ->
address=address,
postcode=postcode,
portfolio_id=portfolio_id,
uprn=uprn,
creation_status=PropertyCreationStatus.LOADING,
status=PortfolioStatus.ASSESSMENT.value,
has_pre_condition_report=False,
@ -55,7 +58,9 @@ def create_property(session, portfolio_id: int, address: str, postcode: str) ->
return new_property.id, True
def create_property_targets(session, property_id: int, portfolio_id: int, epc_target=None, heat_demand_target=None):
def create_property_targets(
session: Session, property_id: int, portfolio_id: int, epc_target=None, heat_demand_target=None
):
"""
This function will create a record for the property targets in the database if it does not exist.
:param session: The database session
@ -78,7 +83,9 @@ def create_property_targets(session, property_id: int, portfolio_id: int, epc_ta
return True
def update_property_data(session, property_id: int, portfolio_id: int, property_data: dict):
def update_property_data(
session: Session, property_id: int, portfolio_id: int, property_data: dict
):
now = datetime.datetime.now(pytz.utc)
try:
@ -103,7 +110,9 @@ def update_property_data(session, property_id: int, portfolio_id: int, property_
return True
def create_property_details_epc(session, property_details_epc: dict):
def create_property_details_epc(
session: Session, property_details_epc: dict
):
"""
This function will create or update a record for the property details EPC in the database.
:param session: The database session
@ -128,3 +137,36 @@ def create_property_details_epc(session, property_details_epc: dict):
session.flush()
return True
def update_or_create_property_spatial_details(session: Session, uprn: int, property_details_spatial: dict):
    """
    Update the spatial details row for a UPRN, or create it if none exists yet.

    The changes are flushed to the database but not committed; committing is left to the caller.

    :param session: The SQLAlchemy session for database interaction.
    :param uprn: The unique property reference number (UPRN) of the property.
    :param property_details_spatial: A dictionary of PropertyDetailsSpatial attribute names to values.
        It should not contain a "uprn" key: on the create path the UPRN is passed separately and a
        duplicate keyword would raise a TypeError.
    :return: True if the operation is successful, otherwise raises an exception.
    :raises sqlalchemy.orm.exc.MultipleResultsFound: If more than one row exists for the UPRN.
    """
    try:
        # Only the lookup lives inside the try block, so that a NoResultFound raised by anything
        # else can never be mistaken for "record missing" and trigger a second insert.
        existing_property_details = session.query(PropertyDetailsSpatial).filter_by(
            uprn=uprn
        ).one()
    except NoResultFound:
        # Create a new record if not found
        new_property_details = PropertyDetailsSpatial(uprn=uprn, **property_details_spatial)
        session.add(new_property_details)
    else:
        # Update the fields with the data in property_details_spatial
        for key, value in property_details_spatial.items():
            setattr(existing_property_details, key, value)

        # Merge the updated property details back into the session
        session.merge(existing_property_details)

    session.flush()
    return True

View file

@ -80,7 +80,13 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
"starting_u_value": rec.get("starting_u_value"),
"new_u_value": rec.get("new_u_value"),
"sap_points": rec["sap_points"],
"heat_demand": rec["heat_demand"],
"adjusted_heat_demand": rec["adjusted_heat_demand"],
"co2_equivalent_savings": rec["co2_equivalent_savings"],
"total_work_hours": rec["labour_hours"],
"energy_cost_savings": rec["energy_cost_savings"],
"labour_days": rec["labour_days"],
"already_installed": rec["already_installed"],
}
for rec in recommendations_to_upload
]

View file

@ -18,6 +18,7 @@ class MaterialType(enum.Enum):
exposed_floor_insulation = "exposed_floor_insulation"
flat_roof_insulation = "flat_roof_insulation"
room_roof_insulation = "room_roof_insulation"
windows_glazing = "windows_glazing"
iwi_wall_demolition = "iwi_wall_demolition"
iwi_vapour_barrier = "iwi_vapour_barrier"
@ -32,6 +33,10 @@ class MaterialType(enum.Enum):
ewi_wall_demolition = "ewi_wall_demolition"
ewi_wall_preparation = "ewi_wall_preparation"
ewi_wall_redecoration = "ewi_wall_redecoration"
low_energy_lighting_installation = "low_energy_lighting_installation"
flat_roof_preparation = "flat_roof_preparation"
flat_roof_vapour_barrier = "flat_roof_vapour_barrier"
flat_roof_waterproofing = "flat_roof_waterproofing"
class DepthUnit(enum.Enum):
@ -42,6 +47,7 @@ class CostUnit(enum.Enum):
gbp_sq_meter = "gbp_sq_meter"
gbp_per_unit = "gbp_per_unit"
gbp_per_m2 = "gbp_per_m2"
gbp_per_m = "gbp_per_m"
class RValueUnit(enum.Enum):

View file

@ -0,0 +1,22 @@
from sqlalchemy import Column, BigInteger, String, TIMESTAMP, ForeignKey, Integer
from sqlalchemy.orm import declarative_base
# Declarative base class that the survey models defined in this module inherit from.
Base = declarative_base()
class NonIntrusiveSurvey(Base):
    """
    ORM model for the 'non_intrusive_survey' table: one row per survey of a property,
    recording which property (UPRN) was surveyed, when, and by whom.
    """
    __tablename__ = 'non_intrusive_survey'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # UPRNs can be up to 12 digits long, which does not fit in a 32-bit INTEGER column,
    # so the value is stored as BIGINT. Existing databases need a matching column migration.
    uprn = Column(BigInteger, nullable=False)
    survey_date = Column(TIMESTAMP, nullable=False)
    surveyor = Column(String, nullable=False)
# ORM model for the 'non_intrusive_survey_notes' table: titled free-text notes,
# each attached to one survey row.
class NonIntrusiveSurveyNotes(Base):
__tablename__ = 'non_intrusive_survey_notes'
id = Column(BigInteger, primary_key=True, autoincrement=True)
# Foreign key to non_intrusive_survey.id; every note must belong to a survey
survey_id = Column(BigInteger, ForeignKey('non_intrusive_survey.id'), nullable=False)
# Both columns are mandatory, so callers must not pass None for either
title = Column(String, nullable=False)
note = Column(String, nullable=False)

View file

@ -42,6 +42,7 @@ class Portfolio(Base):
property_valuation_increase = Column(Float) # Unit is always £ so we don't need to store the unit for the moment
rental_yield_increase = Column(Float) # Unit is always £ so we don't need to store the unit for the moment
total_work_hours = Column(Float)
labour_days = Column(Float)
# The default must be a callable: a bare datetime.datetime.now(...) is evaluated once when the
# module is imported, so every row would be stamped with the process start time.
created_at = Column(DateTime, nullable=False, default=lambda: datetime.datetime.now(pytz.utc))
updated_at = Column(DateTime, nullable=False, default=lambda: datetime.datetime.now(pytz.utc))
@ -85,6 +86,7 @@ class PropertyModel(Base):
tenure = Column(Text)
current_epc_rating = Column(Enum(Epc))
current_sap_points = Column(Float)
current_valuation = Column(Float)
class FeatureRating(enum.Enum):
@ -151,6 +153,21 @@ class PropertyDetailsEpcModel(Base):
energy_tariff = Column(Text)
primary_energy_consumption = Column(Float)
co2_emissions = Column(Float)
adjusted_energy_consumption = Column(Float)
estimated = Column(Boolean, default=False)
# ORM model for the "property_details_spatial" table: coordinates and
# conservation / listed / heritage flags stored against a property's UPRN.
class PropertyDetailsSpatial(Base):
__tablename__ = "property_details_spatial"
id = Column(Integer, primary_key=True, autoincrement=True)
# NOTE(review): UPRNs can be up to 12 digits long, which exceeds a 32-bit Integer column -
# confirm whether this should be BigInteger (would need a migration).
uprn = Column(Integer, nullable=False)
# NOTE(review): coordinate reference system of x/y is not stated here - presumably a
# projected grid alongside the latitude/longitude pair below; confirm against the loader.
x_coordinate = Column(Float)
y_coordinate = Column(Float)
latitude = Column(Float)
longitude = Column(Float)
# Nullable boolean flags; NULL means no value has been stored
conservation_status = Column(Boolean)
is_listed_building = Column(Boolean)
is_heritage_building = Column(Boolean)
class PropertyDetailsMeter(Base):

View file

@ -22,12 +22,15 @@ class Recommendation(Base):
new_u_value = Column(Float)
sap_points = Column(Float)
heat_demand = Column(Float)
adjusted_heat_demand = Column(Float)
co2_equivalent_savings = Column(Float)
energy_savings = Column(Float)
energy_cost_savings = Column(Float)
property_valuation_increase = Column(Float)
rental_yield_increase = Column(Float)
total_work_hours = Column(Float)
labour_days = Column(Float)
already_installed = Column(Boolean, nullable=False, default=False)
class RecommendationMaterials(Base):
@ -51,6 +54,9 @@ class Plan(Base):
property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False)
created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
is_default = Column(Boolean, nullable=False)
valuation_increase_lower_bound = Column(Float)
valuation_increase_upper_bound = Column(Float)
valuation_increase_average = Column(Float)
class PlanRecommendations(Base):

View file

@ -1,7 +1,9 @@
from datetime import datetime
from tqdm import tqdm
import pandas as pd
from epc_api.client import EpcClient
from etl.epc.Record import EPCRecord
from backend.SearchEpc import SearchEpc
from fastapi import APIRouter, Depends
from sqlalchemy.exc import IntegrityError, OperationalError
from sqlalchemy.orm import sessionmaker
@ -12,7 +14,8 @@ from backend.app.db.connection import db_engine
from backend.app.db.functions.materials_functions import get_materials
from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations
from backend.app.db.functions.property_functions import (
create_property, create_property_details_epc, create_property_targets, update_property_data
create_property, create_property_details_epc, create_property_targets, update_property_data,
update_or_create_property_spatial_details
)
from backend.app.db.functions.recommendations_functions import (
create_plan, create_plan_recommendations, upload_recommendations
@ -20,29 +23,39 @@ from backend.app.db.functions.recommendations_functions import (
from backend.app.db.models.portfolio import rating_lookup
from backend.app.dependencies import validate_token
from backend.app.plan.schemas import PlanTriggerRequest
from backend.app.plan.utils import (
create_recommendation_scoring_data, get_cleaned, insert_temp_recommendation_id
)
from backend.app.utils import epc_to_sap_lower_bound, read_csv_from_s3, read_parquet_from_s3
from backend.app.plan.utils import get_cleaned
from backend.app.utils import epc_to_sap_lower_bound, sap_to_epc
from backend.ml_models.sap_change_model.api import SAPChangeModelAPI
from backend.ml_models.api import ModelApi
from backend.Property import Property
from etl.epc.DataProcessor import DataProcessor
from etl.epc.settings import COLUMNS_TO_MERGE_ON
from recommendations.FloorRecommendations import FloorRecommendations
from recommendations.RoofRecommendations import RoofRecommendations
from recommendations.VentilationRecommendations import VentilationRecommendations
from recommendations.FireplaceRecommendations import FireplaceRecommendations
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from recommendations.optimiser.CostOptimiser import CostOptimiser
from recommendations.optimiser.GainOptimiser import GainOptimiser
from recommendations.optimiser.optimiser_functions import prepare_input_measures
from recommendations.WallRecommendations import WallRecommendations
from recommendations.Recommendations import Recommendations
from utils.logger import setup_logger
from utils.s3 import read_dataframe_from_s3_parquet
from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3
from backend.ml_models.Valuation import PropertyValuation
logger = setup_logger()
BATCH_SIZE = 5
SCORING_BATCH_SIZE = 400
def patch_epc(patch, epc_records):
    """
    Overwrite fields of the original EPC record with values supplied by the customer.

    Only variables that already exist in epc_records["original_epc"] are replaced; any other
    entries in the patch are ignored. The record is modified in place.

    :param patch: Mapping of EPC variable name to the value that should replace it
    :param epc_records: Dictionary of EPC records, holding the record to patch under "original_epc"
    :return: The same epc_records dictionary, with the patch applied
    """
    for variable_name, replacement in patch.items():
        original_epc = epc_records["original_epc"]
        if variable_name not in original_epc:
            # Unknown variables are skipped rather than added to the record
            continue
        original_epc[variable_name] = replacement

    return epc_records
router = APIRouter(
prefix="/plan",
@ -58,31 +71,52 @@ async def trigger_plan(body: PlanTriggerRequest):
session = sessionmaker(bind=db_engine)()
created_at = datetime.now().isoformat()
# TODO: We should store the trigger file path in the database with the plan so we can track the file that
# triggered the plan
# TODO: if the measure is already installed, it should actually be the very first phase
try:
session.begin()
logger.info("Getting the inputs")
epc_client = EpcClient(auth_token=get_settings().EPC_AUTH_TOKEN)
plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
uprn_filenames = read_dataframe_from_s3_parquet(
bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
)
cleaning_data = read_parquet_from_s3(
# If we have patches or overrides, we should read them in here
patches = []
if body.patches_file_path:
patches = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.patches_file_path)
already_installed = []
if body.already_installed_file_path:
already_installed = read_csv_from_s3(
bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.already_installed_file_path
)
cleaning_data = read_dataframe_from_s3_parquet(
bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
)
input_properties = []
for config in plan_input:
for config in tqdm(plan_input):
# We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
# TODO: implement validation. We should also standardise postcode and address in some fashion as
# a postcode of abcdef would be considered different to ABCDEF
uprn = config.get("uprn", None)
if uprn:
uprn = int(float(uprn))
epc_searcher = SearchEpc(
address1=config["address"],
postcode=config["postcode"],
uprn=uprn,
auth_token=get_settings().EPC_AUTH_TOKEN,
os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY
)
epc_searcher.find_property()
# Create a record in db
property_id, is_new = create_property(
session, portfolio_id=body.portfolio_id, address=config['address'], postcode=config['postcode']
session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
)
# If a new record was not created, we don't produce recommendations
if not is_new:
continue
# TODO: Need to add heat demand target
create_property_targets(
session,
property_id=property_id,
@ -91,24 +125,41 @@ async def trigger_plan(body: PlanTriggerRequest):
heat_demand_target=None
)
epc_records = {
'original_epc': epc_searcher.newest_epc.copy(),
'full_sap_epc': epc_searcher.full_sap_epc.copy(),
'old_data': epc_searcher.older_epcs.copy(),
}
patch = next((
x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
), {})
epc_records = patch_epc(patch, epc_records)
prepared_epc = EPCRecord(
epc_records=epc_records,
run_mode="newdata",
cleaning_data=cleaning_data
)
property_already_installed = next((
x for x in already_installed if
(x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
), {})
input_properties.append(
Property(
postcode=config['postcode'],
address1=config['address'],
epc_client=epc_client,
id=property_id
id=property_id,
address=epc_searcher.address_clean,
postcode=epc_searcher.postcode_clean,
epc_record=prepared_epc,
already_installed=property_already_installed,
**Property.extract_kwargs(config)
)
)
if not input_properties:
return Response(status_code=204)
logger.info("Getting EPC, and spatial data")
for p in input_properties:
p.search_address_epc()
p.set_year_built()
p.get_spatial_data(uprn_filenames)
# The materials data could be cached or local so we don't need to make
# consistent requests to the backend for
# the same data
@ -116,173 +167,112 @@ async def trigger_plan(body: PlanTriggerRequest):
materials = get_materials(session)
cleaned = get_cleaned()
uprn_filenames = read_dataframe_from_s3_parquet(
bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
)
photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET)
logger.info("Getting spatial data")
for p in input_properties:
p.get_spatial_data(uprn_filenames)
logger.info("Getting components and epc recommendations")
# TODO: Move this to a class. We probably want a Recommender class which takes the optimisers
# in as a dependency and then the optimisers can take the input measures in as part of the setup() method
recommendations = {}
recommendations_scoring_data = []
for p in input_properties:
representative_recommendations = {}
for p in tqdm(input_properties):
# Property recommendations
p.get_components(cleaned)
p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
property_recommendations = []
# Floor recommendations
floor_recommender = FloorRecommendations(property_instance=p, materials=materials)
floor_recommender.recommend()
if floor_recommender.recommendations:
property_recommendations.append(floor_recommender.recommendations)
# Wall recommendations
wall_recomender = WallRecommendations(property_instance=p, materials=materials)
wall_recomender.recommend()
if wall_recomender.recommendations:
property_recommendations.append(wall_recomender.recommendations)
# Roof recommendations
roof_recommender = RoofRecommendations(property_instance=p, materials=materials)
roof_recommender.recommend()
if roof_recommender.recommendations:
property_recommendations.append(roof_recommender.recommendations)
# Ventilation recommendations
ventilation_recomender = VentilationRecommendations(
property_instance=p,
materials=[part for part in materials if part["type"] == "mechanical_ventilation"]
)
ventilation_recomender.recommend()
if ventilation_recomender.recommendation:
property_recommendations.append(ventilation_recomender.recommendation)
# Fireplace sealing recommendations
fireplace_recommender = FireplaceRecommendations(property_instance=p)
fireplace_recommender.recommend()
if fireplace_recommender.recommendation:
property_recommendations.append(fireplace_recommender.recommendation)
# We insert temporary ids into the recommendations which is important for the optimiser later
property_recommendations = insert_temp_recommendation_id(property_recommendations)
recommender = Recommendations(property_instance=p, materials=materials, exclusions=body.exclusions)
property_recommendations, property_representative_recommendations = recommender.recommend()
if not property_recommendations:
continue
recommendations[p.id] = property_recommendations
representative_recommendations[p.id] = property_representative_recommendations
# Finally, we'll prepare data for predicting the impact on SAP
data_processor = DataProcessor(None, newdata=True)
data_processor.insert_data(pd.DataFrame([p.get_model_data()]))
# TODO: Temp
if data_processor.data["UPRN"].values[0] == "":
data_processor.data["UPRN"] = 0
p.create_base_difference_epc_record(cleaned_lookup=cleaned)
p.adjust_difference_record_with_recommendations(
property_recommendations, property_representative_recommendations
)
data_processor.pre_process()
recommendations_scoring_data.extend(p.recommendations_scoring_data)
starting_epc_data = data_processor.get_component_features(suffix="_STARTING")
ending_epc_data = data_processor.get_component_features(suffix="_ENDING")
fixed_data = data_processor.get_fixed_features()
# We update the ending record with the recommended updates and we set lodgement date to today
ending_epc_data["DAYS_TO_ENDING"] = data_processor.calculate_days_to(created_at)
for recommendations_by_type in property_recommendations:
for i, rec in enumerate(recommendations_by_type):
scoring_dict = create_recommendation_scoring_data(
property=p,
recommendation=rec,
starting_epc_data=starting_epc_data,
ending_epc_data=ending_epc_data,
fixed_data=fixed_data,
)
recommendations_scoring_data.append(scoring_dict)
# cleanup
del data_processor
# TODO: Make sure that number_habitable_rooms has been dropped
logger.info("Preparing data for scoring in sap change api")
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
# Perform the same cleaning as in the model - first clean number of room variables though
recommendations_scoring_data = DataProcessor.apply_averages_cleaning(
data_to_clean=recommendations_scoring_data,
cleaning_data=cleaning_data,
cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
recommendations_scoring_data = recommendations_scoring_data.drop(
columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
"carbon_ending"]
)
recommendations_scoring_data = DataProcessor.apply_averages_cleaning(
data_to_clean=recommendations_scoring_data,
cleaning_data=cleaning_data,
cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
).drop(columns=["LOCAL_AUTHORITY"])
model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
recommendations_scoring_data = DataProcessor.clean_missings_after_description_process(
recommendations_scoring_data,
ignore_cols=[c for c in recommendations_scoring_data.columns if ("thermal_transmittance" in c) or (
"insulation_thickness" in c) or ("ENERGY_EFF" in c)]
)
recommendations_scoring_data = DataProcessor.clean_efficiency_variables(recommendations_scoring_data)
sap_change_model_api = SAPChangeModelAPI(portfolio_id=body.portfolio_id, timestamp=created_at)
file_location = sap_change_model_api.upload_scoring_data(
df=recommendations_scoring_data, bucket=get_settings().DATA_BUCKET
)
response = sap_change_model_api.predict(
file_location="s3://{DATA_BUCKET}/".format(DATA_BUCKET=get_settings().DATA_BUCKET) + file_location,
)
# Retrieve the predictions
predictions = pd.DataFrame(
read_parquet_from_s3(
bucket_name=get_settings().PREDICTIONS_BUCKET,
file_key=response["storage_filepath"].split(get_settings().PREDICTIONS_BUCKET + "/")[1]
all_predictions = {
"sap_change_predictions": pd.DataFrame(),
"heat_demand_predictions": pd.DataFrame(),
"carbon_change_predictions": pd.DataFrame()
}
to_loop_over = range(0, recommendations_scoring_data.shape[0], SCORING_BATCH_SIZE)
for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
predictions_dict = model_api.predict_all(
df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE],
bucket=get_settings().DATA_BUCKET,
prediction_buckets={
"sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
"heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
"carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET
}
)
)
predictions["predictions"] = predictions["predictions"].astype(float).round(1)
predictions[['property_id', 'recommendation_id']] = predictions['id'].str.split('+', expand=True)
# Append the predictions to the predictions dictionary
for key, scored in predictions_dict.items():
all_predictions[key] = pd.concat([all_predictions[key], scored])
# Insert the predictions into the recommendations and run the optimiser
# TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a
# possibility with heating system
# TODO: After optimising, if there are any cheap, quick win measures (e.g. insulate water tank with hot water
# cylinder jacket), we should add these to the recommendations as default
logger.info("Optimising recommendations")
for property_id in recommendations.keys():
property = [p for p in input_properties if p.id == property_id][0]
property_predictions = predictions[predictions["property_id"] == str(property_id)]
property_instance = [p for p in input_properties if p.id == property_id][0]
for recommendations_by_type in recommendations[property_id]:
for rec in recommendations_by_type:
new_sap = property_predictions[property_predictions["recommendation_id"] == str(
rec["recommendation_id"]
)]["predictions"].values[0]
recommendations_with_impact, current_adjusted_energy, expected_adjusted_energy = (
Recommendations.calculate_recommendation_impact(
property_instance=property_instance,
all_predictions=all_predictions,
recommendations=recommendations
)
)
rec["sap_points"] = new_sap - float(property.data["current-energy-efficiency"])
# Store the resulting adjusted energy in the property instance
property_instance.set_adjusted_energy(
current_adjusted_energy=current_adjusted_energy,
expected_adjusted_energy=expected_adjusted_energy
)
if rec["sap_points"] is None:
raise ValueError("Sap points missing")
input_measures = prepare_input_measures(recommendations_with_impact, body.goal)
input_measures = prepare_input_measures(recommendations[property_id], body.goal)
current_sap_points = int(property_instance.data["current-energy-efficiency"])
target_sap_points = epc_to_sap_lower_bound(body.goal_value)
sap_gain = CostOptimiser.calculate_sap_gain_with_slack(target_sap_points - current_sap_points)
if body.budget:
optimiser = GainOptimiser(input_measures, max_cost=body.budget)
optimiser = GainOptimiser(
input_measures, max_cost=body.budget, max_gain=sap_gain if sap_gain > 0 else 0
)
else:
# The minimum gain is the minimum number of SAP points required to get to the target SAP band
current_sap_points = int(property.data["current-energy-efficiency"])
target_sap_points = epc_to_sap_lower_bound(body.goal_value)
# If the gain is negative, the optimiser will return an empty solution
optimiser = CostOptimiser(
input_measures, min_gain=target_sap_points - current_sap_points
input_measures,
min_gain=sap_gain
)
optimiser.setup()
@ -291,13 +281,26 @@ async def trigger_plan(body: PlanTriggerRequest):
selected_recommendations = {r["id"] for r in solution}
# If wall insulation is selected, we also include mechanical ventilation as a best practice measure
if any(x in [r["type"] for r in solution] for x in [
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"
]):
ventilation_rec = next(
(r[0] for r in recommendations_with_impact if r[0]["type"] == "mechanical_ventilation"),
None
)
# If a matching recommendation was found, add its ID to the selected recommendations
if ventilation_rec:
selected_recommendations.add(ventilation_rec["recommendation_id"])
# We'll use the set of selected recommendations to filter the recommendations to upload
final_recommendations = [
[
{**rec, "default": True if rec["recommendation_id"] in selected_recommendations else False}
for rec in recommendations_by_type
]
for recommendations_by_type in recommendations[property_id]
for recommendations_by_type in recommendations_with_impact
]
# We'll also unlist the recommendations so they're a bit easier to handle from here onwards
@ -311,6 +314,7 @@ async def trigger_plan(body: PlanTriggerRequest):
# 3) the recommendations
logger.info("Uploading recommendations to the database")
property_valuation_increases = []
session.commit()
for i in range(0, len(input_properties), BATCH_SIZE):
try:
@ -318,30 +322,43 @@ async def trigger_plan(body: PlanTriggerRequest):
batch_properties = input_properties[i:i + BATCH_SIZE]
for p in batch_properties:
recommendations_to_upload = recommendations.get(p.id, [])
default_recommendations = [r for r in recommendations_to_upload if r["default"]]
total_sap_points = sum([r["sap_points"] for r in default_recommendations])
new_sap_points = float(p.data["current-energy-efficiency"]) + total_sap_points
new_epc = sap_to_epc(new_sap_points)
valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc)
# Your existing operations
property_details_epc = p.get_property_details_epc(
portfolio_id=body.portfolio_id, rating_lookup=rating_lookup
portfolio_id=body.portfolio_id, rating_lookup=rating_lookup,
)
create_property_details_epc(session, property_details_epc)
# TODO: TEMP
if p.data["uprn"] == "":
print("Get rid of me!")
p.data["uprn"] = 0
update_or_create_property_spatial_details(session, p.uprn, p.spatial)
property_data = p.get_full_property_data()
property_data = p.get_full_property_data(current_valuation=valuations["current_value"])
update_property_data(
session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data
)
recommendations_to_upload = recommendations.get(p.id, [])
if not recommendations_to_upload:
continue
new_plan_id = create_plan(session, {
"portfolio_id": body.portfolio_id,
"property_id": p.id,
"is_default": True
"is_default": True,
"valuation_increase_lower_bound": (
valuations["lower_bound_increased_value"] - valuations["current_value"]
),
"valuation_increase_upper_bound": (
valuations["upper_bound_increased_value"] - valuations["current_value"]
),
"valuation_increase_average": (
valuations["average_increased_value"] - valuations["current_value"]
),
})
uploaded_recommendation_ids = upload_recommendations(session, recommendations_to_upload, p.id)
@ -350,6 +367,10 @@ async def trigger_plan(body: PlanTriggerRequest):
session, plan_id=new_plan_id, recommendation_ids=uploaded_recommendation_ids
)
property_valuation_increases.append(
valuations["average_increased_value"] - valuations["current_value"]
)
# Commit the session after each batch
session.commit()
@ -365,7 +386,18 @@ async def trigger_plan(body: PlanTriggerRequest):
# way to do this, but it's the simplest and will be a process that we can re-use since when we change a
# recommendation from being default to not default, we'll need to re-run this process to re-calculate the
# portfolio-level impact
aggregate_portfolio_recommendations(session, portfolio_id=body.portfolio_id)
total_valuation_increase = sum(property_valuation_increases)
labour_days = round(max(
[sum(r["labour_days"] for r in rec_group if r["default"]) for p_id, rec_group in recommendations.items()]
))
aggregate_portfolio_recommendations(
session,
portfolio_id=body.portfolio_id,
total_valuation_increase=total_valuation_increase,
labour_days=labour_days
)
# Commit final changes
session.commit()

View file

@ -1,10 +1,53 @@
from pydantic import BaseModel
from pydantic import BaseModel, conlist, validator
from typing import Optional
class PlanTriggerRequest(BaseModel):
    """
    Request body for triggering plan generation for a portfolio.

    Fields:
        budget: Optional maximum cost; when set, the plan is optimised for gain within this budget.
        goal: What the plan optimises for. Must be one of `_allowed_goals`.
        housing_type: Housing sector. Must be one of `_allowed_housing_types`.
        goal_value: The target for the goal (converted to a SAP lower bound by the plan trigger).
        portfolio_id: The portfolio the properties and plans belong to.
        trigger_file_path: Path of the CSV in the plan trigger bucket listing the properties.
        already_installed_file_path: Optional path of a CSV listing already-installed measures.
        patches_file_path: Optional path of a CSV with customer overrides for EPC data.
        exclusions: Optional non-empty list of measure groups to exclude; each entry must be
            one of `_allowed_exclusions`.
    """
    budget: Optional[float] = None
    goal: str
    housing_type: str
    goal_value: str
    portfolio_id: int
    trigger_file_path: str
    already_installed_file_path: Optional[str] = None
    patches_file_path: Optional[str] = None
    exclusions: Optional[conlist(str, min_items=1)] = None

    # Pre-defined list of possibilities for exclusions
    _allowed_exclusions = {
        "wall_insulation",
        "ventilation",
        "roof_insulation",
        "floor_insulation",
        "windows",
        "fireplace",
        "heating",
        "hot_water",
        "lighting",
        "solar_pv"
    }

    _allowed_goals = {"Increase EPC"}
    _allowed_housing_types = {"Social", "Private"}

    # Validator to ensure exclusions are within the pre-defined possibilities
    @validator('exclusions', each_item=True)
    def check_exclusions(cls, v):
        if v not in cls._allowed_exclusions:
            raise ValueError(f"{v} is not an allowed exclusion")
        return v

    # Validator to ensure that the goal is within the pre-defined possibilities
    @validator('goal')
    def check_goal(cls, v):
        if v not in cls._allowed_goals:
            raise ValueError(f"{v} is not a valid goal")
        return v

    # Validator to ensure that the housing type is within the pre-defined possibilities
    @validator('housing_type')
    def check_housing_type(cls, v):
        if v not in cls._allowed_housing_types:
            raise ValueError(f"{v} is not a valid housing type")
        return v

View file

@ -8,25 +8,6 @@ from backend.app.config import get_settings
import msgpack
def insert_temp_recommendation_id(property_recommendations):
    """
    Stamp every recommendation with a temporary, sequential "recommendation_id".

    The ids start at 0 and keep counting across groups, so each recommendation in the
    nested structure receives a distinct integer. The recommendation dicts are mutated
    in place.

    :param property_recommendations: nested list of recommendations (a list of groups, each
        group being a list of recommendation dicts)
    :return: The same property_recommendations object, with a "recommendation_id" integer
        inserted into every recommendation
    """
    # Walk the groups lazily, in order, so the numbering matches a plain nested loop
    every_recommendation = (rec for group in property_recommendations for rec in group)
    for temp_id, rec in enumerate(every_recommendation):
        rec["recommendation_id"] = temp_id

    return property_recommendations
def get_cleaned():
"""
This function will retrieve the cleaned dataset from s3 which has the cleaned
@ -44,145 +25,3 @@ def get_cleaned():
cleaned = msgpack.unpackb(cleaned, raw=False)
return cleaned
def create_recommendation_scoring_data(
        property: Property,
        recommendation: dict,
        starting_epc_data: pd.DataFrame,
        ending_epc_data: pd.DataFrame,
        fixed_data: pd.DataFrame,
):
    """
    Build the scoring record for one recommendation, ready to be passed to the sap model api.

    The record merges identifying fields with the first row of each of the three dataframes.
    Missing u-values and insulation thicknesses are then back-filled, and finally the
    "_ENDING" fields affected by the recommendation type are overwritten.

    :param property: The property the recommendation belongs to
    :param recommendation: Recommendation dictionary. "recommendation_id" and "type" are always read;
        "new_u_value" and "parts" are read for the insulation types that need them
    :param starting_epc_data: Dataframe whose first row is merged into the record
    :param ending_epc_data: Dataframe whose first row is merged into the record
    :param fixed_data: Dataframe whose first row is merged into the record
    :return: Dictionary containing the scoring data
    :raises NotImplementedError: If a floor insulation recommendation has more than one part, or if the
        recommendation type is not one of the handled types
    :raises ValueError: If a roof insulation recommendation does not have exactly one part
    """

    def wall_u_value():
        return get_wall_u_value(
            clean_description=property.walls["clean_description"],
            age_band=property.age_band,
            is_granite_or_whinstone=property.walls["is_granite_or_whinstone"],
            is_sandstone_or_limestone=property.walls["is_sandstone_or_limestone"]
        )

    def floor_u_value():
        return get_floor_u_value(
            floor_type=property.floor_type,
            area=property.floor_area,
            perimeter=property.perimeter,
            wall_type=property.wall_type,
            insulation_thickness=property.floor["insulation_thickness"],
            age_band=property.age_band,
        )

    def roof_u_value():
        return get_roof_u_value(
            insulation_thickness=property.roof["insulation_thickness"],
            has_dwelling_above=property.roof["has_dwelling_above"],
            is_loft=property.roof["is_loft"],
            is_roof_room=property.roof["is_roof_room"],
            is_thatched=property.roof["is_thatched"],
            age_band=property.age_band,
            is_flat=property.roof["is_flat"],
            is_pitched=property.roof["is_pitched"],
            is_at_rafters=property.roof["is_at_rafters"],
        )

    def fill_if_missing(key, compute):
        # Only compute a replacement when the current value is None
        if scoring_dict[key] is None:
            scoring_dict[key] = compute()

    scoring_dict = {
        "UPRN": property.data["uprn"],
        "id": str(property.id) + "+" + str(recommendation["recommendation_id"]),
        "LOCAL_AUTHORITY": property.data["local-authority"],
        **starting_epc_data.to_dict("records")[0],
        **ending_epc_data.to_dict("records")[0],
        **fixed_data.to_dict("records")[0]
    }

    # Set starting u-values if we don't have them
    fill_if_missing("walls_thermal_transmittance", wall_u_value)
    fill_if_missing("floor_thermal_transmittance", floor_u_value)
    fill_if_missing("roof_thermal_transmittance", roof_u_value)

    for col in ("walls_insulation_thickness", "floor_insulation_thickness", "roof_insulation_thickness"):
        fill_if_missing(col, lambda: "none")

    rec_type = recommendation["type"]

    if rec_type == "wall_insulation":
        # The upgrade made here is to the u-value of the walls and the description of the
        # insulation thickness, which we update to indicate it's insulated
        scoring_dict["walls_thermal_transmittance_ENDING"] = recommendation["new_u_value"]
        scoring_dict["walls_insulation_thickness_ENDING"] = "above average"
        scoring_dict["WALLS_ENERGY_EFF_ENDING"] = "Good"
    else:
        fill_if_missing("walls_thermal_transmittance_ENDING", wall_u_value)
        fill_if_missing("walls_insulation_thickness_ENDING", lambda: "none")

    if rec_type == "floor_insulation":
        if len(recommendation["parts"]) > 1:
            raise NotImplementedError("Have more than 1 floor insulation part - handle this case")

        scoring_dict["floor_thermal_transmittance_ENDING"] = recommendation["new_u_value"]
        # We don't really see above average for this in the training data
        scoring_dict["floor_insulation_thickness_ENDING"] = "average"
        scoring_dict["FLOOR_ENERGY_EFF_ENDING"] = "Good"
    else:
        fill_if_missing("floor_thermal_transmittance_ENDING", floor_u_value)
        fill_if_missing("floor_insulation_thickness_ENDING", lambda: "none")

    if rec_type == "roof_insulation":
        scoring_dict["roof_thermal_transmittance_ENDING"] = recommendation["new_u_value"]
        parts = recommendation["parts"]
        if len(parts) != 1:
            raise ValueError("More than one part for roof insulation - investiage me")

        # The ending thickness is the depth of the single part, as an integer string
        scoring_dict["roof_insulation_thickness_ENDING"] = str(int(parts[0]["depth"]))
        scoring_dict["ROOF_ENERGY_EFF_ENDING"] = "Very Good"
    else:
        # Fill missing roof u-values - this fill is not based on recommended upgrades
        fill_if_missing("roof_thermal_transmittance_ENDING", roof_u_value)
        fill_if_missing("roof_insulation_thickness_ENDING", lambda: "none")

    if rec_type == "mechanical_ventilation":
        scoring_dict["MECHANICAL_VENTILATION_ENDING"] = 'mechanical, extract only'

    if rec_type == "sealing_open_fireplace":
        scoring_dict["NUMBER_OPEN_FIREPLACES_ENDING"] = 0

    # Unsupported types are only rejected here, after all of the fills above have run
    handled_types = (
        "wall_insulation", "floor_insulation", "roof_insulation", "mechanical_ventilation", "sealing_open_fireplace",
    )
    if rec_type not in handled_types:
        raise NotImplementedError("Implement me")

    return scoring_dict

View file

@ -1,10 +1,7 @@
import boto3
import csv
from io import StringIO
import string
import secrets
import logging
import pandas as pd
from io import BytesIO
@ -42,25 +39,6 @@ def setup_logger(log_file=None, level=logging.INFO, overwrite_handler=False):
return logger
def read_csv_from_s3(bucket_name, filepath):
    """
    Download a CSV object from s3 and parse it into a list of dictionaries.

    :param bucket_name: Name of the s3 bucket holding the object
    :param filepath: Key of the CSV object within the bucket
    :return: List with one dictionary per CSV row, keyed by the header row
    """
    s3_response = boto3.client('s3').get_object(Bucket=bucket_name, Key=filepath)

    # The body comes back as bytes; decode it and wrap it in a file-like object for the csv reader
    csv_text = s3_response['Body'].read().decode('utf-8')

    return list(csv.DictReader(StringIO(csv_text)))
def generate_api_key():
# Define the characters that will be used to generate the api key
characters = string.ascii_letters + string.digits
@ -69,15 +47,15 @@ def generate_api_key():
return api_key
def sap_to_epc(sap_points: int):
def sap_to_epc(sap_points: int | float):
"""
Simple utility function to convert SAP points to EPC rating.
:param sapPoints: numerical value of SAP points, typically between 0 and 100
:param sap_points: numerical value of SAP points, typically between 0 and 100
:return:
"""
if sap_points <= 0 or sap_points > 100:
raise ValueError("SAP points should be between 1 and 100.")
if sap_points <= 0:
raise ValueError("SAP points should be above 0.")
if sap_points >= 92:
return "A"
@ -121,19 +99,6 @@ def epc_to_sap_lower_bound(epc: str):
raise ValueError("EPC rating should be between A and G")
def read_parquet_from_s3(bucket_name, file_key):
    """
    Download a Parquet object from s3 and load it into a pandas DataFrame.

    :param bucket_name: Name of the s3 bucket holding the object
    :param file_key: Key of the Parquet object within the bucket
    :return: DataFrame with the contents of the Parquet file
    """
    s3_response = boto3.client('s3').get_object(Bucket=bucket_name, Key=file_key)

    # Read the raw Parquet bytes and hand them to pandas through an in-memory buffer
    parquet_bytes = s3_response["Body"].read()
    return pd.read_parquet(BytesIO(parquet_bytes))
def save_dataframe_to_s3_parquet(df, bucket_name, file_key):
"""
Save a pandas DataFrame to S3 as a Parquet file.

View file

@ -0,0 +1,117 @@
class AnnualBillSavings:
    """
    This is a simple class which will estimate the annual bill savings, based on the kwh savings.

    This class uses data from Ofgem, including their price caps, to provide us with an estimate for
    1KWH of energy.
    """

    # These gas and electricity consumption figures are based off of figures presented by Ofgem
    # https://www.ofgem.gov.uk/information-consumers/energy-advice-households/average-gas-and-electricity-use-explained
    AVERAGE_ELECTRICITY_CONSUMPTION = 2700
    AVERAGE_GAS_CONSUMPTION = 11500

    # Latest price cap figures from Ofgem are for April 2024
    # https://www.ofgem.gov.uk/publications/new-energy-price-cap-level-april-june-2024-starts-today
    ELECTRICITY_PRICE_CAP = 0.245
    GAS_PRICE_CAP = 0.0604

    # This is a weighted mean of the price caps, using the consumption figures above as weights
    PRICE_FACTOR = 0.09549999999999999

    # Daily standard charge, based on average across England, Scotland and Wales, and includes VAT
    DAILY_STANDARD_CHARGE_GAS = 0.3143
    DAILY_STANDARD_CHARGE_ELECTRICITY = 0.601

    # Ordered from worst to best, so a higher index means a better band
    EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"]

    @classmethod
    def estimate(cls, kwh: float):
        """
        Estimate the annual bill savings based on the kwh savings, using the blended
        gas & electricity price factor

        :param kwh: The kwh savings
        :return: An estimate for annual bill savings
        """
        return cls.PRICE_FACTOR * kwh

    @classmethod
    def estimate_electric(cls, kwh: float):
        """
        Estimate the annual bill savings based on the kwh savings, pricing every kwh at the
        electricity price cap

        :param kwh: The kwh savings
        :return: An estimate for annual bill savings
        """
        return cls.ELECTRICITY_PRICE_CAP * kwh

    @classmethod
    def calculate_annual_bill(cls, kwh):
        """
        This method will estimate the total annual bill for a property

        It assumes gas & electricity are used, so both daily standing charges are paid
        for every day of the year.

        :param kwh: The total kwh consumption
        :return: An estimate for annual bill
        """
        # Both daily charges must be scaled to a full year. Summing before multiplying avoids
        # the precedence slip where only the electricity charge is multiplied by 365.
        annual_standing_charge = (cls.DAILY_STANDARD_CHARGE_GAS + cls.DAILY_STANDARD_CHARGE_ELECTRICITY) * 365
        return cls.PRICE_FACTOR * kwh + annual_standing_charge

    @classmethod
    def adjust_energy_to_metered(cls, epc_energy_consumption, current_epc_rating):
        """
        Adjust an EPC-modelled energy consumption figure towards metered consumption.

        Based on: The over-prediction of energy use by EPCs in Great Britain: A comparison
        of EPC-modelled and metered primary energy use intensity
        Which can be found here: https://www.sciencedirect.com/science/article/pii/S0378778823002542

        We implement the results on page 10, as a per-band linear correction
        (gradient * consumption + intercept) that is added to the EPC figure.

        :param epc_energy_consumption: The EPC-modelled energy consumption
            (NOTE(review): the paper works in energy use intensity - confirm the units callers pass)
        :param current_epc_rating: The current EPC rating, a letter between A and G
        :return: The adjusted consumption
        :raises KeyError: If current_epc_rating is not a letter between A and G
        :raises ValueError: If the adjusted consumption comes out negative
        """
        gradients = {
            "A": -0.1,
            "B": -0.1,
            "C": -0.43,
            "D": -0.52,
            "E": -0.7,
            "F": -0.76,
            "G": -0.76
        }

        intercepts = {
            "A": 28,
            "B": 28,
            "C": 97,
            "D": 119,
            "E": 160,
            "F": 157,
            "G": 157
        }

        gradient = gradients[current_epc_rating]
        intercept = intercepts[current_epc_rating]

        # This should be negative
        consumption_difference = gradient * epc_energy_consumption + intercept

        adjusted_consumption = epc_energy_consumption + consumption_difference

        # Guard against a correction so large that it drives the consumption below zero.
        # The message text is kept as-is since callers may match on it.
        if adjusted_consumption < 0:
            raise ValueError("consumption_difference should be negative")

        return adjusted_consumption

    @classmethod
    def adjust_expected_band(cls, expected_epc_rating, current_epc_rating):
        """
        Because of the differing gradients and intercepts when adjusting, it's possible for
        expected_adjusted_energy to be bigger than current_adjusted_energy. In this case, we
        adjust the expected EPC band downwards. This function performs the EPC adjustment.

        The expected band is lowered by one band, but never below the current band.

        :param expected_epc_rating: The expected EPC rating
        :param current_epc_rating: The current EPC rating
        :return: The adjusted expected EPC rating
        :raises ValueError: If either rating is not a letter between A and G
        """
        # Find the positions of both ratings in the worst-to-best ordering
        expected_index = cls.EPC_BANDS.index(expected_epc_rating)
        current_index = cls.EPC_BANDS.index(current_epc_rating)

        # Dropping a band would take us below the current rating, so stay at the current rating
        if expected_index - 1 < current_index:
            return current_epc_rating

        return cls.EPC_BANDS[expected_index - 1]

View file

@ -0,0 +1,173 @@
import numpy as np
class PropertyValuation:
    """
    Placeholder for the property valuation model.

    Current values come from a hard-coded UPRN lookup, and the uplift from improving the EPC
    band is derived from the published percentage mappings declared below.
    """

    UPRN_VALUE_LOOKUP = {
        15038202: 202000,
        37024763: 213000,
        100070478545: 212000,
        100070297696: 662000,  # Based on Zoopla's estimation of nearby house, 8 bloomfield road
        100070476394: 222000,  # Based on Zoopla's estimation of next door, 20 Parkside
        100071264896: 128000,
        # Based on next door neighbour: https://themovemarket.com/tools/propertyprices/flat-2-queens-wood-house-219
        # -brandwood-road-birmingham-b14-6pu
        100070533688: 218000,  # Based on Zoopla's estimation of 95 Tenby Road, which is also mid terrace
        100070505235: 344000,  # Based on Zoopla's estimation of 131 School road, which is also semi-detached
        100070513306: 182000,  # Based on Zoopla's estimation of 61 Simmons Drive
        100071306896: 77000,  # Based on Flat 2 of 44 Wedgewood Road on Zoopla
        100021192109: 650000,  # Based on Zoopla
        766249482: 358000,  # Based on Zoopla estimate for 19 Spring Lane, 3 bedroom semi-detached
        100120703802: 277000,  # Based on Zoopla
        10014469685: 286000,  # Based on Zoopla
        10001328782: 196000,  # Based on Zoopla
        # Urban Splash - valuations from The Move Market
        10023345430: 74_000,
        10023345435: 99_000,
        10023345436: 62_000,
        10023345441: 62_000,
        10094183503: 2_988_000,
        10094183499: 123_000,
        10070056824: 70_000,
        110070056242: 100_000,
        10070056243: 130_000,
        10070056817: 130_000,
        10094183501: 185_000,
        10070056250: 71_000,
        10094183500: 185_000,
        10070056843: 67_000,
        10070056844: 67_000,
        10070056241: 76_000,
        10070056834: 63_000,
        10023345439: 62_000,
        10070056815: 101_000,
        10070056816: 101_000,
        10094183498: 101_000,
        10070056840: 673_000,
        10070056848: 76_000,
        10070056849: 76_000,
        10070056829: 76_000,
        10070056920: 76_000,
        10023345463: 76_000,
        # IMMO Dudley Pilot - search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/
        90070461: 172_000,  # Based on Zoopla
        90022227: 181_000,  # Based on Zoopla
        90106884: 180_000,  # Based on Zoopla
        90051858: 201_000,  # Based on Zoopla
        90060989: 172_000,  # Based on Zoopla
        90048026: 196_000,  # Based on Zoopla
        90077535: 192_000,  # Based on Zoopla
        90093693: 279_000,  # Based on Zoopla
        90055152: 149_000,  # Based on Zoopla
        90028499: 238_000,  # Based on Zoopla
    }

    # We base our valuation uplifts on a number of sources
    # https://www.moneysupermarket.com/gas-and-electricity/value-of-efficiency/
    MSM_MAPPING = [
        {"start": "G", "end": "F", "increase_percentage": 0.06},
        {"start": "F", "end": "E", "increase_percentage": 0.01},
        {"start": "E", "end": "D", "increase_percentage": 0.01},
        {"start": "D", "end": "C", "increase_percentage": 0.02},
        {"start": "C", "end": "B", "increase_percentage": 0.04},
        {"start": "B", "end": "A", "increase_percentage": 0.0},
    ]

    # https://www.lloydsbankinggroup.com/media/press-releases/2021/halifax/homebuyers-pay-a-green-premium-of-40000
    # -for-the-most-energy-efficient-properties.html
    LLOYDS_MAPPING = [
        {"start": "G", "end": "F", "increase_percentage": 0.038},
        {"start": "F", "end": "E", "increase_percentage": 0.029},
        {"start": "E", "end": "D", "increase_percentage": 0.024},
        {"start": "D", "end": "C", "increase_percentage": 0.02},
        {"start": "C", "end": "B", "increase_percentage": 0.02},
        {"start": "B", "end": "A", "increase_percentage": 0.018},
    ]

    KNIGHT_FRANK_MAPPING = [
        {"start": "D", "end": "C", "increase_percentage": 0.03},
        {"start": "D", "end": "B", "increase_percentage": 0.088},
        {"start": "D", "end": "A", "increase_percentage": 0.088},
    ]

    NATIONWIDE_MAPPING = [
        # {"start": "G", "end": "D", "increase_percentage": 0.035},
        # {"start": "F", "end": "D", "increase_percentage": 0.035},
        # {"start": "D", "end": "B", "increase_percentage": 0.017},
        # {"start": "D", "end": "A", "increase_percentage": 0.017},
    ]

    # Ordered from worst to best
    EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"]

    @classmethod
    def get_increase(cls, epc_band_range):
        """
        Compound the single-band MSM and Lloyds uplifts across a run of consecutive EPC bands.

        :param epc_band_range: List of EPC bands to step through, e.g. ["D", "C", "B"]
        :return: Tuple of (msm increase, lloyds increase), each a fraction of the current value.
            Both are 0 when the range has fewer than two bands
        :raises IndexError: If a pair of neighbouring bands has no entry in the MSM or Lloyds mapping
        """
        band_steps = []
        # Pair each band with the one that follows it
        for band_from, band_to in zip(epc_band_range, epc_band_range[1:]):
            msm_step = [m for m in cls.MSM_MAPPING if m["start"] == band_from and m["end"] == band_to][0]
            lloyds_step = [m for m in cls.LLOYDS_MAPPING if m["start"] == band_from and m["end"] == band_to][0]

            band_steps.append(
                {
                    "start": band_from,
                    "end": band_to,
                    "msm_increase": msm_step["increase_percentage"],
                    "lloyds_increase": lloyds_step["increase_percentage"],
                }
            )

        # The increases compound, so we multiply the growth factors together
        msm_total = np.prod([1 + step["msm_increase"] for step in band_steps]) - 1
        lloyds_total = np.prod([1 + step["lloyds_increase"] for step in band_steps]) - 1

        return msm_total, lloyds_total

    @classmethod
    def estimate(cls, property_instance, target_epc):
        """
        Estimate the current value of a property and its value after reaching the target EPC band.

        :param property_instance: Object exposing ``uprn`` and ``data["current-energy-rating"]``
        :param target_epc: The EPC band the property is expected to reach
        :return: Dictionary with the current value, the lower / upper / average increased values and
            the average increase. Every entry is 0 when there is no value for the property's UPRN
        """
        # NOTE(review): the lookup keys are ints - assumes property_instance.uprn is an int, confirm
        value = cls.UPRN_VALUE_LOOKUP.get(property_instance.uprn)

        if not value:
            return dict.fromkeys(
                [
                    "current_value",
                    "lower_bound_increased_value",
                    "upper_bound_increased_value",
                    "average_increased_value",
                    "average_increase",
                ],
                0,
            )

        current_epc = property_instance.data["current-energy-rating"]

        # We get the spectrum of ratings between the current and target EPC
        first_band = cls.EPC_BANDS.index(current_epc)
        last_band = cls.EPC_BANDS.index(target_epc)
        msm_increase, lloyds_increase = cls.get_increase(cls.EPC_BANDS[first_band: last_band + 1])

        def direct_increase(mapping):
            # Uplift quoted for exactly this current -> target jump, or None when there isn't one
            return next(
                (
                    entry["increase_percentage"] for entry in mapping
                    if entry["start"] == current_epc and entry["end"] == target_epc
                ),
                None,
            )

        # We now use the knight frank and nationwide data to get further valuation evidence, if we have it
        kf_increase = direct_increase(cls.KNIGHT_FRANK_MAPPING)
        nw_increase = direct_increase(cls.NATIONWIDE_MAPPING)

        candidates = (msm_increase, lloyds_increase, kf_increase, nw_increase)
        all_increases = [increase for increase in candidates if increase is not None]

        average_increased_value = value * (1 + np.mean(all_increases))

        return {
            "current_value": value,
            "lower_bound_increased_value": value * (1 + min(all_increases)),
            "upper_bound_increased_value": value * (1 + max(all_increases)),
            "average_increased_value": average_increased_value,
            "average_increase": average_increased_value - value
        }

144
backend/ml_models/api.py Normal file
View file

@ -0,0 +1,144 @@
import pandas as pd
import requests
from requests.exceptions import RequestException
from utils.logger import setup_logger
from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
logger = setup_logger()
class ModelApi:
    """
    Client for the Model APIs: SAP change, heat demand change and carbon change.

    For each model the scoring data is uploaded to s3, the model's predict endpoint is called,
    and the stored predictions are read back from s3.
    """

    MODEL_PREFIXES = [
        "sap_change_predictions",
        "heat_demand_predictions",
        "carbon_change_predictions"
    ]

    # URL path segment of each model, keyed by model prefix
    MODEL_URLS = {
        "sap_change_predictions": "sapmodel",
        "heat_demand_predictions": "heatmodel",
        "carbon_change_predictions": "carbonmodel"
    }

    def __init__(
            self,
            portfolio_id,
            timestamp,
            base_url="https://api.dev.hestia.homes",
    ):
        """
        This class handles the communication with the Model APIs. These models include SAP change, heat demand
        change and carbon change

        :param portfolio_id: The portfolio ID to be passed in the request payload
        :param timestamp: The creation timestamp to be passed in the request payload. It is also used to
            name the scoring file in s3
        :param base_url: Base URL of the Model APIs
        """
        self.base_url = base_url
        self.portfolio_id = portfolio_id
        self.timestamp = timestamp

    def upload_scoring_data(self, df: pd.DataFrame, bucket: str, model_prefix: str) -> str:
        """
        The model api needs scoring data that is sitting in s3 to use as a dataset to score on
        This method allows the user to upload a table as a parquet file. This method will return the file
        location, which can be used as the file location in the predict() method

        :param df: Pandas dataframe with scoring data to be uploaded to s3
        :param bucket: Name of the bucket in s3 to upload to
        :param model_prefix: The model prefix to be used in the file location
        :return: The key of the uploaded parquet file within the bucket
        :raises ValueError: If model_prefix is not one of MODEL_PREFIXES
        """
        if model_prefix not in self.MODEL_PREFIXES:
            raise ValueError(f"Model prefix specified is not in {self.MODEL_PREFIXES}")

        # Store parquet file in s3 for scoring
        file_location = f"{model_prefix}/{self.portfolio_id}/{self.timestamp}.parquet"
        logger.info("Storing scoring data to s3")
        save_dataframe_to_s3_parquet(
            df=df,
            bucket_name=bucket,
            file_key=file_location
        )

        return file_location

    def predict(self, file_location, model_prefix: str):
        """Makes a POST request to the Model API for the given model prefix.

        Args:
            file_location (str): The file location to be passed in the request payload.
            model_prefix (str): The model prefix used to select the request URL.

        Returns:
            dict: The API response as a dictionary if the request was successful, None otherwise.
        """
        logger.info(f"Making request to {model_prefix} change api")

        url = f"{self.base_url}/{self.MODEL_URLS[model_prefix]}/predict"
        payload = {
            "file_location": file_location,
            "property_id": "",  # This should get removed
            "portfolio_id": self.portfolio_id,
            "created_at": self.timestamp
        }

        try:
            response = requests.post(url, json=payload, headers={"Content-Type": "application/json"}, timeout=120)

            # Check if the response status code is 2xx (success)
            response.raise_for_status()

            # Return the JSON response as a Python dictionary
            return response.json()
        except RequestException as e:
            logger.error(f"An error occurred: {e}")

            # The failure is logged and signalled to the caller as None rather than re-raised
            return None

    @staticmethod
    def _format_predictions(predictions_df: pd.DataFrame) -> pd.DataFrame:
        """
        Round the predictions and split the combined "id" column into its parts.

        :param predictions_df: Dataframe with a "predictions" column and an "id" column of the form
            "<property_id>+<recommendation_id>", where the recommendation id contains "phase=<integer>"
        :return: The same dataframe, with "property_id", "recommendation_id" and an integer "phase"
            column added
        """
        predictions_df['predictions'] = predictions_df["predictions"].astype(float).round(1)

        # Only split on the first "+", so a "+" inside the recommendation id cannot produce extra columns
        predictions_df[['property_id', 'recommendation_id']] = predictions_df['id'].str.split(
            '+', n=1, expand=True
        )

        # To grab the phase, we pull the whole integer after "phase=" in the recommendation_id. Taking all of
        # the digits (rather than just the first character) keeps phases of 10 and above intact.
        predictions_df['phase'] = predictions_df['recommendation_id'].str.extract(r'phase=(\d+)', expand=False)
        # Convert back to int
        predictions_df['phase'] = predictions_df['phase'].astype(int)

        return predictions_df

    def predict_all(self, df, bucket, prediction_buckets) -> dict:
        """
        For each model prefix, this method will upload the scoring data to s3 and then make a request to the
        model api to generate predictions. The predictions will be stored in the predictions bucket.

        This method will then fetch the stored predictions and format them, returning all of the predictions as
        a dictionary of pandas dataframes

        :param df: Pandas dataframe with scoring data to be uploaded to s3
        :param bucket: Name of the bucket in s3 to upload to
        :param prediction_buckets: Dictionary containing the prediction buckets for each model prefix
        :return: Dictionary of formatted prediction dataframes, keyed by model prefix
        :raises RuntimeError: If the request to any of the model apis fails
        """
        predictions = {}
        for model_prefix in self.MODEL_PREFIXES:
            logger.info(f"Scoring for model prefix: {model_prefix}")
            file_location = self.upload_scoring_data(df, bucket, model_prefix)

            response = self.predict(f"s3://{bucket}/{file_location}", model_prefix)

            # predict() returns None when the request failed; fail with a clear message instead of an
            # opaque "NoneType is not subscriptable" error below
            if response is None:
                raise RuntimeError(
                    f"Prediction request for {model_prefix} failed, so there are no predictions to retrieve"
                )

            predictions_bucket = prediction_buckets[model_prefix]

            # Retrieve the predictions. The file key is whatever follows "<bucket>/" in the stored filepath
            predictions_df = pd.DataFrame(
                read_dataframe_from_s3_parquet(
                    bucket_name=predictions_bucket,
                    file_key=response["storage_filepath"].split(predictions_bucket + "/")[1]
                )
            )

            predictions[model_prefix] = self._format_predictions(predictions_df)

        return predictions

View file

@ -1,83 +0,0 @@
import pandas as pd
import requests
from requests.exceptions import RequestException
from utils.logger import setup_logger
from utils.s3 import save_dataframe_to_s3_parquet
logger = setup_logger()
class SAPChangeModelAPI:
    """
    Client for the SAP change model API: uploads scoring data to s3 and requests predictions.
    """

    def __init__(
            self,
            portfolio_id,
            timestamp,
            base_url="https://api.dev.hestia.homes",
    ):
        """
        :param portfolio_id: The portfolio ID to be passed in the request payload
        :param timestamp: The creation timestamp to be passed in the request payload. It is also used to
            name the scoring file in s3
        :param base_url: Base URL of the SAP change model API
        """
        self.portfolio_id = portfolio_id
        self.timestamp = timestamp
        self.base_url = base_url

    def upload_scoring_data(self, df: pd.DataFrame, bucket: str) -> str:
        """
        Upload the scoring data to s3 as a parquet file, so the sap model api can score on it.

        The returned location can be used as the file location in the predict() method

        :param df: Pandas dataframe with scoring data to be uploaded to s3
        :param bucket: Name of the bucket in s3 to upload to
        :return: The key of the uploaded parquet file within the bucket
        """
        # The scoring file is keyed by portfolio and timestamp
        s3_key = f"sap_change_predictions/{self.portfolio_id}/{self.timestamp}.parquet"

        logger.info("Storing scoring data to s3")
        save_dataframe_to_s3_parquet(df=df, bucket_name=bucket, file_key=s3_key)

        return s3_key

    def predict(self, file_location):
        """Makes a POST request to the SAP Change Model API with the provided parameters.

        Args:
            file_location (str): The file location to be passed in the request payload.

        Returns:
            dict: The API response as a dictionary if the request was successful, None otherwise.
        """
        logger.info("Making request to sap change api")

        endpoint = f"{self.base_url}/sapmodel/predict"
        request_body = dict(
            file_location=file_location,
            property_id="",  # This should get removed
            portfolio_id=self.portfolio_id,
            created_at=self.timestamp,
        )
        request_headers = {"Content-Type": "application/json"}

        try:
            api_response = requests.post(endpoint, json=request_body, headers=request_headers, timeout=120)
            # Raises for any non-2xx status code
            api_response.raise_for_status()
            # Hand back the JSON response as a Python dictionary
            return api_response.json()
        except RequestException as e:
            # The failure is logged and signalled to the caller as None rather than re-raised
            logger.error(f"An error occurred: {e}")
            return None

View file

@ -35,4 +35,5 @@ mip==1.15.0
boto3==1.28.3
pandas==1.5.3
pyarrow==12.0.1
textblob
textblob
usaddress==0.5.10

View file

@ -1,21 +1,24 @@
import pandas as pd
import pytest
from unittest.mock import Mock
from epc_api.client import EpcClient
from backend.Property import Property
from etl.epc_clean.EpcClean import EpcClean
from etl.epc.Record import EPCRecord
# Define some test data
mock_epc_response = {
"rows": [
{
"tenure": "rental (social)",
"lmk-key": 1,
"uprn": 1,
"number-habitable-rooms": 5,
"property-type": "House",
"built-form": "Detached",
"inspection-date": "2023-06-01",
'lodgement-datetime': '2023-06-01 20:29:01',
"some-other-key": "some-value",
"roof-description": "Roof Description",
"roof-description": "pitched, no insulation",
"walls-description": "Walls Description",
"windows-description": "Windows Description",
"mainheat-description": "Main Heating Description",
@ -35,13 +38,15 @@ mock_epc_response = {
"floor-height": 2.5,
"total-floor-area": 100,
"construction-age-band": "England and Wales: 1967-1975",
"floor-description": "Floor Description"
"floor-description": "Floor Description",
"floor-level": "Ground"
},
{
"lmk-key": 2,
"uprn": 2,
"number-habitable-rooms": 5,
"property-type": "House",
"built-form": "Detached",
"inspection-date": "2023-05-01",
'lodgement-datetime': '2023-05-01 20:29:01',
"some-other-key": "some-other-value",
@ -65,7 +70,8 @@ mock_epc_response = {
"floor-height": 2.5,
"total-floor-area": 100,
"construction-age-band": "England and Wales: 1967-1975",
"floor-description": "Floor Description"
"floor-description": "Floor Description",
"floor-level": "Ground"
}
]
}
@ -97,7 +103,8 @@ mock_epc_response_dupe = {
"floor-height": 2.5,
"total-floor-area": 100,
"construction-age-band": "England and Wales: 1967-1975",
"floor-description": "Floor Description"
"floor-description": "Floor Description",
"floor-level": "Ground"
},
{
"lmk-key": 2,
@ -125,7 +132,8 @@ mock_epc_response_dupe = {
"floor-height": 2.5,
"total-floor-area": 100,
"construction-age-band": "England and Wales: 1967-1975",
"floor-description": "Floor Description"
"floor-description": "Floor Description",
"floor-level": "Ground"
},
{
"lmk-key": 3,
@ -153,36 +161,71 @@ mock_epc_response_dupe = {
"floor-height": 2.5,
"total-floor-area": 100,
"construction-age-band": "England and Wales: 1967-1975",
"floor-description": "Floor Description"
"floor-description": "Floor Description",
"floor-level": "Ground"
}
]
}
class TestProperty:
@pytest.fixture(autouse=True)
def property_instance(self, mock_epc_client, mock_cleaner):
property_instance = Property(1, "AB12CD", "Test Address", epc_client=mock_epc_client)
def mock_photo_supply_lookup(self):
return pd.DataFrame(
[
dict(
tenure="rental (social)",
built_form="Detached",
property_type="House",
construction_age_band="England and Wales: 1967-1975",
is_flat=False,
is_pitched=True,
is_roof_room=False,
floor_area_decile=2,
photo_supply_median=40
)
]
)
@pytest.fixture(autouse=True)
def mock_floor_area_decile_thresholds(self):
return pd.DataFrame(
{"floor_area_decile_thresholds": [0, 10, 30, 50]}
)
@pytest.fixture(autouse=True)
def property_instance(self, mock_cleaner):
epc_record = EPCRecord()
epc_record.prepared_epc = mock_epc_response["rows"][0]
property_instance = Property(id=1, postcode="AB12CD", address="Test Address", epc_record=epc_record)
property_instance.number_of_floors = 2
property_instance.number_of_rooms = 5
property_instance.floor_area = 100
property_instance.floor_height = 2.5
return property_instance
@pytest.fixture(autouse=True)
def property_instance_dupe_data(self, mock_epc_client_dupe_data):
property_instance_dupe_data = Property(2, "AB12CD", "Test Address", epc_client=mock_epc_client_dupe_data)
def property_instance_dupe_data(self):
epc_record = EPCRecord()
epc_record.prepared_epc = mock_epc_response_dupe["rows"][0]
property_instance_dupe_data = Property(id=2, postcode="AB12CD", address="Test Address", epc_record=epc_record)
return property_instance_dupe_data
@pytest.fixture
def mock_epc_client(self):
mock_epc_client = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
mock_epc_client.domestic.search.return_value = mock_epc_response.copy()
mock_epc_client.auth_token = "mocked_auth_token"
return mock_epc_client
@pytest.fixture
def mock_epc_client_dupe_data(self):
mock_epc_client_dupe_data = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
mock_epc_client_dupe_data.domestic.search.return_value = mock_epc_response_dupe.copy()
mock_epc_client_dupe_data.auth_token = "mocked_auth_token"
return mock_epc_client_dupe_data
# @pytest.fixture
# def mock_epc_client(self):
# mock_epc_client = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
# mock_epc_client.domestic.search.return_value = mock_epc_response.copy()
# mock_epc_client.auth_token = "mocked_auth_token"
# return mock_epc_client
#
# @pytest.fixture
# def mock_epc_client_dupe_data(self):
# mock_epc_client_dupe_data = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
# mock_epc_client_dupe_data.domestic.search.return_value = mock_epc_response_dupe.copy()
# mock_epc_client_dupe_data.auth_token = "mocked_auth_token"
# return mock_epc_client_dupe_data
@pytest.fixture
def mock_cleaner(self):
@ -221,7 +264,11 @@ class TestProperty:
}
mock_cleaner.cleaned = {
"roof-description": [{"original_description": "Roof Description"}],
"roof-description": [
{"original_description": "Roof Description"},
{"original_description": "pitched, no insulation", "is_pitched": True, "is_flat": False,
"is_roof_room": False}
],
"walls-description": [walls_data],
"windows-description": [{"original_description": "Windows Description"}],
"mainheat-description": [{"original_description": "Main Heating Description"}],
@ -232,37 +279,34 @@ class TestProperty:
}
return mock_cleaner
def test_init(self, mock_epc_client):
inst1 = Property(0, "AB12CD", "Test Address", epc_client=mock_epc_client)
# Should be mocked auth token
assert inst1.epc_client.auth_token == "mocked_auth_token"
def test_init(self):
epc_record = EPCRecord()
epc_record.prepared_epc = {"uprn": 1}
inst1 = Property(0, postcode="AB12CD", address="Test Address", epc_record=epc_record)
inst2 = Property(3, "AB12CD", "Test Address", epc_client=mock_epc_client)
assert inst2.epc_client.auth_token
assert inst1.data is not None
inst3 = Property(4, "AB12CD", "Test Address", data={"some": "data"}, epc_client=mock_epc_client)
assert inst3.data == {"some": "data"}
inst2 = Property(3, "AB12CD", "Test Address", epc_record=epc_record)
assert inst2.id == 3
data = inst3.search_address_epc()
assert data is None
inst3 = Property(4, "AB12CD", "Test Address", epc_record=epc_record)
assert inst3.data == {"uprn": 1}
def test_search_address_epc(self, property_instance):
# Call the method to test
property_instance.search_address_epc()
# Verify that the correct data is being returned
assert property_instance.data == mock_epc_response["rows"][0]
def test_search_address_epc_multiple_results(self, property_instance_dupe_data, mock_epc_client_dupe_data):
    """search_address_epc() on the duplicate-data property must raise the "more than one result" error."""
    # mock_epc_client_dupe_data is requested as a fixture only; it is not referenced directly here.
    expected_message = "More than one result found for this address - investigate me"
    with pytest.raises(Exception, match=expected_message):
        property_instance_dupe_data.search_address_epc()
def test_get_components(self, property_instance, mock_cleaner, mock_epc_client):
property_instance.search_address_epc()
property_instance.get_components(mock_cleaner.cleaned)
def test_get_components(
self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
):
property_instance.get_components(
mock_cleaner.cleaned,
photo_supply_lookup=mock_photo_supply_lookup,
floor_area_decile_thresholds=mock_floor_area_decile_thresholds
)
# Verify that the components are set correctly
assert property_instance.roof == {"original_description": "Roof Description"}
assert property_instance.roof == {
'original_description': 'pitched, no insulation', 'is_pitched': True,
'is_flat': False, 'is_roof_room': False
}
assert property_instance.walls == {
"original_description": "Walls Description",
"is_cavity_wall": True,
@ -286,24 +330,15 @@ class TestProperty:
# Verify that ValueError is raised when EpcClean doesn't contain cleaned data
with pytest.raises(ValueError, match="Cleaner does not contain cleaned data"):
property_instance.get_components(mock_cleaner.cleaned)
property_instance.get_components(mock_cleaner.cleaned, pd.DataFrame(), pd.DataFrame())
def test_get_components_no_data(self, property_instance, mock_cleaner):
def test_get_components_no_attributes(
self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
):
# Modify the mock cleaner to have no attributes for a specific description
mock_cleaner.cleaned = {
"roof-description": []
}
# Verify that ValueError is raised when no attributes are found
with pytest.raises(ValueError, match="Property does not contain data"):
property_instance.get_components(mock_cleaner.cleaned)
def test_get_components_no_attributes(self, property_instance, mock_cleaner):
# Modify the mock cleaner to have no attributes for a specific description
mock_cleaner.cleaned = {
"roof-description": []
}
property_instance.search_address_epc()
property_instance.data["roof-description"] = "Pitched, no insulation"
property_instance.walls = {
"original_description": "Walls Description",
@ -324,14 +359,17 @@ class TestProperty:
}
# Assert backup cleaning has been applied
property_instance.get_components(mock_cleaner.cleaned)
property_instance.get_components(
mock_cleaner.cleaned, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
)
assert property_instance.roof["clean_description"] == "Pitched, no insulation"
assert property_instance.roof["is_pitched"]
def test_get_components_multiple_attributes(self, property_instance, mock_cleaner):
def test_get_components_multiple_attributes(
self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
):
# This shouldn't happen - it would mean a cleaning error
property_instance.search_address_epc()
property_instance.data["roof-description"] = "Roof Description"
cleaned = {
"roof-description": [
@ -342,4 +380,102 @@ class TestProperty:
# Verify that ValueError is raised when multiple attributes are found
with pytest.raises(ValueError, match="Either No attributes or multiple found for roof-description"):
property_instance.get_components(cleaned)
property_instance.get_components(cleaned, mock_photo_supply_lookup, mock_floor_area_decile_thresholds)
def test_set_spatial(self):
    """Check the flags that Property.set_spatial sets from a single-row spatial DataFrame."""
    epc_record = EPCRecord()
    epc_record.prepared_epc = mock_epc_response["rows"][0]

    # Case 1: in a conservation area and a heritage building, but not listed.
    restricted_home = Property(1, postcode="AB12CD", address="Test Address", epc_record=epc_record)
    restricted_row = {
        'X_COORDINATE': 411143.0, 'Y_COORDINATE': 281701.0, 'LATITUDE': 52.4331896, 'LONGITUDE': -1.8375238,
        'conservation_status': True, 'is_listed_building': False, 'is_heritage_building': True
    }
    restricted_home.set_spatial(pd.DataFrame([restricted_row]))

    assert restricted_home.in_conservation_area
    assert not restricted_home.is_listed
    assert restricted_home.is_heritage
    assert restricted_home.restricted_measures

    # Case 2: conservation status missing (None), neither listed nor heritage.
    unrestricted_home = Property(1, "AB12CD", "Test Address", epc_record=epc_record)
    unrestricted_row = {
        'X_COORDINATE': 411143.0, 'Y_COORDINATE': 281701.0, 'LATITUDE': 52.4331896, 'LONGITUDE': -1.8375238,
        'conservation_status': None, 'is_listed_building': False, 'is_heritage_building': False
    }
    unrestricted_home.set_spatial(pd.DataFrame([unrestricted_row]))

    assert unrestricted_home.in_conservation_area is None
    assert not unrestricted_home.is_listed
    assert not unrestricted_home.is_heritage
    assert not unrestricted_home.restricted_measures
def test_set_floor_level(self):
    """Exercise Property.set_floor_level for three flats and one house."""

    def build_property(floor_level, property_type, floor):
        # Fresh EPC record and Property per case, with the cleaned floor attributes attached.
        record = EPCRecord()
        record.prepared_epc = {'floor-level': floor_level, 'property-type': property_type}
        home = Property(1, postcode="AB12CD", address="Test Address", epc_record=record)
        home.floor = floor
        return home

    def dwelling_below_floor(another_property_below):
        # Floor attributes shared by the last three cases; only `another_property_below` varies.
        return {
            'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation',
            'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_assumed': False,
            'is_to_unheated_space': False, 'is_to_external_air': False, 'is_suspended': False, 'is_solid': False,
            'another_property_below': another_property_below, 'insulation_thickness': 'none',
            'floor_thermal_transmittance': None, 'floor_insulation_thickness': 'none'
        }

    # A flat which looks like it's on the first floor ('01'), but whose floor is solid with no
    # property below, so it is actually on the ground floor: floor_level should be 0.
    mislabelled_first_floor = build_property('01', 'Flat', {
        'original_description': 'Solid, no insulation (assumed)', 'clean_description': 'Solid, no insulation',
        'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_assumed': True,
        'is_to_unheated_space': False, 'is_to_external_air': False, 'is_suspended': False, 'is_solid': True,
        'another_property_below': False, 'insulation_thickness': 'none', 'floor_thermal_transmittance': None,
        'floor_insulation_thickness': 'none'
    })
    mislabelled_first_floor.set_floor_level()
    assert mislabelled_first_floor.floor_level == 0

    # Labelled as ground floor, but there is another property below, so floor_level becomes 1.
    mislabelled_ground_floor = build_property('Ground', 'Flat', dwelling_below_floor(True))
    mislabelled_ground_floor.set_floor_level()
    assert mislabelled_ground_floor.floor_level == 1

    # Correctly labelled as being on the 2nd floor.
    second_floor_flat = build_property('02', 'Flat', dwelling_below_floor(True))
    second_floor_flat.set_floor_level()
    assert second_floor_flat.floor_level == 2

    # A house with an empty floor-level: floor_level stays None.
    house = build_property('', 'House', dwelling_below_floor(False))
    house.set_floor_level()
    assert house.floor_level is None

View file

@ -1,989 +0,0 @@
from backend.Property import Property
from etl.epc.DataProcessor import DataProcessor
from backend.app.plan.utils import create_recommendation_scoring_data, get_cleaned
from etl.epc.settings import COLUMNS_TO_MERGE_ON
from epc_api.client import EpcClient
import pandas as pd
import pytest
import msgpack
from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3
from tqdm import tqdm
# Handy code for selecting testing data
# import pickle
#
# with open("sap_dataset.pickle", "rb") as f:
# sap_change_dataset = pickle.load(f)
#
# search_from = sap_change_dataset[
# (sap_change_dataset["walls_thermal_transmittance_ENDING"] == sap_change_dataset["walls_thermal_transmittance"]) &
# sap_change_dataset["is_to_unheated_space"]
# ]
# search_from = search_from[
# (search_from["roof_thermal_transmittance_ENDING"] == search_from["roof_thermal_transmittance"]) &
# (search_from["floor_thermal_transmittance_ENDING"] != search_from["floor_thermal_transmittance"]) &
# (search_from["MECHANICAL_VENTILATION_ENDING"] == search_from["MECHANICAL_VENTILATION_STARTING"]) &
# (search_from["SECONDHEAT_DESCRIPTION_ENDING"] == search_from["SECONDHEAT_DESCRIPTION_STARTING"]) &
# (search_from["GLAZED_TYPE_ENDING"] == search_from["GLAZED_TYPE_STARTING"])
# ]
#
# # Find a record where the only difference is cavity wall getting filled
# ending_cols = [c for c in search_from.columns if "_ENDING" in c]
#
# ignore = [
# "SAP_ENDING", "HEAT_DEMAND_ENDING", "CARBON_ENDING", "TRANSACTION_TYPE_ENDING", "FLOOR_HEIGHT_ENDING",
# "DAYS_TO_ENDING", "TOTAL_FLOOR_AREA_ENDING"
# ]
#
# ending_cols = [c for c in ending_cols if c not in ignore]
#
# for _, row in tqdm(search_from.iterrows(), total=search_from.shape[0]):
#
# same = True
# starting_cols = []
# for c in ending_cols:
#
# starting_col = c.replace("_ENDING", "")
# if starting_col not in search_from.columns:
# starting_col = c.replace("_ENDING", "_STARTING")
# if starting_col not in search_from.columns:
# raise Exception("something went wrong")
#
# starting_cols.append(starting_col)
#
# # We want them to be different
# if c == "floor_thermal_transmittance_ENDING":
# if (row[c] == row[starting_col]) | (row[starting_col] != "natural"):
# same = False
# break
# else:
# continue
#
# # We now check if the starting and ending values are the same
# if row[c] != row[starting_col]:
# same = False
# break
#
# if same:
# raise Exception("We found one!")
#
# fixed_cols = [c for c in search_from.columns if c not in starting_cols + ending_cols]
#
# import pandas as pd
#
# start = row[["SAP_STARTING"] + starting_cols]
# start.index = [c.replace("_STARTING", "") for c in start.index]
# end = row[["SAP_ENDING"] + ending_cols]
# end.index = [c.replace("_ENDING", "") for c in end.index]
# start["type"] = "starting"
# end["type"] = "ending"
#
# compare = pd.concat([start, end], axis=1)
#
# ending_lmk = "1252008839062019090910572351658131"
# starting_lmk = "1252008819542014122308482236142128"
#
# client = EpcClient(auth_token=EPC_AUTH_TOKEN)
# result = client.domestic.search(params={"address": "Flat 14 Charles House, Freemens Way", "postcode": "CT14 9DL"})
# starting_epc = [x for x in result["rows"] if x["lmk-key"] == starting_lmk][0]
# ending_epc = [x for x in result["rows"] if x["lmk-key"] == ending_lmk][0]
# with open(
# os.path.abspath(os.path.dirname(__file__)) + "/backend/tests/test_data/cleaned.pickle", "rb"
# ) as f:
# cleaned = pickle.load(f)
# with open(
# os.path.abspath(os.path.dirname(__file__)) + "/backend/tests/test_data/cleaning_data.pickle", "rb"
# ) as f:
# cleaning_data = pickle.load(f)
# TODO: Need to do floors, suspended and solid and to unheated space
class TestSapModelPrep:
@pytest.fixture
def cleaning_data(self):
    """Read sap_change_model/cleaning_dataset.parquet from the retrofit-data-dev bucket."""
    cleaning_frame = read_dataframe_from_s3_parquet(
        file_key="sap_change_model/cleaning_dataset.parquet",
        bucket_name="retrofit-data-dev",
    )
    return cleaning_frame
@pytest.fixture
def cleaned(self):
    """Read cleaned_epc_data/cleaned.bson from the retrofit-data-dev bucket and unpack it with msgpack."""
    packed_bytes = read_from_s3(
        bucket_name="retrofit-data-dev",
        s3_file_name="cleaned_epc_data/cleaned.bson",
    )
    return msgpack.unpackb(packed_bytes, raw=False)
def test_fill_cavity_wall(self, cleaned, cleaning_data):
    """
    We ensure that the process that prepares the data in the engine code results in the same data as
    the model is trained on.

    A starting EPC is pushed through Property -> DataProcessor -> create_recommendation_scoring_data
    for a "wall_insulation" recommendation, the averages/missing-value cleaning is applied, and the
    resulting single-row frame is compared column by column with a record from the training data.

    :param cleaned: fixture holding the cleaned EPC descriptions passed to Property.get_components
    :param cleaning_data: fixture holding the dataset passed to DataProcessor.apply_averages_cleaning
    """

    # This is an actual starting EPC
    starting_epc = {
        'low-energy-fixed-light-count': '', 'address': '26, Vicarage Lane, Eaton',
        'uprn-source': 'Address Matched', 'floor-height': '2.39', 'heating-cost-potential': '942',
        'unheated-corridor-length': '', 'hot-water-cost-potential': '97',
        'construction-age-band': 'England and Wales: 1967-1975', 'potential-energy-rating': 'D',
        'mainheat-energy-eff': 'Average', 'windows-env-eff': 'Good', 'lighting-energy-eff': 'Average',
        'environment-impact-potential': '53',
        'glazed-type': 'double glazing installed during or after 2002', 'heating-cost-current': '1475',
        'address3': '', 'mainheatcont-description': 'Programmer, room thermostat and TRVs',
        'sheating-energy-eff': 'N/A', 'property-type': 'House', 'local-authority-label': 'Melton',
        'fixed-lighting-outlets-count': '', 'energy-tariff': 'Single',
        'mechanical-ventilation': 'natural', 'hot-water-cost-current': '96', 'county': 'Leicestershire',
        'postcode': 'NG32 1SP', 'solar-water-heating-flag': 'Y', 'constituency': 'E14000909',
        'co2-emissions-potential': '5.7', 'number-heated-rooms': '7',
        'floor-description': 'Suspended, no insulation (assumed)',
        'energy-consumption-potential': '177', 'local-authority': 'E07000133', 'built-form': 'Detached',
        'number-open-fireplaces': '1', 'windows-description': 'Fully double glazed',
        'glazed-area': 'Normal', 'inspection-date': '2016-09-22', 'mains-gas-flag': 'N',
        'co2-emiss-curr-per-floor-area': '87', 'address1': '26, Vicarage Lane',
        'heat-loss-corridor': 'NO DATA!', 'flat-storey-count': '',
        'constituency-label': 'Rutland and Melton', 'roof-energy-eff': 'Very Poor',
        'total-floor-area': '116.0', 'building-reference-number': '4940047478',
        'environment-impact-current': '29', 'co2-emissions-current': '10.0',
        'roof-description': 'Pitched, limited insulation (assumed)', 'floor-energy-eff': 'NO DATA!',
        'number-habitable-rooms': '7', 'address2': 'Eaton', 'hot-water-env-eff': 'Good',
        'posttown': 'GRANTHAM', 'mainheatc-energy-eff': 'Good', 'main-fuel': 'oil (not community)',
        'lighting-env-eff': 'Average', 'windows-energy-eff': 'Good', 'floor-env-eff': 'N/A',
        'sheating-env-eff': 'N/A',
        'lighting-description': 'Low energy lighting in 31% of fixed outlets',
        'roof-env-eff': 'Very Poor', 'walls-energy-eff': 'Poor', 'photo-supply': '',
        'lighting-cost-potential': '69', 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100',
        'main-heating-controls': '2106', 'lodgement-datetime': '2016-09-23 20:29:01',
        'flat-top-storey': '', 'current-energy-rating': 'F',
        'secondheat-description': 'Room heaters, dual fuel (mineral and wood)', 'walls-env-eff': 'Poor',
        'transaction-type': 'marketed sale', 'uprn': '100030534042', 'current-energy-efficiency': '34',
        'energy-consumption-current': '343', 'mainheat-description': 'Boiler and radiators, oil',
        'lighting-cost-current': '117', 'lodgement-date': '2016-09-23', 'extension-count': '2',
        'mainheatc-env-eff': 'Good', 'lmk-key': '1481856849902016092320290148762028',
        'wind-turbine-count': '0', 'tenure': 'owner-occupied', 'floor-level': 'NODATA!',
        'potential-energy-efficiency': '64', 'hot-water-energy-eff': 'Good',
        'low-energy-lighting': '31',
        'walls-description': 'Cavity wall, as built, no insulation (assumed)',
        'hotwater-description': 'From main system, plus solar'
    }

    # This is the training data as we prepare it in the engine
    # This is an actual record from the training data
    row = {
        'UPRN': '100030534042', 'RDSAP_CHANGE': 12, 'HEAT_DEMAND_CHANGE': -72,
        'CARBON_CHANGE': -2.0999999999999996, 'SAP_STARTING': 34, 'SAP_ENDING': 46, 'HEAT_DEMAND_STARTING': 343,
        'HEAT_DEMAND_ENDING': 271, 'CARBON_STARTING': 10.0, 'CARBON_ENDING': 7.9, 'PROPERTY_TYPE': 'House',
        'BUILT_FORM': 'Detached', 'CONSTITUENCY': 'E14000909', 'NUMBER_HABITABLE_ROOMS': 7.0,
        'NUMBER_HEATED_ROOMS': 7.0, 'FIXED_LIGHTING_OUTLETS_COUNT': 21.0,
        'CONSTRUCTION_AGE_BAND': 'England and Wales: 1967-1975', 'TRANSACTION_TYPE_STARTING': 'marketed sale',
        'MECHANICAL_VENTILATION_STARTING': 'natural',
        'SECONDHEAT_DESCRIPTION_STARTING': 'Room heaters, dual fuel (mineral and wood)',
        'ENERGY_TARIFF_STARTING': 'Single', 'SOLAR_WATER_HEATING_FLAG_STARTING': 'Y',
        'PHOTO_SUPPLY_STARTING': 0.0, 'GLAZED_TYPE_STARTING': 'double glazing installed during or after 2002',
        'MULTI_GLAZE_PROPORTION_STARTING': 100.0, 'LOW_ENERGY_LIGHTING_STARTING': 31.0,
        'NUMBER_OPEN_FIREPLACES_STARTING': 1.0, 'EXTENSION_COUNT_STARTING': 2.0,
        'TOTAL_FLOOR_AREA_STARTING': 116.0, 'FLOOR_HEIGHT_STARTING': 2.39,
        'TRANSACTION_TYPE_ENDING': 'marketed sale', 'MECHANICAL_VENTILATION_ENDING': 'natural',
        'SECONDHEAT_DESCRIPTION_ENDING': 'Room heaters, dual fuel (mineral and wood)',
        'ENERGY_TARIFF_ENDING': 'Single', 'SOLAR_WATER_HEATING_FLAG_ENDING': 'Y', 'PHOTO_SUPPLY_ENDING': 0.0,
        'GLAZED_TYPE_ENDING': 'double glazing installed during or after 2002',
        'MULTI_GLAZE_PROPORTION_ENDING': 100.0, 'LOW_ENERGY_LIGHTING_ENDING': 31.0,
        'NUMBER_OPEN_FIREPLACES_ENDING': 1.0, 'EXTENSION_COUNT_ENDING': 2.0, 'TOTAL_FLOOR_AREA_ENDING': 116.0,
        'FLOOR_HEIGHT_ENDING': 2.41, 'DAYS_TO_STARTING': 784, 'DAYS_TO_ENDING': 867,
        'walls_thermal_transmittance': 1.5, 'is_cavity_wall': True, 'is_filled_cavity': False,
        'is_solid_brick': False, 'is_system_built': False, 'is_timber_frame': False,
        'is_granite_or_whinstone': False, 'is_as_built': True, 'is_cob': False,
        'is_sandstone_or_limestone': False, 'is_park_home': False, 'walls_insulation_thickness': 'none',
        'external_insulation': False, 'internal_insulation': False, 'walls_thermal_transmittance_ENDING': 0.7,
        'is_park_home_ENDING': False, 'walls_insulation_thickness_ENDING': 'average',
        'external_insulation_ENDING': False, 'internal_insulation_ENDING': False,
        'floor_thermal_transmittance': 0.64, 'is_to_unheated_space': False, 'is_to_external_air': False,
        'is_suspended': True, 'is_solid': False, 'another_property_below': False,
        'floor_insulation_thickness': 'none', 'floor_thermal_transmittance_ENDING': 0.64,
        'floor_insulation_thickness_ENDING': 'none', 'roof_thermal_transmittance': 1.5, 'is_pitched': True,
        'is_roof_room': False, 'is_loft': False, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False,
        'has_dwelling_above': False, 'roof_insulation_thickness': 'below average',
        'roof_thermal_transmittance_ENDING': 1.5, 'roof_insulation_thickness_ENDING': 'below average',
        'heater_type': 'Unknown', 'system_type': 'from main system', 'thermostat_characteristics': 'Unknown',
        'heating_scope': 'Unknown', 'energy_recovery': 'Unknown', 'hotwater_tariff_type': 'Unknown',
        'extra_features': 'plus solar', 'chp_systems': 'Unknown', 'distribution_system': 'Unknown',
        'no_system_present': 'Unknown', 'appliance': 'Unknown', 'heater_type_ENDING': 'Unknown',
        'system_type_ENDING': 'from main system', 'thermostat_characteristics_ENDING': 'Unknown',
        'heating_scope_ENDING': 'Unknown', 'energy_recovery_ENDING': 'Unknown',
        'hotwater_tariff_type_ENDING': 'Unknown', 'extra_features_ENDING': 'plus solar',
        'chp_systems_ENDING': 'Unknown', 'distribution_system_ENDING': 'Unknown',
        'no_system_present_ENDING': 'Unknown', 'appliance_ENDING': 'Unknown', 'has_radiators': True,
        'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False,
        'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': True,
        'has_air_source_heat_pump': False, 'has_room_heaters': False, 'has_electric_storage_heaters': False,
        'has_warm_air': False, 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False,
        'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False,
        'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False,
        'has_electric_heat_pump': False, 'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False,
        'has_exhaust_source_heat_pump': False, 'has_community_heat_pump': False, 'has_electric': False,
        'has_mains_gas': False, 'has_wood_logs': False, 'has_coal': False, 'has_oil': True,
        'has_wood_pellets': False, 'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False,
        'has_smokeless_fuel': False, 'has_lpg': False, 'has_b30k': False, 'has_electricaire': False,
        'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False, 'has_radiators_ENDING': True,
        'has_fan_coil_units_ENDING': False, 'has_pipes_in_screed_above_insulation_ENDING': False,
        'has_pipes_in_insulated_timber_floor_ENDING': False, 'has_pipes_in_concrete_slab_ENDING': False,
        'has_boiler_ENDING': True, 'has_air_source_heat_pump_ENDING': False, 'has_room_heaters_ENDING': False,
        'has_electric_storage_heaters_ENDING': False, 'has_warm_air_ENDING': False,
        'has_electric_underfloor_heating_ENDING': False, 'has_electric_ceiling_heating_ENDING': False,
        'has_community_scheme_ENDING': False, 'has_ground_source_heat_pump_ENDING': False,
        'has_no_system_present_ENDING': False, 'has_portable_electric_heaters_ENDING': False,
        'has_water_source_heat_pump_ENDING': False, 'has_electric_heat_pump_ENDING': False,
        'has_micro-cogeneration_ENDING': False, 'has_solar_assisted_heat_pump_ENDING': False,
        'has_exhaust_source_heat_pump_ENDING': False, 'has_community_heat_pump_ENDING': False,
        'has_electric_ENDING': False, 'has_mains_gas_ENDING': False, 'has_wood_logs_ENDING': False,
        'has_coal_ENDING': False, 'has_oil_ENDING': True, 'has_wood_pellets_ENDING': False,
        'has_anthracite_ENDING': False, 'has_dual_fuel_mineral_and_wood_ENDING': False,
        'has_smokeless_fuel_ENDING': False, 'has_lpg_ENDING': False, 'has_b30k_ENDING': False,
        'has_electricaire_ENDING': False, 'has_assumed_for_most_rooms_ENDING': False,
        'has_underfloor_heating_ENDING': False, 'thermostatic_control': 'room thermostat',
        'charging_system': 'Unknown', 'switch_system': 'programmer', 'no_control': 'Unknown',
        'dhw_control': 'Unknown', 'community_heating': 'Unknown', 'multiple_room_thermostats': False,
        'auxiliary_systems': 'Unknown', 'trvs': 'trvs', 'rate_control': 'Unknown',
        'thermostatic_control_ENDING': 'room thermostat', 'charging_system_ENDING': 'Unknown',
        'switch_system_ENDING': 'programmer', 'no_control_ENDING': 'Unknown', 'dhw_control_ENDING': 'Unknown',
        'community_heating_ENDING': 'Unknown', 'multiple_room_thermostats_ENDING': False,
        'auxiliary_systems_ENDING': 'Unknown', 'trvs_ENDING': 'trvs', 'rate_control_ENDING': 'Unknown',
        'glazing_type': 'double', 'glazing_type_ENDING': 'double', 'fuel_type': 'oil',
        'main-fuel_tariff_type': 'Unknown', 'is_community': False,
        'no_individual_heating_or_community_network': False, 'complex_fuel_type': 'Unknown',
        'fuel_type_ENDING': 'oil', 'main-fuel_tariff_type_ENDING': 'Unknown', 'is_community_ENDING': False,
        'no_individual_heating_or_community_network_ENDING': False, 'complex_fuel_type_ENDING': 'Unknown',
        'estimated_perimeter_STARTING': 44.77882152472145, 'estimated_perimeter_ENDING': 44.77882152472145,
        'HOT_WATER_ENERGY_EFF_STARTING': "Good",
        "FLOOR_ENERGY_EFF_STARTING": "Unknown",
        "WINDOWS_ENERGY_EFF_STARTING": "Good",
        "WALLS_ENERGY_EFF_STARTING": "Poor",
        "SHEATING_ENERGY_EFF_STARTING": "Unknown",
        "ROOF_ENERGY_EFF_STARTING": "Very Poor",
        "MAINHEAT_ENERGY_EFF_STARTING": "Average",
        "MAINHEATC_ENERGY_EFF_STARTING": "Good",
        "LIGHTING_ENERGY_EFF_STARTING": "Average",
        "POTENTIAL_ENERGY_EFFICIENCY": 64,
        "ENVIRONMENT_IMPACT_POTENTIAL": 53,
        "ENERGY_CONSUMPTION_POTENTIAL": 177.0,
        "CO2_EMISSIONS_POTENTIAL": 5.7,
        "HOT_WATER_ENERGY_EFF_ENDING": "Good",
        "FLOOR_ENERGY_EFF_ENDING": "Unknown",
        "WINDOWS_ENERGY_EFF_ENDING": "Good",
        "WALLS_ENERGY_EFF_ENDING": "Good",
        "SHEATING_ENERGY_EFF_ENDING": "Unknown",
        "ROOF_ENERGY_EFF_ENDING": "Very Poor",
        "MAINHEAT_ENERGY_EFF_ENDING": "Average",
        "MAINHEATC_ENERGY_EFF_ENDING": "Good",
        "LIGHTING_ENERGY_EFF_ENDING": "Average",
    }

    # Build the property straight from the EPC dict; the client only carries a placeholder token.
    home = Property(
        id=0,
        postcode=starting_epc["postcode"],
        address1=starting_epc["address1"],
        epc_client=EpcClient(auth_token="notoken"),
        data=starting_epc
    )

    home.get_components(cleaned)

    # Run the property's model data through the same DataProcessor steps used for new data.
    data_processor = DataProcessor(None, newdata=True)
    data_processor.insert_data(pd.DataFrame([home.get_model_data()]))

    data_processor.pre_process()

    starting_epc_data = data_processor.get_component_features(suffix="_STARTING")
    ending_epc_data = data_processor.get_component_features(suffix="_ENDING")
    fixed_data = data_processor.get_fixed_features()

    ending_lodgement_date = '2016-12-15'
    ending_epc_data["DAYS_TO_ENDING"] = data_processor.calculate_days_to(ending_lodgement_date)

    recommendation = {
        "recommendation_id": 0,
        "new_u_value": 0.7,
        "type": "wall_insulation"
    }

    test_record = create_recommendation_scoring_data(
        property=home,
        recommendation=recommendation,
        starting_epc_data=starting_epc_data,
        ending_epc_data=ending_epc_data,
        fixed_data=fixed_data,
    )

    test_record = pd.DataFrame([test_record])

    # Test the final cleaning:
    test_record = DataProcessor.apply_averages_cleaning(
        data_to_clean=test_record,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"]
    ).drop(columns=["LOCAL_AUTHORITY"])

    test_record = DataProcessor.clean_missings_after_description_process(
        test_record, [
            c for c in test_record.columns if
            ("thermal_transmittance" in c) or ("insulation_thickness" in c)
        ]
    )

    # Test that the data has been set up correctly
    # Things to fix:
    # [] Filled cavity should have an average insulation thickness in the cleaned data
    for c in test_record.columns:
        # These columns are not compared against the training row.
        if c in ["id", "SAP_ENDING", "HEAT_DEMAND_ENDING", "CARBON_ENDING"]:
            continue

        if c == "FLOOR_HEIGHT_ENDING":
            # Two-sided tolerance: without abs() any engine value larger than the training value
            # would pass, however far off it was.
            assert abs(row[c] - test_record[c].values[0]) <= 0.020001
            continue

        if c == "walls_insulation_thickness_ENDING":
            # Known mismatch (see "Things to fix" above): the training row holds "average" while
            # the engine currently produces "above average". Both sides are pinned explicitly.
            assert row[c] == "average"
            assert test_record[c].values[0] == "above average"
            continue

        assert test_record[c].values[0] == row[c]
def test_solid_wall_insulation(self, cleaned, cleaning_data):
    """
    Score a wall insulation recommendation (new u-value 0.21) against a solid brick flat and compare the
    resulting, cleaned model record with the expected row column by column.

    :param cleaned: Passed straight to Property.get_components (presumably a pytest fixture - confirm)
    :param cleaning_data: Passed to DataProcessor.apply_averages_cleaning as the cleaning data (presumably a
        pytest fixture - confirm)
    """
    # Raw EPC record used to build the property under test
    starting_epc2 = {
        'low-energy-fixed-light-count': '2', 'address': 'FLAT 12, WAREHOUSE W, 3 WESTERN GATEWAY',
        'uprn-source': 'Energy Assessor', 'floor-height': '3.64', 'heating-cost-potential': '465',
        'unheated-corridor-length': '', 'hot-water-cost-potential': '185',
        'construction-age-band': 'England and Wales: 1900-1929', 'potential-energy-rating': 'C',
        'mainheat-energy-eff': 'Very Poor', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Poor',
        'environment-impact-potential': '51', 'glazed-type': 'double glazing installed during or after 2002',
        'heating-cost-current': '1223', 'address3': '3 WESTERN GATEWAY',
        'mainheatcont-description': 'Programmer and appliance thermostats', 'sheating-energy-eff': 'N/A',
        'property-type': 'Flat', 'local-authority-label': 'Newham', 'fixed-lighting-outlets-count': '12',
        'energy-tariff': 'off-peak 7 hour', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '342',
        'county': '', 'postcode': 'E16 1BD', 'solar-water-heating-flag': 'N', 'constituency': 'E14001032',
        'co2-emissions-potential': '3.6', 'number-heated-rooms': '2', 'floor-description': '(other premises below)',
        'energy-consumption-potential': '307', 'local-authority': 'E09000025', 'built-form': 'Mid-Terrace',
        'number-open-fireplaces': '0', 'windows-description': 'Partial double glazing', 'glazed-area': 'Normal',
        'inspection-date': '2020-10-14', 'mains-gas-flag': 'N', 'co2-emiss-curr-per-floor-area': '66',
        'address1': 'FLAT 12', 'heat-loss-corridor': 'heated corridor', 'flat-storey-count': '',
        'constituency-label': 'West Ham', 'roof-energy-eff': 'N/A', 'total-floor-area': '70.0',
        'building-reference-number': '10000539740', 'environment-impact-current': '42',
        'co2-emissions-current': '4.6', 'roof-description': '(another dwelling above)', 'floor-energy-eff': 'N/A',
        'number-habitable-rooms': '2', 'address2': 'WAREHOUSE W', 'hot-water-env-eff': 'Poor', 'posttown': 'LONDON',
        'mainheatc-energy-eff': 'Good', 'main-fuel': 'electricity (not community)', 'lighting-env-eff': 'Poor',
        'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
        'lighting-description': 'Low energy lighting in 17% of fixed outlets', 'roof-env-eff': 'N/A',
        'walls-energy-eff': 'Very Poor', 'photo-supply': '0.0', 'lighting-cost-potential': '67',
        'mainheat-env-eff': 'Poor', 'multi-glaze-proportion': '61', 'main-heating-controls': '',
        'lodgement-datetime': '2020-10-14 00:00:00', 'flat-top-storey': 'N', 'current-energy-rating': 'F',
        'secondheat-description': 'None', 'walls-env-eff': 'Very Poor', 'transaction-type': 'marketed sale',
        'uprn': '10012839482', 'current-energy-efficiency': '33', 'energy-consumption-current': '393',
        'mainheat-description': 'Room heaters, electric', 'lighting-cost-current': '110',
        'lodgement-date': '2020-10-14', 'extension-count': '0', 'mainheatc-env-eff': 'Good',
        'lmk-key': 'b0d82f468273bec55ec5676a809b8e36b55db940ffa92f482a482f6aaa38eb1d', 'wind-turbine-count': '0',
        'tenure': 'Owner-occupied', 'floor-level': '01', 'potential-energy-efficiency': '71',
        'hot-water-energy-eff': 'Very Poor', 'low-energy-lighting': '17',
        'walls-description': 'Solid brick, as built, no insulation (assumed)',
        'hotwater-description': 'Electric immersion, standard tariff'
    }
    # Expected model record that the scoring data is compared against, one key per output column
    row2 = {
        'UPRN': '10012839482', 'RDSAP_CHANGE': 8, 'HEAT_DEMAND_CHANGE': -59,
        'CARBON_CHANGE': -0.5999999999999996, 'SAP_STARTING': 33, 'SAP_ENDING': 41, 'HEAT_DEMAND_STARTING': 393,
        'HEAT_DEMAND_ENDING': 334, 'CARBON_STARTING': 4.6, 'CARBON_ENDING': 4.0, 'PROPERTY_TYPE': 'Flat',
        'BUILT_FORM': 'Mid-Terrace', 'CONSTITUENCY': 'E14001032', 'NUMBER_HABITABLE_ROOMS': 2.0,
        'NUMBER_HEATED_ROOMS': 2.0, 'FIXED_LIGHTING_OUTLETS_COUNT': 12.0,
        'CONSTRUCTION_AGE_BAND': 'England and Wales: 1996-2002', 'TRANSACTION_TYPE_STARTING': 'marketed sale',
        'MECHANICAL_VENTILATION_STARTING': 'natural', 'SECONDHEAT_DESCRIPTION_STARTING': 'None',
        'ENERGY_TARIFF_STARTING': 'off-peak 7 hour', 'SOLAR_WATER_HEATING_FLAG_STARTING': 'N',
        'PHOTO_SUPPLY_STARTING': 0.0, 'GLAZED_TYPE_STARTING': 'double glazing installed during or after 2002',
        'MULTI_GLAZE_PROPORTION_STARTING': 61.0, 'LOW_ENERGY_LIGHTING_STARTING': 17.0,
        'NUMBER_OPEN_FIREPLACES_STARTING': 0.0, 'EXTENSION_COUNT_STARTING': 0.0,
        'TOTAL_FLOOR_AREA_STARTING': 70.0, 'FLOOR_HEIGHT_STARTING': 3.64,
        'TRANSACTION_TYPE_ENDING': 'marketed sale', 'MECHANICAL_VENTILATION_ENDING': 'natural',
        'SECONDHEAT_DESCRIPTION_ENDING': 'None', 'ENERGY_TARIFF_ENDING': 'off-peak 7 hour',
        'SOLAR_WATER_HEATING_FLAG_ENDING': 'N', 'PHOTO_SUPPLY_ENDING': 0.0,
        'GLAZED_TYPE_ENDING': 'double glazing installed during or after 2002',
        'MULTI_GLAZE_PROPORTION_ENDING': 61.0, 'LOW_ENERGY_LIGHTING_ENDING': 17.0,
        'NUMBER_OPEN_FIREPLACES_ENDING': 0.0, 'EXTENSION_COUNT_ENDING': 0.0, 'TOTAL_FLOOR_AREA_ENDING': 70.0,
        'FLOOR_HEIGHT_ENDING': 3.64, 'DAYS_TO_STARTING': 2266, 'DAYS_TO_ENDING': 2307,
        'walls_thermal_transmittance': 1.7, 'is_cavity_wall': False, 'is_filled_cavity': False,
        'is_solid_brick': True, 'is_system_built': False, 'is_timber_frame': False,
        'is_granite_or_whinstone': False, 'is_as_built': True, 'is_cob': False,
        'is_sandstone_or_limestone': False, 'is_park_home': False, 'walls_insulation_thickness': 'none',
        'external_insulation': False, 'internal_insulation': False, 'walls_thermal_transmittance_ENDING': 0.21,
        'is_park_home_ENDING': False, 'walls_insulation_thickness_ENDING': 'average',
        'external_insulation_ENDING': False, 'internal_insulation_ENDING': False,
        'floor_thermal_transmittance': 0.0, 'is_to_unheated_space': False, 'is_to_external_air': False,
        'is_suspended': False, 'is_solid': False, 'another_property_below': True,
        'floor_insulation_thickness': 'none', 'floor_thermal_transmittance_ENDING': 0.0,
        'floor_insulation_thickness_ENDING': 'none', 'roof_thermal_transmittance': 0.0, 'is_pitched': False,
        'is_roof_room': False, 'is_loft': False, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False,
        'has_dwelling_above': True, 'roof_insulation_thickness': 'none',
        'roof_thermal_transmittance_ENDING': 0.0, 'roof_insulation_thickness_ENDING': 'none',
        'heater_type': 'electric immersion', 'system_type': 'Unknown', 'thermostat_characteristics': 'Unknown',
        'heating_scope': 'Unknown', 'energy_recovery': 'Unknown', 'hotwater_tariff_type': 'standard tariff',
        'extra_features': 'Unknown', 'chp_systems': 'Unknown', 'distribution_system': 'Unknown',
        'no_system_present': 'Unknown', 'appliance': 'Unknown', 'heater_type_ENDING': 'electric immersion',
        'system_type_ENDING': 'Unknown', 'thermostat_characteristics_ENDING': 'Unknown',
        'heating_scope_ENDING': 'Unknown', 'energy_recovery_ENDING': 'Unknown',
        'hotwater_tariff_type_ENDING': 'standard tariff', 'extra_features_ENDING': 'Unknown',
        'chp_systems_ENDING': 'Unknown', 'distribution_system_ENDING': 'Unknown',
        'no_system_present_ENDING': 'Unknown', 'appliance_ENDING': 'Unknown', 'has_radiators': False,
        'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False,
        'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': False,
        'has_air_source_heat_pump': False, 'has_room_heaters': True, 'has_electric_storage_heaters': False,
        'has_warm_air': False, 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False,
        'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False,
        'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False,
        'has_electric_heat_pump': False, 'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False,
        'has_exhaust_source_heat_pump': False, 'has_community_heat_pump': False, 'has_electric': True,
        'has_mains_gas': False, 'has_wood_logs': False, 'has_coal': False, 'has_oil': False,
        'has_wood_pellets': False, 'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False,
        'has_smokeless_fuel': False, 'has_lpg': False, 'has_b30k': False, 'has_electricaire': False,
        'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False, 'has_radiators_ENDING': False,
        'has_fan_coil_units_ENDING': False, 'has_pipes_in_screed_above_insulation_ENDING': False,
        'has_pipes_in_insulated_timber_floor_ENDING': False, 'has_pipes_in_concrete_slab_ENDING': False,
        'has_boiler_ENDING': False, 'has_air_source_heat_pump_ENDING': False, 'has_room_heaters_ENDING': True,
        'has_electric_storage_heaters_ENDING': False, 'has_warm_air_ENDING': False,
        'has_electric_underfloor_heating_ENDING': False, 'has_electric_ceiling_heating_ENDING': False,
        'has_community_scheme_ENDING': False, 'has_ground_source_heat_pump_ENDING': False,
        'has_no_system_present_ENDING': False, 'has_portable_electric_heaters_ENDING': False,
        'has_water_source_heat_pump_ENDING': False, 'has_electric_heat_pump_ENDING': False,
        'has_micro-cogeneration_ENDING': False, 'has_solar_assisted_heat_pump_ENDING': False,
        'has_exhaust_source_heat_pump_ENDING': False, 'has_community_heat_pump_ENDING': False,
        'has_electric_ENDING': True, 'has_mains_gas_ENDING': False, 'has_wood_logs_ENDING': False,
        'has_coal_ENDING': False, 'has_oil_ENDING': False, 'has_wood_pellets_ENDING': False,
        'has_anthracite_ENDING': False, 'has_dual_fuel_mineral_and_wood_ENDING': False,
        'has_smokeless_fuel_ENDING': False, 'has_lpg_ENDING': False, 'has_b30k_ENDING': False,
        'has_electricaire_ENDING': False, 'has_assumed_for_most_rooms_ENDING': False,
        'has_underfloor_heating_ENDING': False, 'thermostatic_control': 'appliance thermostats',
        'charging_system': 'Unknown', 'switch_system': 'programmer', 'no_control': 'Unknown',
        'dhw_control': 'Unknown', 'community_heating': 'Unknown', 'multiple_room_thermostats': False,
        'auxiliary_systems': 'Unknown', 'trvs': 'Unknown', 'rate_control': 'Unknown',
        'thermostatic_control_ENDING': 'appliance thermostats', 'charging_system_ENDING': 'Unknown',
        'switch_system_ENDING': 'programmer', 'no_control_ENDING': 'Unknown', 'dhw_control_ENDING': 'Unknown',
        'community_heating_ENDING': 'Unknown', 'multiple_room_thermostats_ENDING': False,
        'auxiliary_systems_ENDING': 'Unknown', 'trvs_ENDING': 'Unknown', 'rate_control_ENDING': 'Unknown',
        'glazing_type': 'double', 'glazing_type_ENDING': 'double', 'fuel_type': 'electricity',
        'main-fuel_tariff_type': 'Unknown', 'is_community': False,
        'no_individual_heating_or_community_network': False, 'complex_fuel_type': 'Unknown',
        'fuel_type_ENDING': 'electricity', 'main-fuel_tariff_type_ENDING': 'Unknown',
        'is_community_ENDING': False, 'no_individual_heating_or_community_network_ENDING': False,
        'complex_fuel_type_ENDING': 'Unknown', 'estimated_perimeter_STARTING': 35.4964786985977,
        'estimated_perimeter_ENDING': 35.4964786985977,
        'HOT_WATER_ENERGY_EFF_STARTING': "Very Poor",
        "FLOOR_ENERGY_EFF_STARTING": "Unknown",
        "WINDOWS_ENERGY_EFF_STARTING": "Average",
        "WALLS_ENERGY_EFF_STARTING": "Very Poor",
        "SHEATING_ENERGY_EFF_STARTING": "Unknown",
        "ROOF_ENERGY_EFF_STARTING": "Unknown",
        "MAINHEAT_ENERGY_EFF_STARTING": "Very Poor",
        "MAINHEATC_ENERGY_EFF_STARTING": "Good",
        "LIGHTING_ENERGY_EFF_STARTING": "Poor",
        "POTENTIAL_ENERGY_EFFICIENCY": 71,
        "ENVIRONMENT_IMPACT_POTENTIAL": 51,
        "ENERGY_CONSUMPTION_POTENTIAL": 307,
        "CO2_EMISSIONS_POTENTIAL": 3.6,
        'HOT_WATER_ENERGY_EFF_ENDING': "Very Poor",
        "FLOOR_ENERGY_EFF_ENDING": "Unknown",
        "WINDOWS_ENERGY_EFF_ENDING": "Average",
        "WALLS_ENERGY_EFF_ENDING": "Good",
        "SHEATING_ENERGY_EFF_ENDING": "Unknown",
        "ROOF_ENERGY_EFF_ENDING": "Unknown",
        "MAINHEAT_ENERGY_EFF_ENDING": "Very Poor",
        "MAINHEATC_ENERGY_EFF_ENDING": "Good",
        "LIGHTING_ENERGY_EFF_ENDING": "Poor",
    }

    # Build the property from the raw EPC record; the token is a placeholder
    home2 = Property(
        id=0,
        postcode=starting_epc2["postcode"],
        address1=starting_epc2["address1"],
        epc_client=EpcClient(auth_token="notoken"),
        data=starting_epc2
    )
    home2.get_components(cleaned)

    # Run the property's model data through the processor to obtain the starting/ending/fixed feature sets
    data_processor2 = DataProcessor(None, newdata=True)
    data_processor2.insert_data(pd.DataFrame([home2.get_model_data()]))
    data_processor2.pre_process()
    starting_epc_data2 = data_processor2.get_component_features(suffix="_STARTING")
    ending_epc_data2 = data_processor2.get_component_features(suffix="_ENDING")
    fixed_data2 = data_processor2.get_fixed_features()

    ending_lodgement_date2 = '2020-11-24'
    ending_epc_data2["DAYS_TO_ENDING"] = data_processor2.calculate_days_to(ending_lodgement_date2)

    recommendation2 = {
        "recommendation_id": 0,
        "new_u_value": 0.21,
        "type": "wall_insulation"
    }
    test_record2 = create_recommendation_scoring_data(
        property=home2,
        recommendation=recommendation2,
        starting_epc_data=starting_epc_data2,
        ending_epc_data=ending_epc_data2,
        fixed_data=fixed_data2,
    )
    test_record2 = pd.DataFrame([test_record2])

    # Test the final cleaning:
    test_record2 = DataProcessor.apply_averages_cleaning(
        data_to_clean=test_record2,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"]
    ).drop(columns=["LOCAL_AUTHORITY"])
    test_record2 = DataProcessor.clean_missings_after_description_process(
        test_record2, [
            c for c in test_record2.columns if
            ("thermal_transmittance" in c) or ("insulation_thickness" in c)
        ]
    )

    # These columns are deliberately left out of the comparison ("id" has no entry in the expected row)
    skipped_columns = {"id", "SAP_ENDING", "HEAT_DEMAND_ENDING", "CARBON_ENDING"}
    for c in test_record2.columns:
        if c in skipped_columns:
            continue

        actual = test_record2[c].values[0]
        expected = row2[c]

        if c == "FLOOR_HEIGHT_ENDING":
            # Two-sided tolerance: without abs() any value above the expected one passes, however far off
            assert abs(expected - actual) <= 0.020001
            continue

        if c == "walls_insulation_thickness_ENDING":
            # Known discrepancy: the expected row holds "average" while the scoring data gives "above average"
            assert expected == "average"
            assert actual == "above average"
            continue

        if c == "CONSTRUCTION_AGE_BAND":
            # For this, we have different values in the original data
            assert expected == "England and Wales: 1996-2002"
            assert actual == "England and Wales: 1900-1929"
            continue

        assert actual == expected, f"Unexpected value in column {c!r}"
def test_ventilation(self, cleaned, cleaning_data):
    """
    Score a mechanical ventilation recommendation for a single EPC record and check every column of the
    cleaned scoring record against the expected row, apart from a small set of excluded columns.

    :param cleaned: Handed to Property.get_components (presumably a pytest fixture - confirm)
    :param cleaning_data: Handed to DataProcessor.apply_averages_cleaning (presumably a pytest fixture - confirm)
    """
    # Raw EPC record the property is built from
    epc_input = {
        'low-energy-fixed-light-count': '', 'address': '45 Shepperson Road', 'uprn-source': 'Energy Assessor',
        'floor-height': '1.87', 'heating-cost-potential': '645', 'unheated-corridor-length': '',
        'hot-water-cost-potential': '69', 'construction-age-band': 'England and Wales: 1900-1929',
        'potential-energy-rating': 'C', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average',
        'lighting-energy-eff': 'Average', 'environment-impact-potential': '75',
        'glazed-type': 'double glazing, unknown install date', 'heating-cost-current': '1028', 'address3': '',
        'mainheatcont-description': 'Programmer, TRVs and bypass', 'sheating-energy-eff': 'N/A',
        'property-type': 'House', 'local-authority-label': 'Sheffield', 'fixed-lighting-outlets-count': '21',
        'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '96',
        'county': '', 'postcode': 'S6 4FG', 'solar-water-heating-flag': 'N', 'constituency': 'E14000921',
        'co2-emissions-potential': '2.9', 'number-heated-rooms': '5',
        'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '152',
        'local-authority': 'E08000019', 'built-form': 'Enclosed Mid-Terrace', 'number-open-fireplaces': '0',
        'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2022-06-13',
        'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '59', 'address1': '45 Shepperson Road',
        'heat-loss-corridor': '', 'flat-storey-count': '',
        'constituency-label': 'Sheffield, Brightside and Hillsborough', 'roof-energy-eff': 'Very Poor',
        'total-floor-area': '107.0', 'building-reference-number': '10002892085', 'environment-impact-current': '46',
        'co2-emissions-current': '6.3', 'roof-description': 'Pitched, no insulation (assumed)',
        'floor-energy-eff': 'N/A', 'number-habitable-rooms': '5', 'address2': '', 'hot-water-env-eff': 'Good',
        'posttown': 'SHEFFIELD', 'mainheatc-energy-eff': 'Average', 'main-fuel': 'mains gas (not community)',
        'lighting-env-eff': 'Average', 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A',
        'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in 43% of fixed outlets',
        'roof-env-eff': 'Very Poor', 'walls-energy-eff': 'Very Poor', 'photo-supply': '0.0',
        'lighting-cost-potential': '83', 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100',
        'main-heating-controls': '', 'lodgement-datetime': '2023-05-27 12:15:21', 'flat-top-storey': '',
        'current-energy-rating': 'E', 'secondheat-description': 'None', 'walls-env-eff': 'Very Poor',
        'transaction-type': 'marketed sale', 'uprn': '100051073214', 'current-energy-efficiency': '54',
        'energy-consumption-current': '335', 'mainheat-description': 'Boiler and radiators, mains gas',
        'lighting-cost-current': '131', 'lodgement-date': '2023-05-27', 'extension-count': '1',
        'mainheatc-env-eff': 'Average',
        'lmk-key': 'dc1a4da246562656132b8e36e0534cd90b09fa40fc584e25e644e2d9ab86a247', 'wind-turbine-count': '0',
        'tenure': 'Not defined - use in the case of a new dwelling for which the intended tenure in not known. It '
                  'is not to be used for an existing dwelling',
        'floor-level': '', 'potential-energy-efficiency': '80', 'hot-water-energy-eff': 'Good',
        'low-energy-lighting': '43',
        'walls-description': 'Sandstone or limestone, as built, no insulation (assumed)',
        'hotwater-description': 'From main system'
    }
    # Expected value for every column of the final scoring record
    expected_row = {
        'UPRN': '100051073214', 'RDSAP_CHANGE': 2, 'HEAT_DEMAND_CHANGE': -22, 'CARBON_CHANGE': -0.39999999999999947,
        'SAP_STARTING': 54, 'SAP_ENDING': 56, 'HEAT_DEMAND_STARTING': 335, 'HEAT_DEMAND_ENDING': 313,
        'CARBON_STARTING': 6.3, 'CARBON_ENDING': 5.9, 'PROPERTY_TYPE': 'House', 'BUILT_FORM': 'Mid-Terrace',
        'CONSTITUENCY': 'E14000921', 'NUMBER_HABITABLE_ROOMS': 5.0, 'NUMBER_HEATED_ROOMS': 5.0,
        'FIXED_LIGHTING_OUTLETS_COUNT': 21.0, 'CONSTRUCTION_AGE_BAND': 'England and Wales: 1900-1929',
        'TRANSACTION_TYPE_STARTING': 'marketed sale', 'MECHANICAL_VENTILATION_STARTING': 'natural',
        'SECONDHEAT_DESCRIPTION_STARTING': 'None', 'ENERGY_TARIFF_STARTING': 'Single',
        'SOLAR_WATER_HEATING_FLAG_STARTING': 'N', 'PHOTO_SUPPLY_STARTING': 0.0,
        'GLAZED_TYPE_STARTING': 'double glazing, unknown install date', 'MULTI_GLAZE_PROPORTION_STARTING': 100.0,
        'LOW_ENERGY_LIGHTING_STARTING': 43.0, 'NUMBER_OPEN_FIREPLACES_STARTING': 0.0,
        'EXTENSION_COUNT_STARTING': 1.0, 'TOTAL_FLOOR_AREA_STARTING': 107.0, 'FLOOR_HEIGHT_STARTING': 1.87,
        'TRANSACTION_TYPE_ENDING': 'marketed sale', 'MECHANICAL_VENTILATION_ENDING': 'mechanical, extract only',
        'SECONDHEAT_DESCRIPTION_ENDING': 'None', 'ENERGY_TARIFF_ENDING': 'Single',
        'SOLAR_WATER_HEATING_FLAG_ENDING': 'N', 'PHOTO_SUPPLY_ENDING': 0.0,
        'GLAZED_TYPE_ENDING': 'double glazing, unknown install date', 'MULTI_GLAZE_PROPORTION_ENDING': 100.0,
        'LOW_ENERGY_LIGHTING_ENDING': 43.0, 'NUMBER_OPEN_FIREPLACES_ENDING': 0.0, 'EXTENSION_COUNT_ENDING': 1.0,
        'TOTAL_FLOOR_AREA_ENDING': 107.0, 'FLOOR_HEIGHT_ENDING': 1.87, 'DAYS_TO_STARTING': 3221,
        'DAYS_TO_ENDING': 2874, 'walls_thermal_transmittance': 2.0, 'is_cavity_wall': False,
        'is_filled_cavity': False, 'is_solid_brick': False, 'is_system_built': False, 'is_timber_frame': False,
        'is_granite_or_whinstone': False, 'is_as_built': True, 'is_cob': False, 'is_sandstone_or_limestone': True,
        'is_park_home': False, 'walls_insulation_thickness': 'none', 'external_insulation': False,
        'internal_insulation': False, 'walls_thermal_transmittance_ENDING': 2.0, 'is_park_home_ENDING': False,
        'walls_insulation_thickness_ENDING': 'none', 'external_insulation_ENDING': False,
        'internal_insulation_ENDING': False, 'floor_thermal_transmittance': 0.62, 'is_to_unheated_space': False,
        'is_to_external_air': False, 'is_suspended': True, 'is_solid': False, 'another_property_below': False,
        'floor_insulation_thickness': 'none', 'floor_thermal_transmittance_ENDING': 0.62,
        'floor_insulation_thickness_ENDING': 'none', 'roof_thermal_transmittance': 2.3, 'is_pitched': True,
        'is_roof_room': False, 'is_loft': False, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False,
        'has_dwelling_above': False, 'roof_insulation_thickness': 'none', 'roof_thermal_transmittance_ENDING': 2.3,
        'roof_insulation_thickness_ENDING': 'none', 'heater_type': 'Unknown', 'system_type': 'from main system',
        'thermostat_characteristics': 'Unknown', 'heating_scope': 'Unknown', 'energy_recovery': 'Unknown',
        'hotwater_tariff_type': 'Unknown', 'extra_features': 'Unknown', 'chp_systems': 'Unknown',
        'distribution_system': 'Unknown', 'no_system_present': 'Unknown', 'appliance': 'Unknown',
        'heater_type_ENDING': 'Unknown', 'system_type_ENDING': 'from main system',
        'thermostat_characteristics_ENDING': 'Unknown', 'heating_scope_ENDING': 'Unknown',
        'energy_recovery_ENDING': 'Unknown', 'hotwater_tariff_type_ENDING': 'Unknown',
        'extra_features_ENDING': 'Unknown', 'chp_systems_ENDING': 'Unknown',
        'distribution_system_ENDING': 'Unknown', 'no_system_present_ENDING': 'Unknown',
        'appliance_ENDING': 'Unknown', 'has_radiators': True, 'has_fan_coil_units': False,
        'has_pipes_in_screed_above_insulation': False, 'has_pipes_in_insulated_timber_floor': False,
        'has_pipes_in_concrete_slab': False, 'has_boiler': True, 'has_air_source_heat_pump': False,
        'has_room_heaters': False, 'has_electric_storage_heaters': False, 'has_warm_air': False,
        'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False,
        'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False,
        'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False,
        'has_electric_heat_pump': False, 'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False,
        'has_exhaust_source_heat_pump': False, 'has_community_heat_pump': False, 'has_electric': False,
        'has_mains_gas': True, 'has_wood_logs': False, 'has_coal': False, 'has_oil': False,
        'has_wood_pellets': False, 'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False,
        'has_smokeless_fuel': False, 'has_lpg': False, 'has_b30k': False, 'has_electricaire': False,
        'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False, 'has_radiators_ENDING': True,
        'has_fan_coil_units_ENDING': False, 'has_pipes_in_screed_above_insulation_ENDING': False,
        'has_pipes_in_insulated_timber_floor_ENDING': False, 'has_pipes_in_concrete_slab_ENDING': False,
        'has_boiler_ENDING': True, 'has_air_source_heat_pump_ENDING': False, 'has_room_heaters_ENDING': False,
        'has_electric_storage_heaters_ENDING': False, 'has_warm_air_ENDING': False,
        'has_electric_underfloor_heating_ENDING': False, 'has_electric_ceiling_heating_ENDING': False,
        'has_community_scheme_ENDING': False, 'has_ground_source_heat_pump_ENDING': False,
        'has_no_system_present_ENDING': False, 'has_portable_electric_heaters_ENDING': False,
        'has_water_source_heat_pump_ENDING': False, 'has_electric_heat_pump_ENDING': False,
        'has_micro-cogeneration_ENDING': False, 'has_solar_assisted_heat_pump_ENDING': False,
        'has_exhaust_source_heat_pump_ENDING': False, 'has_community_heat_pump_ENDING': False,
        'has_electric_ENDING': False, 'has_mains_gas_ENDING': True, 'has_wood_logs_ENDING': False,
        'has_coal_ENDING': False, 'has_oil_ENDING': False, 'has_wood_pellets_ENDING': False,
        'has_anthracite_ENDING': False, 'has_dual_fuel_mineral_and_wood_ENDING': False,
        'has_smokeless_fuel_ENDING': False, 'has_lpg_ENDING': False, 'has_b30k_ENDING': False,
        'has_electricaire_ENDING': False, 'has_assumed_for_most_rooms_ENDING': False,
        'has_underfloor_heating_ENDING': False, 'thermostatic_control': 'Unknown', 'charging_system': 'Unknown',
        'switch_system': 'programmer', 'no_control': 'Unknown', 'dhw_control': 'Unknown',
        'community_heating': 'Unknown', 'multiple_room_thermostats': False, 'auxiliary_systems': 'bypass',
        'trvs': 'trvs', 'rate_control': 'Unknown', 'thermostatic_control_ENDING': 'Unknown',
        'charging_system_ENDING': 'Unknown', 'switch_system_ENDING': 'programmer', 'no_control_ENDING': 'Unknown',
        'dhw_control_ENDING': 'Unknown', 'community_heating_ENDING': 'Unknown',
        'multiple_room_thermostats_ENDING': False, 'auxiliary_systems_ENDING': 'bypass', 'trvs_ENDING': 'trvs',
        'rate_control_ENDING': 'Unknown', 'glazing_type': 'double', 'glazing_type_ENDING': 'double',
        'fuel_type': 'mains gas', 'main-fuel_tariff_type': 'Unknown', 'is_community': False,
        'no_individual_heating_or_community_network': False, 'complex_fuel_type': 'Unknown',
        'fuel_type_ENDING': 'mains gas', 'main-fuel_tariff_type_ENDING': 'Unknown', 'is_community_ENDING': False,
        'no_individual_heating_or_community_network_ENDING': False, 'complex_fuel_type_ENDING': 'Unknown',
        'estimated_perimeter_STARTING': 41.634120622393354, 'estimated_perimeter_ENDING': 41.634120622393354,
        'HOT_WATER_ENERGY_EFF_STARTING': "Good",
        "FLOOR_ENERGY_EFF_STARTING": "Unknown",
        "WINDOWS_ENERGY_EFF_STARTING": "Average",
        "WALLS_ENERGY_EFF_STARTING": "Very Poor",
        "SHEATING_ENERGY_EFF_STARTING": "Unknown",
        "ROOF_ENERGY_EFF_STARTING": "Very Poor",
        "MAINHEAT_ENERGY_EFF_STARTING": "Good",
        "MAINHEATC_ENERGY_EFF_STARTING": "Average",
        "LIGHTING_ENERGY_EFF_STARTING": "Average",
        "POTENTIAL_ENERGY_EFFICIENCY": 80,
        "ENVIRONMENT_IMPACT_POTENTIAL": 75,
        "ENERGY_CONSUMPTION_POTENTIAL": 152,
        "CO2_EMISSIONS_POTENTIAL": 2.9,
        'HOT_WATER_ENERGY_EFF_ENDING': "Good",
        "FLOOR_ENERGY_EFF_ENDING": "Unknown",
        "WINDOWS_ENERGY_EFF_ENDING": "Average",
        "WALLS_ENERGY_EFF_ENDING": "Very Poor",
        "SHEATING_ENERGY_EFF_ENDING": "Unknown",
        "ROOF_ENERGY_EFF_ENDING": "Very Poor",
        "MAINHEAT_ENERGY_EFF_ENDING": "Good",
        "MAINHEATC_ENERGY_EFF_ENDING": "Average",
        "LIGHTING_ENERGY_EFF_ENDING": "Average",
    }

    # Property under test; the auth token is a placeholder value
    placeholder_client = EpcClient(auth_token="notoken")
    dwelling = Property(
        id=0,
        postcode=epc_input["postcode"],
        address1=epc_input["address1"],
        epc_client=placeholder_client,
        data=epc_input
    )
    dwelling.get_components(cleaned)

    # Feed the property's model data through a fresh processor
    processor = DataProcessor(None, newdata=True)
    model_frame = pd.DataFrame([dwelling.get_model_data()])
    processor.insert_data(model_frame)
    processor.pre_process()

    starting_features = processor.get_component_features(suffix="_STARTING")
    ending_features = processor.get_component_features(suffix="_ENDING")
    fixed_features = processor.get_fixed_features()
    ending_features["DAYS_TO_ENDING"] = processor.calculate_days_to('2022-06-14')

    ventilation_measure = {
        "recommendation_id": 0,
        "type": "mechanical_ventilation"
    }
    scoring_record = create_recommendation_scoring_data(
        property=dwelling,
        recommendation=ventilation_measure,
        starting_epc_data=starting_features,
        ending_epc_data=ending_features,
        fixed_data=fixed_features,
    )
    scoring_frame = pd.DataFrame([scoring_record])

    # Final cleaning: averages cleaning first, then the missing-value pass over the u-value/thickness columns
    averaged_frame = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_frame,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"]
    )
    averaged_frame = averaged_frame.drop(columns=["LOCAL_AUTHORITY"])
    description_columns = [
        name for name in averaged_frame.columns
        if any(marker in name for marker in ("thermal_transmittance", "insulation_thickness"))
    ]
    final_frame = DataProcessor.clean_missings_after_description_process(averaged_frame, description_columns)

    # Every remaining column must match the expected row exactly
    not_compared = ("id", "SAP_ENDING", "HEAT_DEMAND_ENDING", "CARBON_ENDING")
    for name in final_frame.columns:
        if name not in not_compared:
            assert final_frame[name].values[0] == expected_row[name]
def test_fireplaces(self, cleaned, cleaning_data):
# Regression test for the "sealing_open_fireplace" recommendation: builds the scoring record for one
# property from its starting EPC and compares it, column by column, with the expected record (row4).
# :param cleaned: fixture handed straight to Property.get_components
# :param cleaning_data: fixture handed to DataProcessor.apply_averages_cleaning
#
# Raw starting EPC record for the property (hyphenated keys); it reports one open fireplace.
starting_epc4 = {
'low-energy-fixed-light-count': '', 'address': '9 Glebe Road, Asfordby Hill',
'uprn-source': 'Energy Assessor', 'floor-height': '2.4', 'heating-cost-potential': '501',
'unheated-corridor-length': '', 'hot-water-cost-potential': '70',
'construction-age-band': 'England and Wales: 1930-1949', 'potential-energy-rating': 'C',
'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Average',
'environment-impact-potential': '76', 'glazed-type': 'double glazing, unknown install date',
'heating-cost-current': '723', 'address3': '',
'mainheatcont-description': 'Programmer and room thermostat', 'sheating-energy-eff': 'N/A',
'property-type': 'House', 'local-authority-label': 'Melton',
'fixed-lighting-outlets-count': '14', 'energy-tariff': 'dual',
'mechanical-ventilation': 'natural', 'hot-water-cost-current': '98',
'county': 'Leicestershire', 'postcode': 'LE14 3QT', 'solar-water-heating-flag': 'N',
'constituency': 'E14000909', 'co2-emissions-potential': '2.4', 'number-heated-rooms': '5',
'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '153',
'local-authority': 'E07000133', 'built-form': 'Semi-Detached', 'number-open-fireplaces': '1',
'windows-description': 'Fully double glazed', 'glazed-area': 'Normal',
'inspection-date': '2022-06-27', 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '46',
'address1': '9 Glebe Road', 'heat-loss-corridor': '', 'flat-storey-count': '',
'constituency-label': 'Rutland and Melton', 'roof-energy-eff': 'Good',
'total-floor-area': '87.0', 'building-reference-number': '10002396876',
'environment-impact-current': '60', 'co2-emissions-current': '4.0',
'roof-description': 'Pitched, 200 mm loft insulation', 'floor-energy-eff': 'N/A',
'number-habitable-rooms': '5', 'address2': 'Asfordby Hill', 'hot-water-env-eff': 'Good',
'posttown': 'MELTON MOWBRAY', 'mainheatc-energy-eff': 'Average',
'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Average',
'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
'lighting-description': 'Low energy lighting in 29% of fixed outlets', 'roof-env-eff': 'Good',
'walls-energy-eff': 'Very Poor', 'photo-supply': '15.0', 'lighting-cost-potential': '79',
'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
'lodgement-datetime': '2022-06-27 15:28:18', 'flat-top-storey': '',
'current-energy-rating': 'D',
'secondheat-description': 'Room heaters, dual fuel (mineral and wood)',
'walls-env-eff': 'Very Poor', 'transaction-type': 'ECO assessment', 'uprn': '100030539619',
'current-energy-efficiency': '66', 'energy-consumption-current': '256',
'mainheat-description': 'Boiler and radiators, mains gas', 'lighting-cost-current': '135',
'lodgement-date': '2022-06-27', 'extension-count': '1', 'mainheatc-env-eff': 'Average',
'lmk-key': '736b6f4803a11d9e45b49bf98f36eb8a7f357b0dd24f3e7cddef5295518e5bef',
'wind-turbine-count': '0', 'tenure': 'Owner-occupied', 'floor-level': '',
'potential-energy-efficiency': '78', 'hot-water-energy-eff': 'Good',
'low-energy-lighting': '29',
'walls-description': 'Solid brick, as built, no insulation (assumed)',
'hotwater-description': 'From main system'
}
# Expected scoring record. NUMBER_OPEN_FIREPLACES_STARTING is 1.0 and NUMBER_OPEN_FIREPLACES_ENDING is 0.
row4 = {
'UPRN': '100030539619', 'RDSAP_CHANGE': 7, 'HEAT_DEMAND_CHANGE': -41, 'CARBON_CHANGE': -0.5,
'SAP_STARTING': 66, 'SAP_ENDING': 73, 'HEAT_DEMAND_STARTING': 256, 'HEAT_DEMAND_ENDING': 215,
'CARBON_STARTING': 4.0, 'CARBON_ENDING': 3.5, 'PROPERTY_TYPE': 'House', 'BUILT_FORM': 'Semi-Detached',
'CONSTITUENCY': 'E14000909', 'NUMBER_HABITABLE_ROOMS': 5.0, 'NUMBER_HEATED_ROOMS': 5.0,
'FIXED_LIGHTING_OUTLETS_COUNT': 14.0, 'CONSTRUCTION_AGE_BAND': 'England and Wales: 1930-1949',
'TRANSACTION_TYPE_STARTING': 'eco assessment', 'MECHANICAL_VENTILATION_STARTING': 'natural',
'SECONDHEAT_DESCRIPTION_STARTING': 'Room heaters, dual fuel (mineral and wood)',
'ENERGY_TARIFF_STARTING': 'dual', 'SOLAR_WATER_HEATING_FLAG_STARTING': 'N', 'PHOTO_SUPPLY_STARTING': 15.0,
'GLAZED_TYPE_STARTING': 'double glazing, unknown install date', 'MULTI_GLAZE_PROPORTION_STARTING': 100.0,
'LOW_ENERGY_LIGHTING_STARTING': 29.0, 'NUMBER_OPEN_FIREPLACES_STARTING': 1.0,
'EXTENSION_COUNT_STARTING': 1.0, 'TOTAL_FLOOR_AREA_STARTING': 87.0, 'FLOOR_HEIGHT_STARTING': 2.4,
'TRANSACTION_TYPE_ENDING': 'eco assessment', 'MECHANICAL_VENTILATION_ENDING': 'natural',
'SECONDHEAT_DESCRIPTION_ENDING': 'Room heaters, dual fuel (mineral and wood)',
'ENERGY_TARIFF_ENDING': 'dual', 'SOLAR_WATER_HEATING_FLAG_ENDING': 'N', 'PHOTO_SUPPLY_ENDING': 15.0,
'GLAZED_TYPE_ENDING': 'double glazing, unknown install date', 'MULTI_GLAZE_PROPORTION_ENDING': 100.0,
'LOW_ENERGY_LIGHTING_ENDING': 29.0, 'NUMBER_OPEN_FIREPLACES_ENDING': 0, 'EXTENSION_COUNT_ENDING': 1.0,
'TOTAL_FLOOR_AREA_ENDING': 87.0, 'FLOOR_HEIGHT_ENDING': 2.4, 'DAYS_TO_STARTING': 2887,
'DAYS_TO_ENDING': 2960, 'walls_thermal_transmittance': 1.7, 'is_cavity_wall': False,
'is_filled_cavity': False, 'is_solid_brick': True, 'is_system_built': False, 'is_timber_frame': False,
'is_granite_or_whinstone': False, 'is_as_built': True, 'is_cob': False, 'is_sandstone_or_limestone': False,
'is_park_home': False, 'walls_insulation_thickness': 'none', 'external_insulation': False,
'internal_insulation': False, 'walls_thermal_transmittance_ENDING': 1.7, 'is_park_home_ENDING': False,
'walls_insulation_thickness_ENDING': 'none', 'external_insulation_ENDING': False,
'internal_insulation_ENDING': False, 'floor_thermal_transmittance': 0.66, 'is_to_unheated_space': False,
'is_to_external_air': False, 'is_suspended': False, 'is_solid': True, 'another_property_below': False,
'floor_insulation_thickness': 'none', 'floor_thermal_transmittance_ENDING': 0.66,
'floor_insulation_thickness_ENDING': 'none', 'roof_thermal_transmittance': 0.21, 'is_pitched': True,
'is_roof_room': False, 'is_loft': True, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False,
'has_dwelling_above': False, 'roof_insulation_thickness': '200', 'roof_thermal_transmittance_ENDING': 0.21,
'roof_insulation_thickness_ENDING': '200', 'heater_type': 'Unknown', 'system_type': 'from main system',
'thermostat_characteristics': 'Unknown', 'heating_scope': 'Unknown', 'energy_recovery': 'Unknown',
'hotwater_tariff_type': 'Unknown', 'extra_features': 'Unknown', 'chp_systems': 'Unknown',
'distribution_system': 'Unknown', 'no_system_present': 'Unknown', 'appliance': 'Unknown',
'heater_type_ENDING': 'Unknown', 'system_type_ENDING': 'from main system',
'thermostat_characteristics_ENDING': 'Unknown', 'heating_scope_ENDING': 'Unknown',
'energy_recovery_ENDING': 'Unknown', 'hotwater_tariff_type_ENDING': 'Unknown',
'extra_features_ENDING': 'Unknown', 'chp_systems_ENDING': 'Unknown',
'distribution_system_ENDING': 'Unknown', 'no_system_present_ENDING': 'Unknown',
'appliance_ENDING': 'Unknown', 'has_radiators': True, 'has_fan_coil_units': False,
'has_pipes_in_screed_above_insulation': False, 'has_pipes_in_insulated_timber_floor': False,
'has_pipes_in_concrete_slab': False, 'has_boiler': True, 'has_air_source_heat_pump': False,
'has_room_heaters': False, 'has_electric_storage_heaters': False, 'has_warm_air': False,
'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False,
'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False,
'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False,
'has_electric_heat_pump': False, 'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False,
'has_exhaust_source_heat_pump': False, 'has_community_heat_pump': False, 'has_electric': False,
'has_mains_gas': True, 'has_wood_logs': False, 'has_coal': False, 'has_oil': False,
'has_wood_pellets': False, 'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False,
'has_smokeless_fuel': False, 'has_lpg': False, 'has_b30k': False, 'has_electricaire': False,
'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False, 'has_radiators_ENDING': True,
'has_fan_coil_units_ENDING': False, 'has_pipes_in_screed_above_insulation_ENDING': False,
'has_pipes_in_insulated_timber_floor_ENDING': False, 'has_pipes_in_concrete_slab_ENDING': False,
'has_boiler_ENDING': True, 'has_air_source_heat_pump_ENDING': False, 'has_room_heaters_ENDING': False,
'has_electric_storage_heaters_ENDING': False, 'has_warm_air_ENDING': False,
'has_electric_underfloor_heating_ENDING': False, 'has_electric_ceiling_heating_ENDING': False,
'has_community_scheme_ENDING': False, 'has_ground_source_heat_pump_ENDING': False,
'has_no_system_present_ENDING': False, 'has_portable_electric_heaters_ENDING': False,
'has_water_source_heat_pump_ENDING': False, 'has_electric_heat_pump_ENDING': False,
'has_micro-cogeneration_ENDING': False, 'has_solar_assisted_heat_pump_ENDING': False,
'has_exhaust_source_heat_pump_ENDING': False, 'has_community_heat_pump_ENDING': False,
'has_electric_ENDING': False, 'has_mains_gas_ENDING': True, 'has_wood_logs_ENDING': False,
'has_coal_ENDING': False, 'has_oil_ENDING': False, 'has_wood_pellets_ENDING': False,
'has_anthracite_ENDING': False, 'has_dual_fuel_mineral_and_wood_ENDING': False,
'has_smokeless_fuel_ENDING': False, 'has_lpg_ENDING': False, 'has_b30k_ENDING': False,
'has_electricaire_ENDING': False, 'has_assumed_for_most_rooms_ENDING': False,
'has_underfloor_heating_ENDING': False, 'thermostatic_control': 'room thermostat',
'charging_system': 'Unknown', 'switch_system': 'programmer', 'no_control': 'Unknown',
'dhw_control': 'Unknown', 'community_heating': 'Unknown', 'multiple_room_thermostats': False,
'auxiliary_systems': 'Unknown', 'trvs': 'Unknown', 'rate_control': 'Unknown',
'thermostatic_control_ENDING': 'room thermostat', 'charging_system_ENDING': 'Unknown',
'switch_system_ENDING': 'programmer', 'no_control_ENDING': 'Unknown', 'dhw_control_ENDING': 'Unknown',
'community_heating_ENDING': 'Unknown', 'multiple_room_thermostats_ENDING': False,
'auxiliary_systems_ENDING': 'Unknown', 'trvs_ENDING': 'Unknown', 'rate_control_ENDING': 'Unknown',
'glazing_type': 'double', 'glazing_type_ENDING': 'double', 'fuel_type': 'mains gas',
'main-fuel_tariff_type': 'Unknown', 'is_community': False,
'no_individual_heating_or_community_network': False, 'complex_fuel_type': 'Unknown',
'fuel_type_ENDING': 'mains gas', 'main-fuel_tariff_type_ENDING': 'Unknown', 'is_community_ENDING': False,
'no_individual_heating_or_community_network_ENDING': False, 'complex_fuel_type_ENDING': 'Unknown',
'estimated_perimeter_STARTING': 37.54197650630557, 'estimated_perimeter_ENDING': 37.54197650630557,
'HOT_WATER_ENERGY_EFF_STARTING': "Good",
"FLOOR_ENERGY_EFF_STARTING": "Unknown",
"WINDOWS_ENERGY_EFF_STARTING": "Average",
"WALLS_ENERGY_EFF_STARTING": "Very Poor",
"SHEATING_ENERGY_EFF_STARTING": "Unknown",
"ROOF_ENERGY_EFF_STARTING": "Good",
"MAINHEAT_ENERGY_EFF_STARTING": "Good",
"MAINHEATC_ENERGY_EFF_STARTING": "Average",
"LIGHTING_ENERGY_EFF_STARTING": "Average",
"POTENTIAL_ENERGY_EFFICIENCY": 78,
"ENVIRONMENT_IMPACT_POTENTIAL": 76,
"ENERGY_CONSUMPTION_POTENTIAL": 153,
"CO2_EMISSIONS_POTENTIAL": 2.4,
'HOT_WATER_ENERGY_EFF_ENDING': "Good",
"FLOOR_ENERGY_EFF_ENDING": "Unknown",
"WINDOWS_ENERGY_EFF_ENDING": "Average",
"WALLS_ENERGY_EFF_ENDING": "Very Poor",
"SHEATING_ENERGY_EFF_ENDING": "Unknown",
"ROOF_ENERGY_EFF_ENDING": "Good",
"MAINHEAT_ENERGY_EFF_ENDING": "Good",
"MAINHEATC_ENERGY_EFF_ENDING": "Average",
"LIGHTING_ENERGY_EFF_ENDING": "Average",
}
# Build the Property from the raw EPC record (dummy EPC auth token) and derive its components.
home4 = Property(
id=0,
postcode=starting_epc4["postcode"],
address1=starting_epc4["address1"],
epc_client=EpcClient(auth_token="notoken"),
data=starting_epc4
)
home4.get_components(cleaned)
# Pre-process the property's model data and pull out the starting, ending and fixed feature groups.
data_processor4 = DataProcessor(None, newdata=True)
data_processor4.insert_data(pd.DataFrame([home4.get_model_data()]))
data_processor4.pre_process()
starting_epc_data4 = data_processor4.get_component_features(suffix="_STARTING")
ending_epc_data4 = data_processor4.get_component_features(suffix="_ENDING")
fixed_data4 = data_processor4.get_fixed_features()
# The ending lodgement date is supplied by hand and converted with calculate_days_to.
ending_lodgement_date4 = '2022-09-08'
ending_epc_data4["DAYS_TO_ENDING"] = data_processor4.calculate_days_to(ending_lodgement_date4)
# The recommendation under test.
recommendation4 = {
"recommendation_id": 0,
"type": "sealing_open_fireplace"
}
test_record4 = create_recommendation_scoring_data(
property=home4,
recommendation=recommendation4,
starting_epc_data=starting_epc_data4,
ending_epc_data=ending_epc_data4,
fixed_data=fixed_data4,
)
test_record4 = pd.DataFrame([test_record4])
# Test the final cleaning:
test_record4 = DataProcessor.apply_averages_cleaning(
data_to_clean=test_record4,
cleaning_data=cleaning_data,
cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"]
).drop(columns=["LOCAL_AUTHORITY"])
# Only the thermal transmittance / insulation thickness columns are passed to the missing-value clean-up.
test_record4 = DataProcessor.clean_missings_after_description_process(
test_record4, [
c for c in test_record4.columns if
("thermal_transmittance" in c) or ("insulation_thickness" in c)
]
)
# Compare every produced column with the expected row, skipping id, SAP_ENDING, HEAT_DEMAND_ENDING
# and CARBON_ENDING.
for c in test_record4.columns:
if c in ["id", "SAP_ENDING", "HEAT_DEMAND_ENDING", "CARBON_ENDING"]:
continue
assert test_record4[c].values[0] == row4[c]

View file

@ -0,0 +1,78 @@
import pandas as pd
from tqdm import tqdm
from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
from utils.logger import setup_logger
from etl.epc.settings import EARLIEST_EPC_DATE
# Module-level logger, created once at import time via the project's setup_logger helper.
logger = setup_logger()
class AirSourceHeatPumpEfficiency:
    """
    Count the heating-system configurations recorded on EPC certificates that mention an air source heat pump.

    Each directory in ``file_directories`` is read via its ``certificates.csv`` file. For every directory one
    table of configuration counts is appended to ``self.results``.
    """

    # Heating description / efficiency columns whose distinct combinations are counted.
    _HEATING_COLUMNS = [
        "MAINHEAT_DESCRIPTION",
        "MAINHEAT_ENERGY_EFF",
        "MAINHEATCONT_DESCRIPTION",
        "MAINHEATC_ENERGY_EFF",
        "MAIN_FUEL",
        "HOTWATER_DESCRIPTION",
        "HOT_WATER_ENERGY_EFF",
        "MAINS_GAS_FLAG",
    ]

    # Columns that must be populated for a certificate to be kept.
    _REQUIRED_COLUMNS = ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]

    # TENURE values that are excluded from the dataset.
    _EXCLUDED_TENURES = [
        "unknown",
        "Not defined - use in the case of a new dwelling for which the intended tenure in not known. "
        "It is not to be used for an existing dwelling",
    ]

    def __init__(self, file_directories, cleaned_lookup):
        """
        :param file_directories: A list of directories where files are stored.
        :param cleaned_lookup: A dictionary containing cleaned lookup data.
        """
        self.file_directories = file_directories
        self.cleaned_lookup = cleaned_lookup
        self.results = []

    def create_dataset(self):
        """
        Read ``certificates.csv`` from every directory and store its heating-configuration counts.

        The previous implementation narrowed the frame to the heating columns *before* filtering on
        PROPERTY_TYPE / BUILT_FORM / CONSTRUCTION_AGE_BAND / TOTAL_FLOOR_AREA and de-duplicating on
        UPRN / LODGEMENT_DATE, which raised a KeyError, and it discarded the counts it computed. The row-level
        cleaning now happens first and the counts are kept in ``self.results``.

        :return: None. One counts DataFrame per directory is appended to ``self.results``.
        """
        logger.info("Creating air source heat pump efficiency dataset")

        for directory in tqdm(self.file_directories):
            certificates = pd.read_csv(directory / "certificates.csv", low_memory=False)
            self.results.append(
                self._count_heating_configurations(certificates, EARLIEST_EPC_DATE)
            )

    @classmethod
    def _count_heating_configurations(cls, df, earliest_date):
        """
        Reduce a raw certificates table to counts of air source heat pump heating configurations.

        :param df: Certificates table with at least UPRN, LODGEMENT_DATE, TENURE, the required columns and
            the heating columns.
        :param earliest_date: Only certificates lodged strictly after this date are kept.
        :return: DataFrame with one row per distinct combination of the heating columns and a ``count`` column.
        """
        # Certificates without a UPRN cannot be de-duplicated per property, so they are dropped.
        df = df[df["UPRN"].notna()].copy()
        df["UPRN"] = df["UPRN"].astype(int).astype(str)

        # Take entries after SAP12
        df["LODGEMENT_DATE"] = pd.to_datetime(df["LODGEMENT_DATE"])
        df = df[df["LODGEMENT_DATE"] > earliest_date]

        df = df[~df["TENURE"].isin(cls._EXCLUDED_TENURES)]

        # Take entries that contain an air source heat pump
        df = df[
            df["MAINHEAT_DESCRIPTION"].str.contains("air source heat pump", case=False, na=False)
        ]

        # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA
        df = df.dropna(subset=cls._REQUIRED_COLUMNS)

        # Take newest LODGEMENT_DATE per UPRN
        df = df.sort_values(by="LODGEMENT_DATE", ascending=False).drop_duplicates(subset=["UPRN"])

        # Only now narrow to the heating columns and count each distinct configuration. Note that groupby
        # skips rows where any of the grouping columns is missing (pandas default dropna=True).
        return df.groupby(cls._HEATING_COLUMNS).size().reset_index(name="count")

View file

@ -0,0 +1,24 @@
from pathlib import Path
from backend.app.plan.utils import get_cleaned
from etl.air_source_heat_pump.AirSourceHeatPumpEfficiency import AirSourceHeatPumpEfficiency
# Root of the locally stored EPC download, resolved relative to this file; app() hands each of its
# sub-directories to AirSourceHeatPumpEfficiency.
DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
def app():
    """
    Read the EPC dataset and look at the efficiency values of heating systems that include air source
    heat pumps.

    The resulting dataset is used to inform the air source heat pump recommendations, so we know how to
    set the simulation.

    :return: None
    """
    ashp_data_client = AirSourceHeatPumpEfficiency(
        # Only sub-directories of the data directory are processed; loose files are ignored.
        file_directories=list(filter(Path.is_dir, DATA_DIRECTORY.iterdir())),
        cleaned_lookup=get_cleaned(),
    )
    ashp_data_client.create_dataset()

View file

@ -73,6 +73,9 @@ def app():
suspended_floor_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="suspended_floor_insulation", header=0)
solid_floor_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="solid_floor_insulation", header=0)
ewi_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="external_wall_insulation", header=0)
lel_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="low_energy_lighting", header=0)
flat_roof_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="flat_roof_insulation", header=0)
window_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="window_glazing", header=0)
# Form a single table to be uploaded
costs = pd.concat(
@ -83,6 +86,8 @@ def app():
suspended_floor_costs,
solid_floor_costs,
ewi_costs,
lel_costs,
flat_roof_costs
]
)

View file

@ -0,0 +1,211 @@
import pandas as pd
from utils.s3 import save_csv_to_s3
# User and portfolio the asset list is written for; together they form the S3 key
# "<USER_ID>/<PORTFOLIO_ID>/inputs.csv", and PORTFOLIO_ID is also sent in the request body.
USER_ID = 8
PORTFOLIO_ID = 67
# Fixed UPRNs kept for each archetype: app() drops every asset-list row whose uprn is not in one of
# these lists (20, 46, 23 and 13 entries respectively).
# NOTE(review): presumably the frozen output of a one-off random sample per archetype - confirm.
archetype_1_uprns = [100020604138, 200001188299, 100020578756, 200001187196, 200001192253, 100020581792, 200001188304,
100020625813, 100020618060, 100020585305, 100020617489, 100020615039, 100020618076, 100020588913,
200001187197, 100020671205, 100020576940, 100020619814, 100020576472, 100020618083]
archetype_2_uprns = [100020698027, 10001007455, 100020653785, 10090383198, 100020665632, 100020620659, 100020615603,
100020609610, 100020625597, 100020665656, 100020665640, 100020587905, 100020665630, 100020624351,
100020625451, 100020624348, 100020666735, 100020653786, 100020576458, 100020657902, 100020624350,
100020637405, 100020666734, 100020616325, 100020666716, 100020653783, 100020665645, 100020642337,
100020665638, 100022904981, 100020688226, 100020630285, 100020626800, 100020665634, 100022907528,
100020665652, 100020624347, 100020666721, 100020585002, 10014055968, 10001008257, 100020621438,
100020576459, 100020665643, 100020665654, 100022917303]
archetype_3_uprns = [100020577523, 100020616446, 100020605342, 100020594652, 100020585394, 100020601138, 100020597485,
100020614883, 100020633162, 100020697787, 200001185785, 100020646842, 100020581449, 100020595611,
100020641814, 100020575611, 100020652986, 100020654671, 100020647336, 100020610518, 100020607980,
100020692380, 100020581690]
archetype_4_uprns = [100020650603, 100020582907, 100020605116, 100020650607, 100020589325, 100020655500, 100020642537,
200001187539, 100020631683, 100020610165, 100020596436, 100020598277, 100020660228]
def app():
    """
    Build the demo asset list for a small portfolio of social-housing properties based in Croydon.

    The Croydon EPC download is reduced to the newest certificate per UPRN, filtered to socially rented
    properties rated below EPC C that were lodged within the last three years, and split into four
    archetypes. Only the UPRNs listed in the module-level ``archetype_*_uprns`` constants are kept. The
    asset list is saved to S3 and the request body used to trigger the plan is printed.

    :return: None
    """
    # Firstly, read in the EPC data for Croydon
    epc_data = pd.read_csv(
        "local_data/all-domestic-certificates/domestic-E09000008-Croydon/certificates.csv",
        low_memory=False
    )

    # NOTE: a leftover exploratory groupby on WALLS_DESCRIPTION / WALLS_ENERGY_EFF used to sit here. It
    # filtered the grouped frame on MAINHEAT_DESCRIPTION, which is not one of its columns, so the script
    # raised a KeyError before doing any work. Its result was never used, so it has been removed.

    # Filter on entries where we have a UPRN
    epc_data = epc_data[~pd.isnull(epc_data["UPRN"])].copy()

    # Get the newest EPC for each UPRN. We use LODGEMENT_DATE as a proxy for this
    epc_data["LODGEMENT_DATE"] = pd.to_datetime(epc_data["LODGEMENT_DATE"])
    epc_data = epc_data.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN")

    # Now filter on social properties
    # (original note: there are 17337 properties with a registered EPC in Croydon)
    epc_data = epc_data[epc_data["TENURE"].isin(["rental (social)", "Rented (social)"])]

    # Take below EPC C properties, i.e. a current energy efficiency score under 69
    # (original note: 7994 properties (46%) are below EPC C - 79% D, 19% E, 1% F, 0.2% G - so it probably
    # makes the most sense to focus on E and D properties)
    epc_data = epc_data[epc_data["CURRENT_ENERGY_EFFICIENCY"].astype(int) < 69]

    # For the purpose of the sample, take the properties that have had surveys done in the last 3 years
    # (original note: this gives us 1351 remaining properties)
    three_years_ago = pd.Timestamp.now() - pd.DateOffset(days=int(3 * 365))
    epc_data = epc_data[epc_data["LODGEMENT_DATE"] >= three_years_ago]

    unfilled_cavity = ["Cavity wall, as built, no insulation (assumed)"]

    # Archetype 1:
    # 1) House
    # 2) Unfilled cavity
    # 3) A roof that could be insulated (flat or pitched with no more than 50mm insulation)
    # 4) EPC E or D
    archetype_1_sample = epc_data[
        epc_data["PROPERTY_TYPE"].isin(["House"]) &
        (epc_data["CURRENT_ENERGY_RATING"].isin(["D", "E"])) &
        epc_data["WALLS_DESCRIPTION"].isin(unfilled_cavity) &
        epc_data["ROOF_DESCRIPTION"].isin(
            [
                "Pitched, 12 mm loft insulation",
                "Pitched, 0 mm loft insulation",
                "Pitched, no insulation",
                "Pitched, 50 mm loft insulation",
                "Flat, no insulation (assumed)",
                "Pitched, no insulation (assumed)"
            ]
        )
    ]
    archetype_1_sample_asset_list = archetype_1_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
    archetype_1_sample_asset_list["ARCHETYPE"] = "Archetype 1"

    # Archetype 2:
    # 1) Flat
    # 2) Unfilled cavity
    # 3) Another property above
    # 4) EPC E or D
    archetype_2_sample = epc_data[
        epc_data["PROPERTY_TYPE"].isin(["Flat"]) &
        (epc_data["CURRENT_ENERGY_RATING"].isin(["E", "D"])) &
        epc_data["WALLS_DESCRIPTION"].isin(unfilled_cavity) &
        epc_data["ROOF_DESCRIPTION"].isin(
            [
                "(another dwelling above)"
            ]
        )
    ]
    archetype_2_sample_asset_list = archetype_2_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
    archetype_2_sample_asset_list["ARCHETYPE"] = "Archetype 2"

    # Archetype 3 (more expensive to retrofit):
    # 1) EPC E or below
    # 2) Solid brick wall
    # 3) House
    # 4) Pitched roof with no, limited or 100 mm loft insulation
    archetype_3_sample = epc_data[
        epc_data["PROPERTY_TYPE"].isin(["House"]) &
        (epc_data["CURRENT_ENERGY_RATING"].isin(["E", "F", "G"])) &
        epc_data["WALLS_DESCRIPTION"].isin(["Solid brick, as built, no insulation (assumed)"]) &
        epc_data["ROOF_DESCRIPTION"].isin(
            [
                "Pitched, no insulation",
                "Pitched, limited insulation (assumed)",
                "Pitched, 100 mm loft insulation",
                "Pitched, no insulation (assumed)",
            ]
        )
    ]
    archetype_3_sample_asset_list = archetype_3_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
    archetype_3_sample_asset_list["ARCHETYPE"] = "Archetype 3"

    # Archetype 4 (no EPC rating filter beyond the "below C" filter applied above):
    # 1) Maisonette
    # 2) Empty cavity
    archetype_4_sample = epc_data[
        epc_data["PROPERTY_TYPE"].isin(["Maisonette"]) &
        epc_data["WALLS_DESCRIPTION"].isin(unfilled_cavity)
    ]
    archetype_4_sample_asset_list = archetype_4_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
    archetype_4_sample_asset_list["ARCHETYPE"] = "Archetype 4"

    asset_list = pd.concat(
        [
            archetype_1_sample_asset_list,
            archetype_2_sample_asset_list,
            archetype_3_sample_asset_list,
            archetype_4_sample_asset_list
        ]
    )

    asset_list = asset_list.rename(
        columns={
            "UPRN": "uprn",
            "ADDRESS1": "address",
            "POSTCODE": "postcode",
            "ARCHETYPE": "archetype"
        }
    )
    asset_list["uprn"] = asset_list["uprn"].astype(int)

    # We end up with some properties that are currently an EPC C, but we do not have this data in the
    # download, so we manually remove
    # 1) 3 Reid Close, CR5 3BL
    # 2) Flat 6, Collier Court 2A, St. Peters Road CR0 1HD
    asset_list = asset_list[
        ~asset_list["uprn"].isin(
            [
                100020576460,
                100020624352,
            ]
        )
    ]

    # We have slightly too many properties, so only a fixed subset of each archetype is kept. The subset
    # was originally drawn with a random .sample() per archetype (sizes 20, 46, 23 and 13) and is now
    # hard-coded in the module-level archetype_*_uprns lists so that the portfolio is reproducible.
    uprns_to_keep = archetype_1_uprns + archetype_2_uprns + archetype_3_uprns + archetype_4_uprns
    asset_list = asset_list[asset_list["uprn"].isin(uprns_to_keep)]

    filename = f"{USER_ID}/{PORTFOLIO_ID}/inputs.csv"
    save_csv_to_s3(
        dataframe=asset_list,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename
    )

    # Request body for triggering the plan; printed so it can be copied by hand.
    body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Social",
        "goal": "Increase EPC",
        "goal_value": "C",
        "trigger_file_path": filename,
        "budget": None,
        "exclusions": ["floor_insulation"]
    }
    print(body)

View file

@ -0,0 +1,760 @@
"""
This script contains the code to generate the data required to populate the slides.
We connect to the database and extract the data for the portfolio needed, so it is recommended to use
an environment akin to the backend to run this script
"""
import pandas as pd
import numpy as np
from backend.app.db.connection import db_engine
from sqlalchemy.orm import sessionmaker
from utils.s3 import read_csv_from_s3
from etl.customers.slide_utils import (
plot_epc_distribution,
get_property_details_by_portfolio_id,
get_plan_by_portfolio_id,
get_properties_with_default_recommendations,
create_powerpoint,
create_recommendations_summary
)
from backend.ml_models.AnnualBillSavings import AnnualBillSavings
# User whose S3 input folder is read by scenario_1.
USER_ID = 8
# Portfolio ids and their EPC / SAP targets; scenario_1 uses PORTFOLIO_ID_1 and SAP_TARGET_1.
# NOTE(review): the *_2 values are presumably for a second scenario defined later in the file - confirm.
PORTFOLIO_ID_1 = 67
PORTFOLIO_ID_2 = 68
EPC_TARGET_1 = "C"
EPC_TARGET_2 = "A"
# NOTE(review): presumably the SAP scores matching the EPC target bands above - confirm.
SAP_TARGET_1 = 69
SAP_TARGET_2 = 100
CUSTOMER_KEY = "gla-demo"
# Sample UPRNs per archetype, stored as strings (13, 30, 15 and 9 entries respectively)
archetype_1_sample = ['100020604138', '200001192253', '100020581792', '100020576940', '200001187196', '100020618060',
'100020625813', '100020578756', '100020618076', '200001187197', '100020619814', '100020617489',
'100020588913']
archetype_2_sample = ['100020585002', '100020615603', '100020665652', '100020626800', '100020624347', '100020624348',
'100020576459', '10001007455', '100020666716', '100020609610', '100020625451', '100020625597',
'100020624351', '100020665634', '100020624350', '100020665640', '100020665632', '100022917303',
'100020665656', '10014055968', '100020630285', '100020665638', '100020616325', '100020637405',
'100020698027', '100020657902', '100020688226', '100020653786', '100020642337', '100020665643']
archetype_3_sample = ['100020594652', '100020697787', '100020577523', '100020633162', '100020601138', '100020595611',
'100020597485', '100020614883', '100020605342', '100020654671', '100020575611', '100020607980',
'200001185785', '100020616446', '100020692380']
archetype_4_sample = ['100020596436', '100020610165', '200001187539', '100020655500', '100020582907', '100020598277',
'100020650607', '100020605116', '100020650603']
# Archetype labels as they appear in the "archetype" column of the asset list.
_ARCHETYPES = ("Archetype 1", "Archetype 2", "Archetype 3", "Archetype 4")

# Columns of the recommendations summary that are aggregated per archetype.
_SUMMARY_COLUMNS = [
    "total_cost", "total_carbon", "total_bill_savings", "total_sap_points", "adjusted_heat_demand",
    "energy_percent_change", "carbon_percent_change", "bills_percent_change",
]

# Name of each "<mean>: <min> - <max>" text snippet and the summary column it is built from.
_TEXT_COLUMNS = {
    "cost_text": "total_cost",
    "sap_text": "total_sap_points",
    "energy_text": "adjusted_heat_demand",
    "energy_percent_text": "energy_percent_change",
    "carbon_text": "total_carbon",
    "carbon_percent_text": "carbon_percent_change",
    "bill_text": "total_bill_savings",
    "bill_percent_text": "bills_percent_change",
}

# Recommendation "type" values grouped into the measure categories that are counted for the slides.
# Any type not listed here is counted under "other".
_MEASURE_GROUPS = {
    "wall_insulation_measures": ["cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation"],
    "ventilation_measures": ["mechanical_ventilation"],
    "roof_insulation_measures": ["loft_insulation", "flat_roof_insulation"],
    "floor_insulation_measures": ["solid_floor_insulation", "suspended_floor_insulation"],
    "windows": ["windows_glazing"],
    "heating": ["heating"],
    "heating_controls": ["heating_control"],
    "solar": ["solar_pv"],
}


def _load_asset_list():
    """
    Read the asset list from S3 and return it as a DataFrame.

    Both scenarios and make_sample read the same file.
    NOTE(review): the "67" in the key is presumably the portfolio id of scenario 1 - confirm before
    replacing it with PORTFOLIO_ID_1.
    """
    raw = read_csv_from_s3("retrofit-plan-inputs-dev", f"{USER_ID}/67/inputs.csv")
    return pd.DataFrame(raw)


def _count_measures(recommendations_df):
    """
    Count recommendations per measure category.

    :param recommendations_df: One row per recommendation, with "type" and "id" columns
    :return: dict mapping each key of _MEASURE_GROUPS, plus "other", to the number of recommendations
    """
    per_type = recommendations_df.groupby("type")["id"].count()
    counts = {
        group: per_type[per_type.index.isin(types)].sum()
        for group, types in _MEASURE_GROUPS.items()
    }
    known_types = [t for types in _MEASURE_GROUPS.values() for t in types]
    counts["other"] = per_type[~per_type.index.isin(known_types)].sum()
    return counts


def _summarise_archetype(archetype, asset_list, recommendations_summary, property_details_df, recommendations_df):
    """
    Build the summary figures for a single archetype.

    :param archetype: Value of the asset list's "archetype" column to summarise
    :param asset_list: Asset list with "archetype" and "uprn" columns
    :param recommendations_summary: Per-property summary containing "uprn" and the _SUMMARY_COLUMNS
    :param property_details_df: Property details with "uprn" and "co2_emissions" columns
    :param recommendations_df: One row per recommendation, with "uprn", "type" and "id" columns
    :return: dict with the archetype's share of portfolio co2 emissions ("co2_share"), the min/max/mean/totals
        of the summary columns, the recommendation count per type ("measures") and the formatted
        "<mean>: <min> - <max>" strings ("texts")
    """
    # Both sides are cast to str so the match does not depend on how the CSV reader typed the uprn column.
    uprns = set(asset_list.loc[asset_list["archetype"] == archetype, "uprn"].astype(str))

    summary = recommendations_summary[recommendations_summary["uprn"].astype(str).isin(uprns)]
    details = property_details_df[property_details_df["uprn"].astype(str).isin(uprns)]
    measures = (
        recommendations_df[recommendations_df["uprn"].astype(str).isin(uprns)]
        .groupby("type")["id"]
        .count()
        .reset_index()
    )

    # Aggregate only the reported columns; identifier columns such as uprn have no meaningful mean or sum.
    stats = summary[_SUMMARY_COLUMNS]
    minimum, maximum, mean = stats.min(), stats.max(), stats.mean()

    texts = {
        label: f"{round(mean[column], 2)}: {minimum[column]} - {maximum[column]}"
        for label, column in _TEXT_COLUMNS.items()
    }

    return {
        "co2_share": details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum(),
        "min": minimum,
        "max": maximum,
        "mean": mean,
        "totals": stats.sum(),
        "measures": measures,
        "texts": texts,
    }


def _run_scenario(portfolio_id, sap_target, sample_uprns=None):
    """
    Pull the data for a portfolio and compute every figure needed for the slides.

    :param portfolio_id: Portfolio to read properties, property details and plans for
    :param sap_target: SAP target passed through to create_recommendations_summary
    :param sample_uprns: Optional list of uprn strings; when given, the asset list, properties and property
        details are restricted to these uprns before anything is computed
    :return: dict holding the loaded frames, the portfolio-wide overview, the measure counts and one
        summary per archetype (keyed by archetype label under "archetypes")
    """
    session = sessionmaker(bind=db_engine)()
    try:
        ########################################################################
        # Get the data we need
        ########################################################################
        asset_list = _load_asset_list()
        properties_df = pd.DataFrame(get_properties_with_default_recommendations(session, portfolio_id))
        property_details_df = pd.DataFrame(get_property_details_by_portfolio_id(session, portfolio_id))
        plans_df = pd.DataFrame(get_plan_by_portfolio_id(session, portfolio_id))

        if sample_uprns is not None:
            wanted = set(sample_uprns)
            asset_list = asset_list[asset_list["uprn"].astype(str).isin(wanted)]
            properties_df = properties_df[properties_df["uprn"].astype(str).isin(wanted)]
            # copy() so the energy_bill column below is written to an independent frame, not a slice
            property_details_df = property_details_df[
                property_details_df["property_id"].isin(properties_df["id"].values)
            ].copy()

        # We estimate bills based on the adjusted_energy_consumption
        property_details_df["energy_bill"] = property_details_df["adjusted_energy_consumption"].apply(
            AnnualBillSavings.calculate_annual_bill
        )

        # Lookup used to attach the uprn to anything keyed by property_id
        uprn_lookup = properties_df[["uprn", "id"]].rename(columns={"id": "property_id"})
        property_details_df = property_details_df.merge(uprn_lookup, on="property_id")

        # Unnest the recommendations. Each property holds a list of recommendation dictionaries
        exploded = properties_df["recommendations"].explode().tolist()
        recommendations_df = pd.DataFrame([r for r in exploded if not pd.isnull(r)])
        recommendations_df = recommendations_df.merge(uprn_lookup, how="left", on="property_id")

        recommendations_summary = create_recommendations_summary(
            recommendations_df,
            properties_df,
            property_details_df,
            sap_target
        )

        # Changes of energy, co2 and bills relative to the current values (stored as fractions, not x100)
        recommendations_summary["carbon_percent_change"] = (
            recommendations_summary["total_carbon"] / recommendations_summary["current_co2"]
        )
        recommendations_summary["energy_percent_change"] = (
            recommendations_summary["adjusted_heat_demand"] / recommendations_summary["current_energy"]
        )
        recommendations_summary["bills_percent_change"] = (
            recommendations_summary["total_bill_savings"] / recommendations_summary["current_energy_bill"]
        )

        return {
            "asset_list": asset_list,
            "properties": properties_df,
            "property_details": property_details_df,
            "plans": plans_df,
            "recommendations": recommendations_df,
            "recommendations_summary": recommendations_summary,
            # numeric_only keeps identifier/text columns out of the aggregation
            "overview_totals": recommendations_summary.sum(numeric_only=True),
            "overview_means": recommendations_summary.mean(numeric_only=True),
            "measures": _count_measures(recommendations_df),
            "archetypes": {
                archetype: _summarise_archetype(
                    archetype, asset_list, recommendations_summary, property_details_df, recommendations_df
                )
                for archetype in _ARCHETYPES
            },
        }
    finally:
        # Always hand the connection back, including when a query or a calculation fails
        session.close()


def scenario_1():
    """
    Compute the slide data for the first scenario: the whole of PORTFOLIO_ID_1 against SAP_TARGET_1.

    :return: dict of results, see _run_scenario
    """
    return _run_scenario(PORTFOLIO_ID_1, SAP_TARGET_1)


def make_sample(random_state=None):
    """
    Draw a random sample of uprns for each archetype from the asset list.

    The output is meant to be copied into the static archetype_N_sample lists so the sample stays fixed
    between runs.

    :param random_state: Optional seed forwarded to DataFrame.sample to make the draw reproducible
    :return: dict mapping each archetype label to the list of sampled uprns
    """
    asset_list = _load_asset_list()

    # Number of properties to draw per archetype
    sample_sizes = {
        "Archetype 1": 13,
        "Archetype 2": 30,
        "Archetype 3": 15,
        "Archetype 4": 9,
    }

    return {
        archetype: asset_list[asset_list["archetype"] == archetype]
        .sample(size, random_state=random_state)["uprn"]
        .to_list()
        for archetype, size in sample_sizes.items()
    }


def scenario_2():
    """
    Compute the slide data for the second scenario: the sampled properties of PORTFOLIO_ID_2.

    The analysis is restricted to the static archetype samples and uses SAP_TARGET_2.
    NOTE(review): this previously passed SAP_TARGET_1, which looks like a copy-paste from scenario_1 since
    SAP_TARGET_2 was otherwise unused - confirm the intended target.

    :return: dict of results, see _run_scenario
    """
    sample_uprns = archetype_1_sample + archetype_2_sample + archetype_3_sample + archetype_4_sample
    return _run_scenario(PORTFOLIO_ID_2, SAP_TARGET_2, sample_uprns)

View file

@ -0,0 +1,129 @@
import pandas as pd
from utils.s3 import read_excel_from_s3
from utils.s3 import save_csv_to_s3
# Identifiers used by app() to build the S3 key prefix ("<USER_ID>/<PORTFOLIO_ID>/...") for the pilot inputs.
# app() also prints a second request body for portfolio PORTFOLIO_ID + 1.
USER_ID = 8
PORTFOLIO_ID = 70
# Council tax band for each pilot property. app() left-merges this onto the asset list on (address, postcode),
# so the values here must match the address and postcode that app() extracts from the "Full Address" column.
council_tax_bands = [
{'address': '8 Corporation Road', 'postcode': 'DY2 7PX', 'band': 'A'},
{'address': '21 Wells Road', 'postcode': 'DY5 3TB', 'band': 'A'},
{'address': '27 Milton Road', 'postcode': 'WV14 8HZ', 'band': 'A'},
{'address': '195 Ashenhurst Road', 'postcode': 'DY1 2JB', 'band': 'A'},
{'address': '53 Bromley', 'postcode': 'DY5 4PJ', 'band': 'A'},
{'address': '91 Osprey Drive', 'postcode': 'DY1 2JS', 'band': 'B'},
{'address': '47 Fairfield Road', 'postcode': 'DY8 5UJ', 'band': 'B'},
{'address': '150 Huntingtree Road', 'postcode': 'B63 4HP', 'band': 'C'},
{'address': '6 Beech Road', 'postcode': 'DY1 4BP', 'band': 'A'},
{'address': '5 Oaklands', 'postcode': 'B62 0JA', 'band': 'A'},
]
council_tax_bands = pd.DataFrame(council_tax_bands)
# This is information we need to override on the EPC itself, for instance if a new survey has been conducted and
# that has not reached the API. Each entry identifies the property by address and postcode; the remaining keys are
# the values to apply.
patches = [
{
'address': '6 Beech Road', 'postcode': 'DY1 4BP',
'walls-description': 'Cavity wall, filled cavity',
'walls-energy-eff': 'Good',
'roof-description': 'Pitched, 12 mm loft insulation',
'roof-energy-eff': 'Very Poor',
'windows-description': 'Fully double glazed',
'windows-energy-eff': 'Good',
'mainheat-description': 'Room heaters, electric',
'mainheat-energy-eff': 'Very Poor',
'mainheatcont-description': 'Appliance thermostats',
'mainheatc-energy-eff': 'Good',
'lighting-description': 'Low energy lighting in 25% of fixed outlets',
'lighting-energy-eff': 'Good',
'floor-description': 'Solid, no insulation (assumed)',
'secondheat-description': 'None',
'current-energy-efficiency': '32',
'energy-consumption-current': '491',
'co2-emissions-current': '5.0',
'potential-energy-efficiency': '87'
}
]
# This is information that is found as a result of the non-invasive surveys, which means that certain measures
# have been installed already. To reflect this in the front end, it is included in the recommendation, however
# the cost is removed and instead, a message is presented saying that the measure is already installed.
already_installed = [
{
'address': '5 Oaklands',
'postcode': 'B62 0JA',
"already_installed": ["windows_glazing"]
}
]
def app():
    """
    Prepare the IMMO Dudley pilot inputs, store them in S3 and print the request bodies.

    The raw asset list is read from the datalake bucket, address and postcode are derived from the
    "Full Address" column, and the council tax bands are merged on. The asset list, the already-installed
    overrides and the EPC patches are then written to the plan inputs bucket, and one request body per
    EPC goal (C and B) is printed.
    :return:
    """
    inputs_bucket = "retrofit-plan-inputs-dev"

    raw_asset_list = read_excel_from_s3(
        bucket_name="retrofit-datalake-dev",
        file_key="customers/Immo/IMMO Sample Assets_Dudley.xlsx",
        header_row=0
    ).drop(columns=["Unnamed: 0"])

    # The address is the first comma-separated part of the full address, the postcode is the last
    address_parts = raw_asset_list["Full Address"].str.split(",")
    raw_asset_list["address"] = address_parts.str[0]
    raw_asset_list["postcode"] = address_parts.str[-1].str.strip()

    # Attach the council tax band, then rename the provided bedroom/bathroom counts
    asset_list = raw_asset_list.merge(
        council_tax_bands, how="left", on=["address", "postcode"]
    ).rename(
        columns={
            "No. of Beds": "n_bedrooms",
            "No. of WC's": "n_bathrooms"
        }
    )

    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
    already_installed_filename = f"{USER_ID}/{PORTFOLIO_ID}/already_installed.json"
    patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json"

    # Asset list first, then the overrides, then the patches.
    # NOTE(review): the two ".json" keys are written with save_csv_to_s3 - confirm the consumer expects that.
    uploads = (
        (asset_list, filename),
        (pd.DataFrame(already_installed), already_installed_filename),
        (pd.DataFrame(patches), patches_filename),
    )
    for frame, key in uploads:
        save_csv_to_s3(
            dataframe=frame,
            bucket_name=inputs_bucket,
            file_name=key
        )

    # One request body per scenario: the EPC C portfolio, then the EPC B portfolio
    for portfolio_id, goal_value in ((PORTFOLIO_ID, "C"), (PORTFOLIO_ID + 1, "B")):
        body = {
            "portfolio_id": str(portfolio_id),
            "housing_type": "Private",
            "goal": "Increase EPC",
            "goal_value": goal_value,
            "trigger_file_path": filename,
            "already_installed_file_path": already_installed_filename,
            "patches_file_path": patches_filename,
            "budget": None,
        }
        print(body)

View file

@ -0,0 +1,210 @@
# import extract_msg
from datetime import datetime
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from backend.app.db.functions.non_intrusive_surveys import upload_non_intrusive_survey_notes
def parse_msg_body(text):
    """
    Parse the plain-text body of a survey email into a dictionary of numbered lines.

    Asterisks are removed from every line, surrounding whitespace is stripped and lines that end up empty
    are skipped. Any " = " separator is rewritten as ": ". Each remaining line is stored under a sequential
    key: "Info1", "Info2", ...

    :param text: The message body. Lines may be separated by "\\r\\n", "\\n" or "\\r".
    :return: A dictionary mapping "Info<n>" to the cleaned line, in order of appearance.
    """
    data = {}
    # splitlines() handles every common line ending, so a body that is not CRLF-delimited is still
    # broken into individual lines rather than being stored as a single entry
    for line in text.splitlines():
        # Remove all asterisks and extra whitespace
        clean_line = line.replace('*', '').strip()
        if not clean_line:
            continue
        # replace() leaves the line untouched when it contains no " = "
        data[f"Info{len(data) + 1}"] = clean_line.replace(' = ', ': ')
    return data
def app():
    """
    Upload the non-invasive survey notes for the Dudley pilot.

    The notes are defined by hand below, one dictionary per property keyed by uprn, and are passed to
    upload_non_intrusive_survey_notes using a database session that is always closed afterwards.
    :return:
    """
    # The raw survey results were supplied as one Outlook .msg file per property. They were inspected locally
    # with the (currently commented out) extract_msg import and parse_msg_body, along the lines of:
    #
    #   with extract_msg.Message(filepath) as msg:
    #       pprint(parse_msg_body(msg.body))

    # Values shared by most of the notes
    surveyor = 'Carl Fitzgerald - The Warmfront Team'
    survey_date = datetime.strptime('2024-04-11', '%Y-%m-%d')
    retro_drilled_cavity = (
        'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
        'CIGA check and extracting the cavity, replacing with bead insulation.'
    )
    no_solar = 'No existing solar'
    mains_access = 'Property has access to the mains'

    # We manually create the non-invasive notes for the pilot
    non_invasive_notes = [
        {
            # 5 Oaklands, B62 0JA
            'uprn': 90028499,
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Wall Insulation': retro_drilled_cavity + ' There is a shared alleyway with the neighbour, that is a '
                                                      'solid brick wall.',
            'Wall Render': 'Partial render between top of ground floor window and bottom of 1st floor window',
            'Existing solar PV': no_solar,
            'Orientation': 'Front house direction: North East, Back house direction: South West',
            'Access to mains?': mains_access,
        },
        {
            # 6 Beech Road, DY1 4BP
            'uprn': 90055152,
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Wall Insulation': '1st floor is solid brick with external wall insulation. 2nd floor is cavity, '
                               'retro drilled, containing loose fibre insulation. Consider getting a '
                               'CIGA check and extracting the cavity, replacing with bead insulation.',
            'Wall Render': None,
            'Existing solar PV': no_solar,
            'Orientation': 'Side house direction: North East',
            'Access to mains?': mains_access,
        },
        {
            # 8 Corporation Road, DY2 7PX
            'uprn': 90070461,
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Wall Insulation': "External wall insulation",
            'Wall Render': "Render finish throughout",
            'Existing solar PV': no_solar,
            'Orientation': 'Front house direction: North East, Back house direction: South West',
            'Access to mains?': None,
        },
        {
            # 21 Wells Road, DY5 3TB
            'uprn': 90022227,
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Wall Insulation': retro_drilled_cavity,
            'Wall Render': None,
            'Existing solar PV': no_solar,
            'Orientation': 'Front house direction: East, Back house direction: West',
            'Access to mains?': mains_access,
        },
        {
            # 47 Fairfield Road, DY8 5UJ
            'uprn': 90077535,
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Wall Insulation': retro_drilled_cavity,
            'Wall Render': None,
            'Existing solar PV': no_solar,
            'Orientation': 'Front house direction: East, Back house direction: West',
            'Access to mains?': mains_access,
        },
        {
            # 53 Bromley, DY5 4PJ
            'uprn': 90060989,
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Wall Insulation': "Filled at build, partially filled - celotex/king board, 50mm cavity remaining - "
                               "recommends a cavity wall fill",
            "Roof": "Hipped roof",
            'Existing solar PV': no_solar,
            'Orientation': "Front house direction: North, Back house direction: South, Side house direction: West",
            'Access to mains?': mains_access,
        },
        {
            # 91 Osprey Drive, DY1 2JS
            'uprn': 90048026,
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Wall Insulation': retro_drilled_cavity,
            'Wall Render': 'Tile hung front and rear of property',
            'Existing solar PV': no_solar,
            'Orientation': 'Side house direction: East',
            'Access to mains?': mains_access,
        },
        {
            # 150 Huntingtree Road, B63 4HP
            'uprn': 90093693,
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Heating': 'Electric (storage heaters)',
            'Wall Insulation': retro_drilled_cavity,
            "Roof": "Hipped roof",
            'Existing solar PV': no_solar,
            'Orientation': "Front house direction: North West, Back house direction: South East, Side house "
                           "direction: North East",
        },
        {
            # 195 Ashenhurst Road, DY1 2JB
            'uprn': 90051858,
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Wall Insulation': retro_drilled_cavity,
            'Wall Render': "Solid render front and rear of property",
            'Existing solar PV': no_solar,
            'Orientation': 'Front house direction: South, Back house direction: North',
            'Access to mains?': mains_access,
        },
        {
            # 27 Milton Road, WV14 8HZ
            'uprn': 90106884,
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Wall Insulation': retro_drilled_cavity,
            'Wall Render': "Solid render front and rear of property",
            'Existing solar PV': no_solar,
            'Orientation': 'Front house direction: South East, Back house direction: North West',
            'Access to mains?': mains_access,
        },
    ]

    session = sessionmaker(bind=db_engine)()
    try:
        upload_non_intrusive_survey_notes(session=session, non_invasive_notes=non_invasive_notes, batch_size=500)
    finally:
        # Release the database connection whether or not the upload succeeded
        session.close()

View file

@ -0,0 +1 @@
extract-msg

View file

@ -0,0 +1,293 @@
from pptx.enum.text import PP_ALIGN # NOQA
from pptx import Presentation
from pptx.util import Inches, Pt
import matplotlib.pyplot as plt
from sqlalchemy.orm import Session
from sqlalchemy.sql import true
from backend.app.db.utils import row2dict
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
from backend.app.db.models.recommendations import Recommendation
from backend.app.db.models.recommendations import Plan
from backend.app.utils import sap_to_epc
# Hex colour for each EPC rating band (A to G). plot_epc_distribution maps the 'current_epc_rating' column
# through this dictionary to colour the bars.
EPC_COLOURS = {
"A": "#028051",
"B": "#14b759",
"C": "#8ecd46",
"D": "#fdd401",
"E": "#fdab67",
"F": "#ee8023",
"G": "#e71437"
}
def get_properties_with_default_recommendations(session: Session, portfolio_id: int):
    """
    Retrieve every property in a portfolio together with its default recommendations.

    An outer join is used so that properties without any default recommendation are still returned,
    with an empty 'recommendations' list.
    :param session: The SQLAlchemy session used to execute the query.
    :param portfolio_id: The ID of the portfolio for which to retrieve properties and recommendations.
    :return: A list of dictionaries, one per property, each holding the property's columns plus a
    'recommendations' key containing its default recommendations as dictionaries.
    """
    # Restricting to default recommendations inside the join condition (rather than in a filter) is what
    # keeps the properties that have no matching recommendation
    default_recommendation_join = (
        (Recommendation.property_id == PropertyModel.id) & (Recommendation.default == true())
    )
    rows = (
        session.query(PropertyModel, Recommendation)
        .outerjoin(Recommendation, default_recommendation_join)
        .filter(PropertyModel.portfolio_id == portfolio_id)
        .all()
    )

    properties_by_id = {}
    for property_row, recommendation in rows:
        entry = properties_by_id.get(property_row.id)
        if entry is None:
            # First time this property is seen: register it with no recommendations yet
            entry = row2dict(property_row)
            entry['recommendations'] = []
            properties_by_id[property_row.id] = entry

        # The recommendation is None for properties that had nothing to join against
        if recommendation and recommendation.default:
            entry['recommendations'].append(row2dict(recommendation))

    return list(properties_by_id.values())
def get_property_details_by_portfolio_id(session: Session, portfolio_id: int):
    """
    Retrieve the EPC property details of every property in a portfolio.
    :param session: The SQLAlchemy session used to execute the query.
    :param portfolio_id: The ID of the portfolio for which to retrieve property details.
    :return: A list of dictionaries, one per property details row. The list is empty when the portfolio
    has no property details.
    """
    matching_rows = (
        session.query(PropertyDetailsEpcModel)
        .filter(PropertyDetailsEpcModel.portfolio_id == portfolio_id)
        .all()
    )
    # Convert the SQLAlchemy objects to dictionaries; no rows simply gives an empty list
    return [row2dict(details_row) for details_row in matching_rows]
def get_plan_by_portfolio_id(session: Session, portfolio_id: int):
    """
    Retrieve every plan that belongs to a portfolio.
    :param session: The SQLAlchemy session used to execute the query.
    :param portfolio_id: The ID of the portfolio for which to retrieve plans.
    :return: A list of dictionaries, one per plan. The list is empty when the portfolio has no plans.
    """
    matching_plans = (
        session.query(Plan)
        .filter(Plan.portfolio_id == portfolio_id)
        .all()
    )
    # Convert the SQLAlchemy objects to dictionaries; no rows simply gives an empty list
    return [row2dict(plan_row) for plan_row in matching_plans]
def plot_epc_distribution(df, customer_key, title='Your Units', background_color='white', bar_height=0.4,
                          font_size=15):
    """
    Plots a horizontal bar chart of EPC rating distribution with adjustable bar thickness and text sizes.
    Allows setting the plot background color and dynamically adjusts text size and bar spacing.

    The chart is shown, then saved to etl/customers/<customer_key>/epc_distribution_plot.png. The input
    DataFrame is not modified.
    :param df: DataFrame with columns ['current_epc_rating', 'count', 'percentage']
    :param customer_key: Name of the customer directory under etl/customers/ in which the image is saved
    :param title: Title of the plot
    :param background_color: Background color of the plot
    :param bar_height: Thickness of the bars (default 0.4)
    :param font_size: Base font size for text annotations (default 15)
    :return: A tuple of the figure (already closed by the time it is returned) and the path of the saved image
    """
    # Calculate dynamic figure size or adjust based on preferences
    square_size = max(6, len(df) * 0.6)  # Ensure minimum size and adjust based on number of entries
    fig, ax = plt.subplots(figsize=(square_size, square_size))
    fig.patch.set_facecolor(background_color)  # Set figure background color
    ax.set_facecolor(background_color)  # Set axes background color

    # Round the percentages to 1 decimal place on a copy, so the caller's DataFrame is left untouched
    df_sorted = df.assign(percentage=df['percentage'].round(1)).sort_values('percentage', ascending=True)

    # Plot bars with specified height for adjustable thickness
    bars = ax.barh(df_sorted['current_epc_rating'], df_sorted['percentage'],
                   color=df_sorted['current_epc_rating'].map(EPC_COLOURS), edgecolor='none', height=bar_height)

    epc_rating_font_size = font_size * 2  # EPC rating font size larger than base font size
    count_percentage_font_size = font_size  # Count (percentage) font size as base font size

    # Annotate bars with EPC ratings inside and count with percentage values outside
    for index, bar in enumerate(bars):
        row = df_sorted.iloc[index]
        width = bar.get_width()
        bar_middle = bar.get_y() + bar.get_height() / 2

        # EPC rating inside the bar with increased font size
        ax.text(width - (width * 0.05), bar_middle,
                f"{row['current_epc_rating']}", va='center', ha='right', color='white',
                fontsize=epc_rating_font_size)
        # Count and percentage outside the bar, original font size
        ax.text(width + 1, bar_middle,
                f"{row['count']} ({row['percentage']}%)", va='center', color='black',
                fontsize=count_percentage_font_size)

    ax.set_title(title, fontsize=font_size * 1.2)  # Adjust title font size proportionally

    # Remove the tick marks and values from both axes
    ax.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
    ax.tick_params(axis='y', which='both', left=False, right=False, labelleft=False)

    # Remove all four spines
    for side in ('top', 'right', 'left', 'bottom'):
        ax.spines[side].set_visible(False)

    plt.tight_layout()  # Adjust layout
    plt.show()

    # Save the figure as an image
    figure_path = f'etl/customers/{customer_key}/epc_distribution_plot.png'
    fig.savefig(figure_path, bbox_inches='tight')
    plt.close(fig)  # Close the figure to free memory
    return fig, figure_path
def save_plot_to_image(figure, path='plot.png'):
    """
    Write a matplotlib figure to an image file, for insertion into PowerPoint, and close it.
    :param figure: The matplotlib figure to save.
    :param path: The path of the image file to write.
    """
    # A tight bounding box trims the surrounding whitespace from the saved image
    figure.savefig(path, bbox_inches='tight')
    plt.close(figure)
def save_figure_as_image(figure, filename='temp_plot.png'):
    """
    Write a matplotlib figure to an image file at 300 dpi and close it.
    :param figure: The matplotlib figure to save.
    :param filename: The path of the image file to write.
    """
    figure.savefig(filename, dpi=300)
    # Closing the figure stops it from being displayed in notebooks or other Python environments
    plt.close(figure)
def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inches(1), width_inches=Inches(8),
                                height_inches=Inches(2)):
    """
    Adds a text box of commentary to a slide, with one paragraph per line of the commentary.
    :param slide: The slide object to add the commentary to.
    :param commentary: The commentary text, with sections separated by newlines.
    :param top_inches: The top position of the commentary text box.
    :param left_inches: The left position of the commentary text box.
    :param width_inches: The width of the commentary text box.
    :param height_inches: The height of the commentary text box.
    """
    text_box = slide.shapes.add_textbox(left_inches, top_inches, width_inches, height_inches)
    frame = text_box.text_frame

    # Configure text frame
    frame.word_wrap = True
    frame.auto_size = True
    frame.paragraphs[0].alignment = PP_ALIGN.LEFT

    for position, section in enumerate(commentary.split("\n")):
        # The text frame already has one paragraph, which takes the first section; later sections each
        # get a new paragraph
        paragraph = frame.paragraphs[0] if position == 0 else frame.add_paragraph()
        paragraph.text = section
        paragraph.space_after = Pt(14)  # Adjust space after each section as needed
        paragraph.font.size = Pt(14)  # Adjust font size as needed
        paragraph.level = 0  # Indent level, can be adjusted for nesting
        paragraph.space_before = Pt(0)
def add_slide_with_image(prs, title, img_path=None, commentary=None):
    """
    Adds a titled slide with an optional image and optional commentary.

    When an image is given, the commentary is placed below it; otherwise the commentary is placed
    further up the slide.
    :param prs: The presentation to add the slide to.
    :param title: The text for the slide's title placeholder.
    :param img_path: Optional path of an image to place on the slide.
    :param commentary: Optional commentary text, passed to add_commentary_with_bullets.
    """
    # NOTE(review): index 5 is the "Title Only" layout in python-pptx's default template - confirm this is
    # the layout intended
    slide = prs.slides.add_slide(prs.slide_layouts[5])
    slide.shapes.title.text = title

    if img_path:
        # Add the image
        slide.shapes.add_picture(img_path, Inches(1), Inches(1.5), Inches(8), Inches(4.5))

    # The commentary starts at 6 inches when it sits under an image, and at 3 inches otherwise
    commentary_top = Inches(6) if img_path else Inches(3)

    # Add commentary if provided
    if commentary:
        add_commentary_with_bullets(slide, commentary, commentary_top)
def create_powerpoint(data, save_location):
    """
    Builds a PowerPoint presentation with one slide per entry of the provided data, and saves it.
    :param data: A dictionary with one entry per slide. Each value is a dictionary that may contain
    'title' (defaults to an empty string), 'image_path' and 'text'.
    :param save_location: The file path where the PowerPoint presentation will be saved.
    """
    prs = Presentation()

    # Only the values are needed; the keys just label the slides
    for slide_spec in data.values():
        add_slide_with_image(
            prs,
            slide_spec.get('title', ""),
            slide_spec.get('image_path'),
            slide_spec.get('text'),
        )

    # Save the presentation
    prs.save(save_location)
def create_recommendations_summary(recommendations_df, properties_df, property_details_df, sap_target):
    """
    Summarise the impact of the recommendations for each property.

    The recommendations are totalled per property (SAP points, valuation impact, bill savings, cost,
    carbon and adjusted heat demand). The expected SAP points and EPC rating after the works are then
    derived, along with the remaining difference to the SAP target.
    :param recommendations_df: One row per recommendation, including 'property_id' and the columns totalled.
    :param properties_df: One row per property, with 'id', 'uprn' and 'current_sap_points'.
    :param property_details_df: Optional property details with 'uprn', 'co2_emissions',
    'adjusted_energy_consumption' and 'energy_bill'. Pass None to leave these out.
    :param sap_target: The SAP score being targeted.
    :return: A DataFrame with one row per property that has recommendations.
    """
    totals = {
        "total_sap_points": ("sap_points", "sum"),
        "total_valuation_impact": ("property_valuation_increase", "sum"),
        "total_bill_savings": ("energy_cost_savings", "sum"),
        "total_cost": ("estimated_cost", "sum"),
        "total_carbon": ("co2_equivalent_savings", "sum"),
        "adjusted_heat_demand": ("adjusted_heat_demand", "sum"),
    }
    summary = recommendations_df.groupby(["property_id"]).agg(**totals).reset_index()

    # Bring in the uprn and the current SAP points of each property
    current_sap = properties_df[["id", "uprn", "current_sap_points"]].rename(columns={"id": "property_id"})
    summary = summary.merge(current_sap, on="property_id", how="left")

    summary["expected_sap_points"] = summary["current_sap_points"] + summary["total_sap_points"]
    summary["expected_epc_rating"] = summary["expected_sap_points"].apply(sap_to_epc)
    # Positive when the property is still expected to fall short of the target
    summary["sap_difference"] = sap_target - summary["expected_sap_points"]

    if property_details_df is None:
        return summary

    # Add the current CO2, energy consumption and energy bill, matched on uprn
    current_levels = property_details_df[
        ["uprn", "co2_emissions", "adjusted_energy_consumption", "energy_bill"]
    ].rename(
        columns={
            "id": "property_id",
            "co2_emissions": "current_co2",
            "adjusted_energy_consumption": "current_energy",
            "energy_bill": "current_energy_bill"
        }
    )
    return summary.merge(current_levels, on="uprn", how="left")

View file

@ -0,0 +1,195 @@
import os
import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv
from utils.s3 import read_excel_from_s3
from backend.SearchEpc import SearchEpc
from epc_api.client import EpcClient
from utils.s3 import save_csv_to_s3
# Read in the .env file in backend, which is expected to provide EPC_AUTH_TOKEN for the EPC searches in app()
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
# Identifiers used by app() to build the S3 keys ("<USER_ID>/<portfolio id>/test_inputs.csv") and the
# printed request bodies
USER_ID = 8
PORTFOLIO_ID = 66
SECOND_SCENARIO_PORTFOLIO_ID = 65
# We also create a second portfolio for a subset of properties that do not meet the install requirements
# We drop these uprns from the first plan
second_portfolio_uprns = [
10070056840, 10070056846, 10070056847, 10070056843, 10070056848, 10070056844, 10070056849,
10070056829, 10070056920, 10023345463
]
def app():
    """
    This application will read in the Urban Splash data, in the dev AWS account, and pre-process it. There are a
    few issues with the file, including incorrect postcodes, so each unit is searched for under every candidate
    postcode until an EPC is found.

    The customer is interested in the following:
    - Getting properties to an EPC C
    - Doing so within a budget of £5,000

    The units listed in second_portfolio_uprns are written to a separate input file for the second scenario
    portfolio and are left out of the main one. The request body for each portfolio is printed, followed by a
    summary of the heating and hot water systems found on the EPCs.

    :return:
    :raises LookupError: If no EPC can be found for a unit under any of the candidate postcodes.
    """
    potential_postcodes = ["BD9 5BQ", "BD9 5BR", "BD9 5BN"]

    raw_asset_list = read_excel_from_s3(
        bucket_name="retrofit-datalake-dev",
        file_key="customers/urban_splash/raw_asset_list/USRF - Velvet Mill EPC.xlsx",
        header_row=2
    )

    # We have a series of apartment numbers that are "Apartment 001", "Apartment 002", etc. We need to convert these
    # to "Apartment 1", "Apartment 2", etc.
    raw_asset_list["address1"] = raw_asset_list["Unit Number"].str.replace(
        "Apartment 00", "Apartment ", regex=True
    )
    raw_asset_list["address1"] = raw_asset_list["address1"].str.replace(
        "Apartment 0", "Apartment ", regex=True
    )

    # For each entry in the asset list, we make an api call to the EPC database to get the EPC data. We'll retrieve the
    # uprn for the property, as well as a nice address and postcode that we can use. We'll also try and deduce the
    # likely wall construction, since many of the homes are new builds, based on their newest EPC
    epc_data = []
    processed_asset_list = []

    for _, row in tqdm(raw_asset_list.iterrows(), total=len(raw_asset_list)):
        # Try each candidate postcode in turn and stop at the first one that yields an EPC
        newest_epc = None
        for postcode in potential_postcodes:
            searcher = SearchEpc(
                address1=row.address1, postcode=postcode, auth_token=EPC_AUTH_TOKEN, os_api_key=""
            )
            searcher.find_property(skip_os=True)
            if searcher.newest_epc is not None:
                newest_epc = searcher.newest_epc
                break

        if newest_epc is None:
            raise LookupError(
                f"No EPC found for '{row.address1}' in any of the postcodes {potential_postcodes}"
            )

        if row["Beds"] == "Studio":
            number_heated_rooms = 2
            number_habitable_rooms = 2
        else:
            # Assume one room for communal space, one room for bathroom
            number_heated_rooms = row["Beds"] + 2
            number_habitable_rooms = row["Beds"] + 2

        to_append = {
            **row.to_dict(),
            "uprn": newest_epc["uprn"],
            "address": newest_epc["address1"],
            "postcode": newest_epc["postcode"],
            # "walls-description": newest_epc["walls-description"],
            # "roof-description": newest_epc["roof-description"],
            # "floor-description": newest_epc["floor-description"],
            # "total-floor-area": newest_epc["total-floor-area"],
            "full-address": newest_epc["address"],
            "number-heated-rooms": number_heated_rooms,
            "number-habitable-rooms": number_habitable_rooms,
        }

        processed_asset_list.append(to_append)
        epc_data.append(newest_epc)

    processed_asset_list_df = pd.DataFrame(processed_asset_list)
    epc_data_df = pd.DataFrame(epc_data)

    # Split the units between the main portfolio and the second scenario portfolio
    in_second_portfolio = processed_asset_list_df["uprn"].astype(int).isin(second_portfolio_uprns)

    # Store the data for the main portfolio in s3
    filename = f"{USER_ID}/{PORTFOLIO_ID}/test_inputs.csv"
    save_csv_to_s3(
        dataframe=processed_asset_list_df[~in_second_portfolio],
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename
    )

    body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Private",
        "goal": "Increase EPC",
        "goal_value": "C",
        "trigger_file_path": filename,
        "budget": None,
    }
    print(body)

    # Store the data for the second scenario portfolio in s3
    subset = processed_asset_list_df[in_second_portfolio]
    filename2 = f"{USER_ID}/{SECOND_SCENARIO_PORTFOLIO_ID}/test_inputs.csv"
    save_csv_to_s3(
        dataframe=subset,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename2
    )

    body = {
        "portfolio_id": str(SECOND_SCENARIO_PORTFOLIO_ID),
        "housing_type": "Private",
        "goal": "Increase EPC",
        "goal_value": "C",
        # The second scenario is triggered from its own input file, not the main portfolio's
        "trigger_file_path": filename2,
        "budget": None,
    }
    print(body)

    # Some basic analysis on the heating, heating controls and hot water systems
    # All of the heating systems are rated very poor, poor or average. When it's average, they are all also
    # "Room heaters, electric", but the house has "Programmer and appliance thermostats" for the heating controls.
    # which is more efficient
    pd.set_option('display.max_rows', 500)
    pd.set_option('display.max_columns', 500)
    pd.set_option('display.width', 1000)

    # Heating
    print(epc_data_df[["mainheat-description", "mainheatcont-description", "mainheat-energy-eff"]].drop_duplicates())
    #                    mainheat-description              mainheatcont-description mainheat-energy-eff
    # 0                Room heaters, electric        Programmer and room thermostat           Very Poor
    # 12               Room heaters, electric  Programmer and appliance thermostats             Average
    # 20  Electric storage heaters, radiators                  Celect-type controls                Poor

    # Hot water
    print(epc_data_df[["hotwater-description", "hot-water-energy-eff"]].drop_duplicates())
    #                    hotwater-description hot-water-energy-eff
    # 0   Electric immersion, standard tariff            Very Poor
    # 12         Electric immersion, off-peak              Average

    # We now retrieve EPCS for all of the properties that are in these postcodes very obviously for the velvet mill
    # We'll use this information to get a sense of the likely wall/roof/floor construction for the properties
    # client = EpcClient(auth_token=EPC_AUTH_TOKEN)
    #
    # neighbouring_epcs = []
    # for pc in potential_postcodes:
    #     response = client.domestic.search(params={"postcode": pc}, size=1000)
    #     data = response["rows"]
    #
    #     # keep just rows that are clearly for the velvet mill
    #     data = [x for x in data if "velvet" in x["address1"].lower()]
    #
    #     neighbouring_epcs.extend(data)
    #
    # neighbouring_epcs_df = pd.DataFrame(neighbouring_epcs)
    # neighbouring_epcs_df["walls-description"].value_counts()
    # neighbouring_epcs_df["roof-description"].value_counts()
    # neighbouring_epcs_df["floor-description"].value_counts()

View file

@ -0,0 +1,352 @@
"""
This script contains the code to generate the data required to populate the slides
We connect to the database and extract the data for the portfolio needed, so it is recommended to use
an environment akin to the backend to run this script
"""
import pandas as pd
import numpy as np
from backend.app.db.connection import db_engine
from sqlalchemy.orm import sessionmaker
from etl.customers.slide_utils import (
plot_epc_distribution,
get_property_details_by_portfolio_id,
get_plan_by_portfolio_id,
get_properties_with_default_recommendations,
create_powerpoint,
create_recommendations_summary
)
# The two portfolios that app() loads from the database: the main one and the second scenario
PORTFOLIO_ID = 66
SECOND_SCENARIO_PORTFOLIO_ID = 65
EPC_TARGET = "C"
# SAP score handed to create_recommendations_summary as the target.
# NOTE(review): presumably the lowest SAP score that still falls in the EPC_TARGET band - confirm
SAP_TARGET = 69
CUSTOMER_KEY = "urban_splash"
def app():
    """
    Build the technical slides for the customer and save them as a PowerPoint file.

    The data for PORTFOLIO_ID (first scenario) and SECOND_SCENARIO_PORTFOLIO_ID (second scenario) is pulled
    from the database, summarised into the commentary for three slides and handed to create_powerpoint, which
    is given the save location etl/customers/<CUSTOMER_KEY>/<CUSTOMER_KEY>_tech_slides.pptx

    :return: None
    """
    # Connect to database
    session = sessionmaker(bind=db_engine)()
    try:
        ########################################################################
        # Get the data we need
        ########################################################################
        # Get the properties for the portfolio
        properties = get_properties_with_default_recommendations(session, PORTFOLIO_ID)
        properties_df = pd.DataFrame(properties)
        # We now pull the data for the property details
        property_details = get_property_details_by_portfolio_id(session, PORTFOLIO_ID)
        property_details_df = pd.DataFrame(property_details)
        # Merge on uprn
        property_details_df = property_details_df.merge(
            properties_df[["uprn", "id"]].rename(columns={"id": "property_id"}),
            on="property_id"
        )
        plans = get_plan_by_portfolio_id(session, PORTFOLIO_ID)
        plans_df = pd.DataFrame(plans)
        # Unnest the recommendations. Each recommendation is a list of dictionaries
        recommendations_exploded = properties_df["recommendations"].explode().tolist()
        recommendations_df = pd.DataFrame([r for r in recommendations_exploded if not pd.isnull(r)])
        recommendations_summary = create_recommendations_summary(recommendations_df, properties_df, SAP_TARGET)
        # Get the data for the second scenario portfolio
        properties_second_scenario = get_properties_with_default_recommendations(
            session, SECOND_SCENARIO_PORTFOLIO_ID
        )
        properties_second_scenario_df = pd.DataFrame(properties_second_scenario)
        property_details_second_scenario = get_property_details_by_portfolio_id(
            session, SECOND_SCENARIO_PORTFOLIO_ID
        )
        property_details_second_scenario_df = pd.DataFrame(property_details_second_scenario)
        # Merge on uprn
        property_details_second_scenario_df = property_details_second_scenario_df.merge(
            properties_second_scenario_df[["uprn", "id"]].rename(columns={"id": "property_id"}),
            on="property_id"
        )
        plans_second_scenario = get_plan_by_portfolio_id(session, SECOND_SCENARIO_PORTFOLIO_ID)
        plans_second_scenario_df = pd.DataFrame(plans_second_scenario)
        # Merge on uprn so we can compare properties across portfolios
        plans_second_scenario_df = plans_second_scenario_df.merge(
            properties_second_scenario_df[["uprn", "id"]].rename(columns={"id": "property_id"}), on="property_id"
        )
        recommendations_exploded_second_scenario = (
            properties_second_scenario_df["recommendations"].explode().tolist()
        )
        recommendations_second_scenario_df = pd.DataFrame(
            [r for r in recommendations_exploded_second_scenario if not pd.isnull(r)]
        )
        recommendations_summary_second_scenario = create_recommendations_summary(
            recommendations_second_scenario_df, properties_second_scenario_df, SAP_TARGET
        )
    finally:
        # Everything below works on the DataFrames only, so the session is released here even if a query failed
        session.close()
    # Combine the data for both scenarios
    full_property_details = pd.concat([property_details_df, property_details_second_scenario_df])
    full_properties = pd.concat([properties_df, properties_second_scenario_df])
    epc_rating_summary = full_properties.groupby("current_epc_rating").size().reset_index(name="count")
    epc_rating_summary["percentage"] = epc_rating_summary["count"] / epc_rating_summary["count"].sum() * 100
    ########################################################################
    # We pull out the data for the slides
    ########################################################################
    ############
    # Slide 1:
    ############
    # visual - only the path of the saved figure is needed for the slide
    _, figure_path = plot_epc_distribution(
        epc_rating_summary, CUSTOMER_KEY, title="", background_color="white", bar_height=0.75, font_size=15
    )
    # floor area - upper and lower bounds
    # Take just properties that are below the SAP target
    properties_needing_work = full_properties[
        full_properties["current_sap_points"] < SAP_TARGET
    ]
    property_details_needing_work = full_property_details[
        full_property_details["uprn"].isin(properties_needing_work["uprn"])
    ]
    min_area, max_area, average_area = (
        full_property_details["total_floor_area"].min(),
        full_property_details["total_floor_area"].max(),
        full_property_details["total_floor_area"].mean()
    )
    # Annual energy consumption - upper and lower bounds
    min_energy_consumption, max_energy_consumption, average_consumption, total_consumption = (
        property_details_needing_work["adjusted_energy_consumption"].min(),
        property_details_needing_work["adjusted_energy_consumption"].max(),
        property_details_needing_work["adjusted_energy_consumption"].mean(),
        property_details_needing_work["adjusted_energy_consumption"].sum()
    )
    # Co2 emissions - upper and lower bounds
    min_co2, max_co2, average_co2, total_co2 = (
        property_details_needing_work["co2_emissions"].min(),
        property_details_needing_work["co2_emissions"].max(),
        property_details_needing_work["co2_emissions"].mean(),
        property_details_needing_work["co2_emissions"].sum()
    )
    # Valuation: upper and lower bounds and average - take positive values in case we have just a sample
    # Note the "average" quoted on the slide is the median of the positive valuations
    valuation_df = properties_df[properties_df["current_valuation"] > 0]
    min_valuation, max_valuation, average_valuation = (
        valuation_df["current_valuation"].min(),
        valuation_df["current_valuation"].max(),
        valuation_df["current_valuation"].median()
    )
    slide_1_commentary = (
        f"Floor areas range from {min_area} to {max_area} square meters, with an average of {average_area} square "
        f"meters. \n"
        f"Annual energy consumption ranges from {min_energy_consumption} to {max_energy_consumption} kWh, with an "
        f"average of {average_consumption} kWh. \n"
        f"CO2 emissions range from {min_co2} to {max_co2} tonnes, with an average of {average_co2} tonnes. \n"
        f"Valuations range from £{min_valuation} to £{max_valuation}, with an average of £"
        f"{average_valuation}.\n"
    )
    ############
    # Slide 2:
    ############
    # What it would take to hit the EPC target
    # We calculate the number of units that will make it to the target rating
    units_hitting_target = recommendations_summary[
        recommendations_summary["expected_epc_rating"] == EPC_TARGET
    ]
    n_units_to_target = units_hitting_target.shape[0]
    measures = "Electrical heating system upgrades & heating controls and Hot water system improvements"
    # Costs
    (
        expected_cost_per_unit_lower,
        expected_cost_per_unit_upper,
        expected_project_cost,
    ) = (
        units_hitting_target["total_cost"].min(),
        units_hitting_target["total_cost"].max(),
        units_hitting_target["total_cost"].sum()
    )
    # Per property
    # Take positive entries just in case we have a sample
    valuation_impact_df = plans_df[plans_df["property_id"].isin(units_hitting_target["property_id"])]
    valuation_impact_df = valuation_impact_df[valuation_impact_df["valuation_increase_lower_bound"] > 0]
    min_valuation_impact, max_valuation_impact, average_valuation_impact = (
        valuation_impact_df["valuation_increase_lower_bound"].median(),
        valuation_impact_df["valuation_increase_upper_bound"].median(),
        valuation_impact_df["valuation_increase_average"].median()
    )
    # Bill savings per property
    min_bill_savings, max_bill_savings, average_bill_savings = (
        units_hitting_target["total_bill_savings"].min(),
        units_hitting_target["total_bill_savings"].max(),
        units_hitting_target["total_bill_savings"].mean()
    )
    # Total CO2 reduction of portfolio
    min_co2_reduction, max_co2_reduction, average_co2_reduction, total_co2_reduction = (
        units_hitting_target["total_carbon"].min(),
        units_hitting_target["total_carbon"].max(),
        units_hitting_target["total_carbon"].mean(),
        units_hitting_target["total_carbon"].sum()
    )
    slide_2_commentary = (
        f"{n_units_to_target} units expected to achieve EPC {EPC_TARGET} \n"
        f"Expected cost: {expected_cost_per_unit_lower} - {expected_cost_per_unit_upper}, total project: £"
        f"{expected_project_cost}\n"
        f"Measures include: {measures}\n"
        f"Valuation increase per property: £{min_valuation_impact}-{max_valuation_impact}, average: £"
        f"{average_valuation_impact}\n"
        f"Bill savings per property: £{min_bill_savings}-{max_bill_savings}, average: £{average_bill_savings}\n"
        f"Total CO2 reduction: {min_co2_reduction}-{max_co2_reduction} tonnes, average: {average_co2_reduction}\n"
        f"tonnes, total for the {n_units_to_target} properties: {total_co2_reduction} tonnes\n"
    )
    ############
    # Slide 3:
    ############
    units_missed_target = recommendations_summary_second_scenario.copy()
    n_units_missed_target = units_missed_target.shape[0]
    # How close were the properties that missed the target
    # We calculate the difference between the expected sap points and the lower bound sap points for the target.
    # The slide text below quotes this range, so it has to be computed here - with the calculation commented
    # out the f-string raised a NameError
    min_difference, max_difference = (
        np.ceil(units_missed_target["sap_difference"].min()),
        np.ceil(units_missed_target["sap_difference"].max())
    )
    second_scenario_measures = ("Electrical heating system upgrades & heating controls, Hot water system improvements "
                                "and internal wall insulation")
    # Just take all of the units in the second scenario, since they're borderline
    units_hitting_target_second_scenario = recommendations_summary_second_scenario[
        (recommendations_summary_second_scenario["uprn"].isin(units_missed_target["uprn"].values))
    ]
    n_units_hitting_second_scenario = units_hitting_target_second_scenario[
        units_hitting_target_second_scenario["expected_epc_rating"] == EPC_TARGET
    ].shape[0]
    # Impact on second scenario
    # Costs
    (
        expected_cost_per_unit_lower_second_scenario,
        expected_cost_per_unit_upper_second_scenario,
        expected_project_cost_second_scenario,
    ) = (
        recommendations_summary_second_scenario["total_cost"].min(),
        recommendations_summary_second_scenario["total_cost"].max(),
        recommendations_summary_second_scenario["total_cost"].sum()
    )
    valuation_impact_df_second_scenario = plans_second_scenario_df[
        plans_second_scenario_df["uprn"].isin(units_hitting_target_second_scenario["uprn"])
    ]
    valuation_impact_df_second_scenario = valuation_impact_df_second_scenario[
        valuation_impact_df_second_scenario["valuation_increase_lower_bound"] > 0
    ]
    (
        min_valuation_impact_second_scenario,
        max_valuation_impact_second_scenario,
        average_valuation_impact_second_scenario
    ) = (
        valuation_impact_df_second_scenario["valuation_increase_lower_bound"].median(),
        valuation_impact_df_second_scenario["valuation_increase_upper_bound"].median(),
        valuation_impact_df_second_scenario["valuation_increase_average"].median()
    )
    # Bill savings per property
    min_bill_savings_second_scenario, max_bill_savings_second_scenario, average_bill_savings_second_scenario = (
        units_hitting_target_second_scenario["total_bill_savings"].min(),
        units_hitting_target_second_scenario["total_bill_savings"].max(),
        units_hitting_target_second_scenario["total_bill_savings"].mean()
    )
    # Total CO2 reduction of portfolio
    (
        min_co2_reduction_second_scenario,
        max_co2_reduction_second_scenario,
        average_co2_reduction_second_scenario,
        total_co2_reduction_second_scenario
    ) = (
        units_hitting_target_second_scenario["total_carbon"].min(),
        units_hitting_target_second_scenario["total_carbon"].max(),
        units_hitting_target_second_scenario["total_carbon"].mean(),
        units_hitting_target_second_scenario["total_carbon"].sum()
    )
    # Values for the leftovers
    units_missing_second_scenario = recommendations_summary_second_scenario[
        (recommendations_summary_second_scenario["expected_epc_rating"] != EPC_TARGET) &
        (recommendations_summary_second_scenario["uprn"].isin(units_missed_target["uprn"].values))
    ]
    # The closing sentence of the slide quotes the miss "on average", so the mean is the figure it needs
    average_difference_second_scenario = np.ceil(units_missing_second_scenario["sap_difference"].mean())
    slide_3_text = (
        f"{n_units_missed_target} units look like they would miss the EPC {EPC_TARGET} by {min_difference}-"
        f"{max_difference} points \n"
        "When on site, an assessor may be able to identify further improvements to bring the properties up to an EPC "
        f"{EPC_TARGET}.\n"
        f"We have looked at a more extensive package for these properties, including: {second_scenario_measures}\n"
        f"Of the {n_units_missed_target} properties, a further {units_hitting_target_second_scenario.shape[0]} are "
        f"expected to achieve EPC {EPC_TARGET} with these measures.\n"
        f"Expected cost: {expected_cost_per_unit_lower_second_scenario} - "
        f"{expected_cost_per_unit_upper_second_scenario}, "
        f"total project: £"
        f"{expected_project_cost_second_scenario}\n"
        f"Valuation increase per property: £{min_valuation_impact_second_scenario}-"
        f"{max_valuation_impact_second_scenario}, average: £"
        f"{average_valuation_impact_second_scenario}\n"
        f"Bill savings per property: £{min_bill_savings_second_scenario}-{max_bill_savings_second_scenario}, "
        f"average: £{average_bill_savings_second_scenario}\n"
        f"Total CO2 reduction: {min_co2_reduction_second_scenario}-{max_co2_reduction_second_scenario} tonnes, "
        f"average: "
        f"{average_co2_reduction_second_scenario}\n"
        f"tonnes, total for the {n_units_hitting_second_scenario} properties: {total_co2_reduction_second_scenario} "
        f"tonnes\n"
        f"Even in the second scenario, the remaining {units_missing_second_scenario.shape[0]} properties are expected "
        f"to miss EPC {EPC_TARGET} by {average_difference_second_scenario} point on average - they should be visited "
        f"by an assessor"
    )
    slide_data = {
        'slide_1': {
            "title": "EPC Rating Distribution",
            'image_path': figure_path,  # Pass the path to the saved image
            "text": slide_1_commentary
        },
        "slide_2": {
            "title": f"Properties that achieve EPC {EPC_TARGET}",
            "text": slide_2_commentary,
        },
        "slide 3": {
            "title": f"Properties that miss EPC {EPC_TARGET}",
            "text": slide_3_text
        }
    }
    save_location = f"etl/customers/{CUSTOMER_KEY}/{CUSTOMER_KEY}_tech_slides.pptx"
    create_powerpoint(slide_data, save_location)

View file

@ -0,0 +1,787 @@
from recommendations.recommendation_utils import convert_thickness_to_numeric
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
from etl.epc_clean.epc_attributes.FloorAttributes import FloorAttributes
class Eligibility:
"""
Given the epc data about a property, this class holds the logic for determining if the home
is eligible for a specific retrofit measure.
For example, this could be whether the loft has insulation below a standardised threshold, or
if it has an empty cavity
Further to this, this class is responsible for determining if the property is suitable for specific funding
schemes
"""
loft = None
cavity = None
solid_wall = None
room_roof = None
flat_roof = None
suspended_floor = None
solid_floor = None
# schemes based on Warmfront now
gbis_warmfront = None
eco4_warmfront = None
# Schemes based on full eligibility
gbis = None
eco4 = None
# If the loft has less than 100mm of insulation, we classify the home as needing loft insulation
LOFT_INSULATION_THRESHOLD = 100
HIGH_LOFT_INSULATION_THRESHOLD = 269
# Because EPCS have different values for tenure, we need to remap them to a common set of values
tenure_remap = {
'NO DATA!': "unknown",
'Not defined - use in the case of a new dwelling for which the intended tenure in not known. It is no':
"unknown",
'Owner-occupied': 'Owner-occupied',
'Rented (private)': 'Rented (private)',
'Rented (social)': 'Rented (social)',
'owner-occupied': 'Owner-occupied',
'rental (private)': 'Rented (private)',
'rental (social)': 'Rented (social)',
'unknown': "unknown",
}
def __init__(self, epc, cleaned):
    """
    Keep hold of the EPC record and the cleaned lookups, then resolve the fabric attributes and the tenure
    :param epc: The EPC record for the property. The tenure and the fabric descriptions are read from it
    :param cleaned: The cleaned lookups, handed to parse_fabric via self.cleaned
    """
    self.epc, self.cleaned = epc, cleaned
    # Walls, roof and floor are each resolved from their EPC description, in that order
    fabric_keys = (
        ("walls", "walls-description"),
        ("roof", "roof-description"),
        ("floor", "floor-description"),
    )
    for attribute, description_key in fabric_keys:
        setattr(self, attribute, self.parse_fabric(description_key))
    # Tenure values that are not in the remap are stored as None
    self.tenure = self.tenure_remap.get(self.epc["tenure"])
def parse_fabric(self, key):
    """
    Resolve the attributes for one fabric description of the EPC
    :param key: One of "walls-description", "roof-description" or "floor-description"
    :return: The first cleaned record whose original description equals the EPC value, otherwise the output
             of the matching attributes cleaner with a "clean_description" entry added
    """
    # Prefer a cleaned record that was produced for exactly this description
    matches = []
    for candidate in self.cleaned[key]:
        if candidate["original_description"] == self.epc[key]:
            matches.append(candidate)
    if matches:
        return matches[0]
    if "SAP05:" in self.epc[key]:
        # Placeholder handling: this occurs for very old EPCs, so the description is replaced on the EPC
        # record itself and the cleaner works from "(assumed)"
        self.epc[key] = "(assumed)"
    cleaners = {
        "walls-description": WallAttributes,
        "roof-description": RoofAttributes,
        "floor-description": FloorAttributes,
    }
    if key not in cleaners:
        raise ValueError("Invalid key")
    cleaner = cleaners[key](self.epc[key])
    attributes = cleaner.process()
    tidied = cleaner.description.replace("(assumed)", "")
    attributes["clean_description"] = tidied.rstrip().capitalize()
    return attributes
def loft_insulation(self, loft_thickness_threshold: int = None):
    """
    Decide from the roof attributes whether the property is suitable for loft insulation and store the
    outcome on self.loft. A loft whose existing insulation is no thicker than the threshold is suitable
    :param loft_thickness_threshold: Integer, Optional. Thickness at or below which the loft is deemed
                                     suitable. Defaults to LOFT_INSULATION_THRESHOLD when not provided
    """
    if loft_thickness_threshold is None:
        loft_thickness_threshold = self.LOFT_INSULATION_THRESHOLD
    upper_band_limit = self.HIGH_LOFT_INSULATION_THRESHOLD
    roof = self.roof
    # Only a pitched roof that is not a room in roof is treated as a loft
    if not (roof["is_pitched"] and (not roof["is_roof_room"])):
        self.loft = {
            "suitability": False,
            "thickness": None,
            "reason": "roof not loft",
            "thickness_classification": None
        }
        return
    # Turn the textual thickness into a number we can compare against
    measured = convert_thickness_to_numeric(
        string_thickness=roof["insulation_thickness"],
        is_pitched=roof["is_pitched"],
        is_flat=roof["is_flat"]
    )
    # Band the thickness: 0 - 100mm, 100 - 270mm or 270mm+
    if measured <= 100:
        band = "0-100mm"
    else:
        band = "100-270mm" if measured <= upper_band_limit else "270mm+"
    # Above the threshold the existing insulation is already thick enough
    if measured <= loft_thickness_threshold:
        suitable, reason = True, None
    else:
        suitable, reason = False, "existing insulation"
    self.loft = {
        "suitability": suitable,
        "thickness": measured,
        "reason": reason,
        "thickness_classification": band
    }
    return
def cavity_insulation(self):
    """
    Decide from the wall attributes whether the property is suitable for cavity wall insulation and store
    the outcome (suitability and cavity type) on self.cavity
    :return:
    """
    walls = self.walls
    cavity_wall = walls["is_cavity_wall"]
    unfilled = not walls["is_filled_cavity"]
    # As built and assumed, without average or above average insulation
    assumed_as_built = (
        walls["is_as_built"]
        and walls["insulation_thickness"] not in ["average", "above average"]
        and walls["is_assumed"]
    )
    partially_filled = "partial" in walls["clean_description"].lower()
    # Potentially under performing cavities - anything that is assumed, as built and insulated
    underperforming = (
        walls["is_as_built"] and walls["insulation_thickness"] in ["average"] and walls["is_assumed"]
    )
    insulated_internally = walls["internal_insulation"]
    insulated_externally = walls["external_insulation"]
    # The first rule that applies wins; a wall with internal or external insulation is never suitable
    if insulated_internally or insulated_externally:
        suitable, cavity_type = False, "internal or external wall insulation"
    elif cavity_wall and (unfilled and not partially_filled):
        suitable, cavity_type = True, "empty"
    elif cavity_wall and assumed_as_built:
        suitable, cavity_type = True, "as built assumed"
    elif cavity_wall and partially_filled:
        suitable, cavity_type = True, "partial"
    elif cavity_wall and underperforming:
        suitable, cavity_type = True, "underperforming"
    else:
        suitable, cavity_type = False, "full"
    self.cavity = {
        "suitability": suitable,
        "type": cavity_type
    }
def solid_wall_insulation(self):
    """
    Given the description of the walls, this function determines if the property is suitable for solid wall
    insulation. The outcome is stored on self.solid_wall

    A wall is suitable when it is solid brick and is NOT already insulated to an average or above average
    level. This matches the solid and suspended floor checks, which also only flag uninsulated fabric
    :return:
    """
    is_solid = self.walls["is_solid_brick"]
    is_insulated = self.walls["insulation_thickness"] in ["average", "above average"]
    # An already insulated solid wall does not need the measure, so only uninsulated solid walls qualify
    self.solid_wall = {
        "suitability": bool(is_solid and (not is_insulated)),
    }
def room_roof_insulation(self):
    """
    Decide from the roof attributes whether the property is suitable for room in roof insulation and store
    the outcome on self.room_roof. Only a room in roof with a numeric thickness of 0 is suitable
    """
    roof = self.roof
    has_room_in_roof = roof["is_roof_room"]
    # Without a room in roof there is nothing to measure
    outcome = {
        "suitability": False,
        "thickness": None
    }
    if has_room_in_roof:
        measured = convert_thickness_to_numeric(
            roof["insulation_thickness"],
            roof["is_pitched"],
            roof["is_flat"]
        )
        outcome = {
            "suitability": has_room_in_roof and measured == 0,
            "thickness": measured
        }
    self.room_roof = outcome
def flat_roof_insulation(self):
    """
    Decide from the roof attributes whether the property is suitable for flat roof insulation and store the
    outcome on self.flat_roof. Only a flat roof with a numeric thickness of 100 or less is suitable
    """
    roof = self.roof
    has_flat_roof = roof["is_flat"]
    # A roof that is not flat has nothing to measure
    outcome = {
        "suitability": False,
        "thickness": None
    }
    if has_flat_roof:
        measured = convert_thickness_to_numeric(
            roof["insulation_thickness"],
            roof["is_pitched"],
            roof["is_flat"]
        )
        outcome = {
            "suitability": has_flat_roof and measured <= 100,
            "thickness": measured
        }
    self.flat_roof = outcome
def suspended_floor_insulation(self):
    """
    Decide from the floor attributes whether the property is suitable for suspended floor insulation and
    store the outcome on self.suspended_floor. A floor flagged as having no data is never suitable
    """
    floor = self.floor
    if "no_data" in floor and floor["no_data"]:
        self.suspended_floor = {"suitability": False}
        return
    suspended = floor["is_suspended"]
    already_insulated = floor["insulation_thickness"] in ["average", "above average"]
    # Suitable only when the floor is suspended and not already insulated
    self.suspended_floor = {"suitability": suspended and (not already_insulated)}
    return
def solid_floor_insulation(self):
    """
    Decide from the floor attributes whether the property is suitable for solid floor insulation and store
    the outcome on self.solid_floor. A floor flagged as having no data is never suitable
    """
    floor = self.floor
    if "no_data" in floor and floor["no_data"]:
        self.solid_floor = {"suitability": False}
        return
    solid = floor["is_solid"]
    already_insulated = floor["insulation_thickness"] in ["average", "above average"]
    # Suitable only when the floor is solid and not already insulated
    self.solid_floor = {"suitability": solid and (not already_insulated)}
    return
def check_gbis_warmfront(self):
    """
    Check eligibility for the Great British Insulation Scheme (GBIS), tailored to the measures the Warmfront
    Team deliver, and store the outcome on self.gbis_warmfront

    The scheme criteria can be found here:
    https://www.ofgem.gov.uk/environmental-and-social-schemes/great-british-insulation-scheme/homeowners-and-tenants
    At a high level:
    - The home must be within council tax bands A-D in England, A-E in Scotland, A-E in Wales
    - It must have an EPC rating of D or below

    The council tax band is not checked for the moment. There is a government website to search for the band
    of a property (https://www.gov.uk/council-tax-bands) and the data is possibly held on the council tax
    valuation list, but it remains to be seen whether it is openly accessible:
    https://www.gov.uk/government/statistics/quality-assurance-of-administrative-data-in-the-uk-house-price-index
    /valuation-office-agency-council-tax-valuation-lists

    What is checked here:
    1) Whether the property is an EPC D or below (a current SAP score under 69)
    2) Whether the property is suitable for cavity wall insulation

    GBIS applies to many more insulation measures (see the ofgem document) and has no minimum upgrade
    requirement, so the post retrofit sap score does not need to be simulated
    """
    # Refresh the cavity wall suitability before reading it
    self.cavity_insulation()
    current_sap = int(self.epc["current-energy-efficiency"])
    sap_in_scope = current_sap < 69
    # An empty cavity is the perfect case, any other suitable cavity is near perfect
    if (self.cavity["type"] == "empty") and sap_in_scope:
        eligible, strict, message = True, True, "Perfect suitability"
    elif self.cavity["suitability"] and sap_in_scope:
        eligible, strict, message = True, True, "Near perfect suitability"
    else:
        eligible, strict, message = False, False, "All conditions fail"
    self.gbis_warmfront = {
        "eligible": eligible,
        "strict": strict,
        "message": message,
    }
def check_eco4_warmfront(self):
    """
    Check eligibility for funding under the ECO4 scheme, considering just the measures that can be
    implemented by the Warmfront team (cavity wall and loft insulation). The outcome is stored on
    self.eco4_warmfront together with the cavity type and the loft thickness classification

    We use Ofgem's V1.1 ECO 4 guidance document for the conditions under which a property is eligible:
    https://www.ofgem.gov.uk/sites/default/files/2023-02/ECO4%20Delivery%20Guidance%20v1.1%20%281%29.pdf
    The conditions (to be reviewed) to be eligible for retrofit, under ECO4, are the following:
    1) The property is a social home (assumed prior to this function, as this code will often be run on
       property lists provided by a HA)
    2) The property is an EPC E or below
    3) The property has an unfilled cavity and uninsulated loft
    4) After retrofit, the property will hit an EPC C
    Note: This criteria will likely be adjusted depending on the properties that can be served right now

    The post retrofit SAP score (condition 4) is not checked by this method
    :return:
    """
    current_sap = int(self.epc["current-energy-efficiency"])
    self.cavity_insulation()
    self.loft_insulation()
    # A roof that is not a loft has no thickness, so we put in a placeholder that fails every loft check
    if self.loft["reason"] == "roof not loft":
        self.loft["thickness"] = 999
    cavity_ok = self.cavity["suitability"]
    cavity_empty = self.cavity["type"] == "empty"
    thickness = self.loft["thickness"]
    sap_low = current_sap < 55
    sap_high = current_sap >= 55
    # Each row is (condition, eligible, strict, message). The rows are tried in order and the first one
    # whose condition holds decides the outcome
    decision_table = (
        # No conditions meet
        (not cavity_ok and (thickness > 100) and sap_high, False, False, "All conditions fail"),
        # Perfect match
        (cavity_empty and (thickness <= 100) and sap_low, True, True, "Perfect suitability"),
        # Near perfect match - but we would not recommend this using the model
        (cavity_ok and (thickness <= 100) and sap_low, True, True, "Near perfect suitability"),
        # Cavity is suitable, loft is within 150mm, sap is good
        (cavity_ok and (thickness <= 150) and sap_low, True, False, "Meets cavity, loft borderline, meets sap"),
        # Cavity is suitable, loft is not, sap is good
        (cavity_ok and (thickness > 150) and sap_low, True, False, "Meets cavity and sap"),
        # Cavity is not suitable, loft and sap are - we say this is not eligible
        (not cavity_ok and (thickness <= 100) and sap_low, False, False, "failed fabric check"),
        # Cavity and loft suitable, sap too high
        (cavity_ok and (thickness <= 150) and sap_high, True, False, "Meets fabric, fails SAP check"),
        # Meets just cavity
        (cavity_ok and (thickness > 100) and sap_high, True, False, "Meets just cavity"),
        # Fails cavity and loft but meets sap
        (not cavity_ok and (thickness > 100) and sap_low, False, False, "Fails cavity and loft, meets SAP"),
        # Fails cavity, meets loft, fails sap
        (not cavity_ok and (thickness <= 100) and sap_high, False, False, "Fails cavity, meets loft, fails SAP"),
    )
    for condition, eligible, strict, message in decision_table:
        if condition:
            self.eco4_warmfront = {
                "eligible": eligible,
                "strict": strict,
                "message": message,
                "cavity_type": self.cavity["type"],
                "loft_type": self.loft["thickness_classification"]
            }
            return
    raise ValueError("Implement me")
def check_gbis(self):
    """
    Check eligibility for the Great British Insulation Scheme (GBIS) across ALL of the fabric measures this
    class can assess, and store the outcome on self.gbis

    The scheme criteria can be found here:
    https://www.ofgem.gov.uk/environmental-and-social-schemes/great-british-insulation-scheme/homeowners-and-tenants
    Full delivery guidance can be downloaded here:
    https://www.ofgem.gov.uk/sites/default/files/2023-08/Great%20British%20Insulation%20Scheme%20Delivery
    %20Guidance%20V101693416860968.pdf

    For social housing, the criteria is the following:
    If the property is currently an EPC D:
    - It's valid for innovation measures only but not a heating control measure
    If the property is currently an EPC E or below:
    - It's valid for all eligible insulation measures
    In both cases the property must be rented at below the market rate. All eligible social housing is
    treated based on the low income group, therefore the tenant must be in receipt of one of the eligible
    benefits

    From the GBIS guidance document (determining whether the premises are let below market rate):
    3.101 Social housing is only eligible where it is let below the market rate. The supplier must produce a
          declaration signed by a social landlord confirming this (or, where the premises are currently void,
          that they have previously been and will be let below the market rate). The declaration is part of
          the Eligibility and Pre-Retrofit Declaration form, which suppliers must retain for audit purposes
    3.102 Where social housing is let at or above the market rate, the property can be treated as a private
          domestic premises, where the occupant meets the eligibility requirements (see the section on PRS
          from paragraph 1.13)

    The measures that can be implemented under GBIS include:
    - cavity wall (including party wall)
    - loft
    - solid wall
    - pitched roof
    - flat roof
    - under-floor
    - solid floor
    - park home
    - room-in-roof
    :return:
    """
    # Refresh every fabric assessment before reading the results
    self.cavity_insulation()
    self.loft_insulation()
    self.solid_wall_insulation()
    self.room_roof_insulation()
    self.flat_roof_insulation()
    self.suspended_floor_insulation()
    self.solid_floor_insulation()
    current_sap = int(self.epc["current-energy-efficiency"])
    is_below_e = current_sap <= 54
    is_below_c = current_sap <= 68
    # The property needs a measure as soon as one of the fabric assessments finds it suitable
    needs_measure = False
    for assessment in (
        "cavity", "loft", "solid_wall", "room_roof", "flat_roof", "suspended_floor", "solid_floor"
    ):
        needs_measure = getattr(self, assessment)["suitability"]
        if needs_measure:
            break
    tenure = self.tenure
    if tenure == "Rented (social)":
        if is_below_e:
            eligible, message = needs_measure, "eligible under fabric measure"
        elif is_below_c:
            # EPC D - this is a placeholder methodology
            eligible = int(self.epc["potential-energy-efficiency"]) > 68
            message = "contingent on innovation measure delivery"
        else:
            eligible, message = False, "not eligible"
    elif tenure == "Rented (private)":
        eligible, message = is_below_c and needs_measure, "eligible under fabric measure"
    elif tenure == "Owner-occupied":
        eligible, message = False, "Out-of-scope"
    elif (tenure is None) or tenure == "unknown":
        eligible, message = needs_measure, "unknown tenure"
    else:
        raise ValueError("Implement me other tenure types")
    self.gbis = {
        "eligible": eligible,
        "message": message
    }
    return
def check_eco4(self):
    """
    Assess whether the property looks eligible for ECO4 funding and store the outcome on ``self.eco4``.

    ECO4 supports nearly all measures, so if we have commercial agreements in place then a large number
    of homes would be eligible for ECO funding, if identified.
    These are the eligibility criteria we consider for this process:
    Privately rented, Help to heat group
    - Sap E-G
    - Must receive one of solid wall insulation, first time central heating or district heating control
    - The property must already have cavity walls and roof insulated
    Social Housing, SAP D
    - Innovation measures and insulation measures to meet the minimum insulation requirement
    - Improvement to at least band C
    - Fabric measures
    - If receiving any heating measures, must have at least one insulation measure first
    Social Housing, SAP E-G
    - Insulation measures, first time central heating, renewable heating, district heating connection,
    innovation measures
    - Improvement to D (F & G properties) or C (E properties)
    - If receiving any heating measure, must already have cavity and roof insulation
    Privately rented, ECO4 Flex route 1, 2, 3, 4
    - Must have SAP E-G
    - Most measures eligible, but must receive one of solid wall insulation, first time central heating,
    renewable heating and district heating control
    - Improvement to D (F & G properties) or C (E properties)
    - All homes receiving heating measures must first have insulated cavity/roof
    The flex routes are given here:
    https://so-eco.co.uk/what-is-eco4-flex/#:~:text=One%20way%20to%20gain%20ECO4,
    including%20elderly%20residents%20and%20lodgers.

    Any tenure other than "Rented (social)" or "Rented (private)" is recorded as out of scope.

    :return: None. The result is written to ``self.eco4`` as a dict with "eligible" and "message" keys.
    :raises ValueError: if the social housing branches are internally inconsistent (should never happen).
    """
    # Run every fabric suitability check first; each one populates the matching attribute read below.
    self.cavity_insulation()
    self.loft_insulation()
    self.solid_wall_insulation()
    self.room_roof_insulation()
    self.flat_roof_insulation()
    self.suspended_floor_insulation()
    self.solid_floor_insulation()

    current_sap = int(self.epc["current-energy-efficiency"])
    is_below_e = current_sap <= 54
    is_below_c = current_sap <= 68
    sap_potential = int(self.epc["potential-energy-efficiency"])
    # Any main heating description that does not mention a boiler is treated as a candidate for first
    # time central heating.
    first_time_central_heating = "boiler" not in self.epc["mainheat-description"].lower()
    needs_fabric_measure = (
        self.cavity["suitability"] or
        self.loft["suitability"] or
        self.solid_wall["suitability"] or
        self.room_roof["suitability"] or
        self.flat_roof["suitability"] or
        self.suspended_floor["suitability"] or
        self.solid_floor["suitability"]
    )

    # The potential SAP must reach at least a D (>= 55) for properties scoring 38 or below, or at least
    # a C (>= 69) for anything currently scoring 68 or below.
    expected_to_meet_upgrades = (
        (current_sap <= 38 and sap_potential >= 55) or
        (current_sap <= 68 and sap_potential >= 69)
    )

    if self.tenure == "Rented (social)":
        if is_below_c and (not is_below_e) and expected_to_meet_upgrades:
            # If the property is a D, then it's eligible under innovation measures but requires improvement
            # to a band C
            self.eco4 = {
                "eligible": True,
                "message": "eligible under innovation measure and improvement to band C"
            }
        elif is_below_e and expected_to_meet_upgrades:
            # If the property is an E or below, then it's eligible under fabric measures or
            # heating/innovation measures
            if needs_fabric_measure:
                message = "eligible under fabric measures, with sufficient post retrofit sap improvement"
            else:
                message = (
                    "eligible under heating and innovation measures, with sufficient post retrofit sap "
                    "improvement"
                )
            self.eco4 = {"eligible": True, "message": message}
        else:
            # Defensive invariant: the two branches above already cover every property at or below
            # SAP 68 that is expected to meet the upgrade target.
            if (current_sap <= 68) and expected_to_meet_upgrades:
                raise ValueError("something is wrong")
            self.eco4 = {
                "eligible": False,
                "message": "not eligible, above EPC C"
            }
        return

    if self.tenure == 'Rented (private)':
        # For private homes, the property needs to be an E or below
        # For private homes, the cavity must be filled and the roof insulated
        cavity_filled = not self.cavity["suitability"]
        roof_insulated = (
            (not self.loft["suitability"]) and
            (not self.room_roof["suitability"]) and
            (not self.flat_roof["suitability"])
        )
        if not (is_below_e and cavity_filled and roof_insulated and expected_to_meet_upgrades):
            self.eco4 = {
                "eligible": False,
                "message": "not eligible at this time, EPC too high"
            }
            # Return here so this result is not overwritten by the generic "Out of scope" fallback below.
            return

        if self.solid_wall["suitability"]:
            self.eco4 = {
                "eligible": True,
                "message": "eligible under solid wall insulation, conditional on post retrofit sap and help "
                           "to heat/ECO flex route"
            }
        elif first_time_central_heating:
            self.eco4 = {
                "eligible": True,
                "message": "eligible under first time central heating, conditional on post retrofit sap and "
                           "help to heat/ECO flex route"
            }
        else:
            self.eco4 = {
                "eligible": False,
                "message": "not eligible at this time"
            }
        return

    # Every other tenure (e.g. owner-occupied, unknown) is not assessed for ECO4.
    self.eco4 = {
        "eligible": False,
        "message": "Out of scope"
    }

View file

@ -0,0 +1,6 @@
# Eligibility
This codebase is responsible for determining whether properties look like they would be
eligible for retrofit funding schemes. To do this, we use our SAP ML model to score
what the property would look like after a retrofit. We then compare this to the eligibility
criteria of various schemes to determine whether the property looks likely to be eligible for funding.

View file

View file

@ -0,0 +1,664 @@
Housing Association,No.,Address,Postcode
HA15,2,2 Lander Road,HP19 9TT
HA15,4,4 Lander Road,HP19 9TT
HA15,5,5 Lander Road,HP19 9TT
HA15,12,12 Lander Road,HP19 9TT
HA15,14,14 Lander Road,HP19 9TT
HA15,18,18 Lander Road,HP19 9TT
HA15,22,22 Lander Road,HP19 9TT
HA15,1,1 Eeles Close,HP19 9TU
HA15,2,2 Eeles Close,HP19 9TU
HA15,3,3 Eeles Close,HP19 9TU
HA15,12,12 Eeles Close,HP19 9TU
HA15,15,15 Eeles Close,HP19 9TU
HA15,2,2 Dicks Way,HP19 9UA
HA15,4,4 Dicks Way,HP19 9UA
HA15,5,5 Dicks Way,HP19 9UA
HA15,6,6 Dicks Way,HP19 9UA
HA15,8,8 Dicks Way,HP19 9UA
HA15,9,9 Dicks Way,HP19 9UA
HA15,14,14 Dicks Way,HP19 9UA
HA15,15,15 Dicks Way,HP19 9UA
HA15,17,17 Dicks Way,HP19 9UA
HA15,20,20 Dicks Way,HP19 9UA
HA15,26,26 Dicks Way,HP19 9UA
HA15,28,28 Dicks Way,HP19 9UA
HA15,4,4 Fletcher Close,HP19 9UB
HA15,5,5 Fletcher Close,HP19 9UB
HA15,24,24 Fletcher Close,HP19 9UB
HA15,25,25 Fletcher Close,HP19 9UB
HA15,27,27 Fletcher Close,HP19 9UB
HA15,28,28 Fletcher Close,HP19 9UB
HA15,29,29 Fletcher Close,HP19 9UB
HA15,31,31 Fletcher Close,HP19 9UB
HA15,32,32 Fletcher Close,HP19 9UB
HA15,33,33 Fletcher Close,HP19 9UB
HA15,34,"34 Fletcher Close,Aylesbury,Bucks",HP19 9UB
HA15,1,1 Grimmer Close,HP19 9UD
HA15,11,11 Grimmer Close,HP19 9UD
HA15,14,14 Grimmer Close,HP19 9UD
HA15,15,15 Grimmer Close,HP19 9UD
HA15,17,17 Grimmer Close,HP19 9UD
HA15,18,18 Grimmer Close,HP19 9UD
HA15,21,21 Grimmer Close,HP19 9UD
HA15,23,23 Grimmer Close,HP19 9UD
HA15,24,24 Grimmer Close,HP19 9UD
HA15,28,28 Grimmer Close,HP19 9UD
HA15,30,30 Grimmer Close,HP19 9UD
HA15,1,1 Vincent Road,HP19 9UN
HA15,6,6 Vincent Road,HP19 9UN
HA15,10,10 Vincent Road,HP19 9UN
HA15,12,12 Vincent Road,HP19 9UN
HA15,13,13 Vincent Road,HP19 9UN
HA15,16,16 Vincent Road,HP19 9UN
HA15,21,21 Vincent Road,HP19 9UN
HA15,24,24 Vincent Road,HP19 9UN
HA15,26,26 Vincent Road,HP19 9UN
HA15,27,27 Vincent Road,HP19 9UN
HA15,32,32 Vincent Road,HP19 9UN
HA15,1,1 Reading Close,HP19 9UW
HA15,2,2 Reading Close,HP19 9UW
HA15,3,3 Reading Close,HP19 9UW
HA15,4,4 Reading Close,HP19 9UW
HA15,5,5 Reading Close,HP19 9UW
HA15,6,6 Reading Close,HP19 9UW
HA15,7,7 Reading Close,HP19 9UW
HA15,9,9 Reading Close,HP19 9UW
HA15,10,10 Reading Close,HP19 9UW
HA15,6,6 Mary Mac Manus Drive,MK18 1UN
HA15,8,8 Mary Mac Manus Drive,MK18 1UN
HA15,10,10 Mary Mac Manus Drive,MK18 1UN
HA15,2,"2 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
HA15,7,"7 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
HA15,9,"9 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
HA15,11,"11 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
HA15,12,"12 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
HA15,16,"16 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
HA15,17,"17 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
HA15,26,"26 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
HA15,38,"38 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
HA15,41,"41 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
HA15,25,"25 New Road Weston Turville, Aylesbury",HP22 5RA
HA15,27,"27 New Road Weston Turville, Aylesbury",HP22 5RA
HA15,29,"29 New Road Weston Turville, Aylesbury",HP22 5RA
HA15,31,"31 New Road Weston Turville, Aylesbury",HP22 5RA
HA15,37,"37 New Road Weston Turville, Aylesbury",HP22 5RA
HA15,39,"39 New Road Weston Turville, Aylesbury",HP22 5RA
HA15,5,"5 Walton Place Weston Turville, Aylesbury",HP22 5RB
HA15,9,"9 Walton Place Weston Turville, Aylesbury",HP22 5RB
HA15,18,"18 Walton Place Weston Turville, Aylesbury",HP22 5RB
HA15,21,"21 Walton Place Weston Turville, Aylesbury",HP22 5RD
HA15,36,"36 Walton Place Weston Turville, Aylesbury",HP22 5RD
HA15,42,"42 Walton Place Weston Turville, Aylesbury",HP22 5RD
HA15,46,"46 Walton Place Weston Turville, Aylesbury",HP22 5RD
HA15,76,"76 Worlds End Lane Weston Turville, Aylesbury",HP22 5RX
HA15,78,"78 Worlds End Lane Weston Turville, Aylesbury",HP22 5RX
HA15,82,"82 Worlds End Lane Weston Turville, Aylesbury",HP22 5RX
HA15,84,"84 Worlds End Lane Weston Turville, Aylesbury",HP22 5RX
HA15,86,"86 Worlds End Lane Weston Turville, Aylesbury",HP22 5RX
HA15,88,"88 Worlds End Lane Weston Turville, Aylesbury",HP22 5RX
HA15,64,"64 Halton Lane Wendover, Aylesbury",HP22 6AZ
HA15,66,"66 Halton Lane Wendover, Aylesbury",HP22 6AZ
HA15,68,"68 Halton Lane Wendover, Aylesbury",HP22 6AZ
HA15,70,"70 Halton Lane Wendover, Aylesbury",HP22 6AZ
HA15,8,"8 South Street Wendover, Aylesbury",HP22 6EF
HA15,2,"2 Barlow Road Wendover, Aylesbury",HP22 6HP
HA15,4,"4 Barlow Road Wendover, Aylesbury",HP22 6HP
HA15,14,"14 Barlow Road Wendover, Aylesbury",HP22 6HP
HA15,15,"15 Barlow Road Wendover, Aylesbury",HP22 6HP
HA15,16,"16 Barlow Road Wendover, Aylesbury",HP22 6HP
HA15,28,"28 Barlow Road Wendover, Aylesbury",HP22 6HP
HA15,1,"1 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
HA15,5,"5 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
HA15,7,"7 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
HA15,8,"8 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
HA15,9,"9 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
HA15,13,"13 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
HA15,16,"16 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
HA15,20,"20 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
HA15,24,"24 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
HA15,26,"26 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
HA15,28,"28 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
HA15,38,"38 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
HA15,44,"44 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
HA15,50,"50 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
HA15,15,"15 Hampden Road Wendover, Aylesbury",HP22 6HU
HA15,18,"18 Hampden Road Wendover, Aylesbury",HP22 6HU
HA15,22,"22 Hampden Road Wendover, Aylesbury",HP22 6HU
HA15,26,"26 Hampden Road Wendover, Aylesbury",HP22 6HU
HA15,28,"28 Hampden Road Wendover, Aylesbury",HP22 6HU
HA15,25,"25 Hampden Road Wendover, Aylesbury",HP22 6HX
HA15,27,"27 Hampden Road Wendover, Aylesbury",HP22 6HX
HA15,31,"31 Hampden Road Wendover, Aylesbury",HP22 6HX
HA15,34,"34 Hampden Road Wendover, Aylesbury",HP22 6HX
HA15,36,"36 Hampden Road Wendover, Aylesbury",HP22 6HX
HA15,38,"38 Hampden Road Wendover, Aylesbury",HP22 6HX
HA15,5,"5 Gainsborough Road, Aylesbury",HP21 9AZ
HA15,1,"1 Dart Close, Aylesbury",HP21 9NP
HA15,1,"1 Wingrave Road Aston Abbotts, Aylesbury",HP22 4LT
HA15,3,"3 Wingrave Road Aston Abbotts, Aylesbury",HP22 4LT
HA15,5,"5 Wingrave Road Aston Abbotts, Aylesbury",HP22 4LT
HA15,82,"82 Winslow Road Wingrave, Aylesbury",HP22 4QB
HA15,84,"84 Winslow Road Wingrave, Aylesbury",HP22 4QB
HA15,106,"106 Winslow Road Wingrave, Aylesbury",HP22 4QB
HA15,125,"125 Winslow Road Wingrave, Aylesbury",HP22 4QB
HA15,19,"19 Abbotts Way Wingrave, Aylesbury",HP22 4QF
HA15,37,"37 Abbotts Way Wingrave, Aylesbury",HP22 4QF
HA15,41,"41 Abbotts Way Wingrave, Aylesbury",HP22 4QF
HA15,43,"43 Abbotts Way Wingrave, Aylesbury",HP22 4QF
HA15,2,"2 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
HA15,5,"5 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
HA15,10,"10 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
HA15,12,"12 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
HA15,19,"19 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
HA15,21,"21 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
HA15,22,"22 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
HA15,31,"31 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
HA15,32,"32 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
HA15,33,"33 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
HA15,34,"34 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
HA15,35,"35 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
HA15,37,"37 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
HA15,38,"38 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
HA15,40,"40 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
HA15,42,"42 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
HA15,23,"23 Great Lane Bierton, Aylesbury",HP22 5DE
HA15,25,"25 Great Lane Bierton, Aylesbury",HP22 5DE
HA15,35,"35 Great Lane Bierton, Aylesbury",HP22 5DE
HA15,37,"37 Great Lane Bierton, Aylesbury",HP22 5DE
HA15,61,"61 Weston Road Aston Clinton, Aylesbury",HP22 5EJ
HA15,65,"65 Weston Road Aston Clinton, Aylesbury",HP22 5EJ
HA15,67,"67 Weston Road Aston Clinton, Aylesbury",HP22 5EJ
HA15,69,"69 Weston Road Aston Clinton, Aylesbury",HP22 5EJ
HA15,28,"28a Tring Road Wendover, Aylesbury",HP22 6NT
HA15,38,"38a Tring Road Wendover, Aylesbury",HP22 6NT
HA15,14,"14 Tring Road Wendover, Aylesbury",HP22 6NT
HA15,34,"34 Tring Road Wendover, Aylesbury",HP22 6NT
HA15,36,"36 Tring Road Wendover, Aylesbury",HP22 6NT
HA15,64,"64 Tring Road Wendover, Aylesbury",HP22 6NX
HA15,68,"68 Tring Road Wendover, Aylesbury",HP22 6NX
HA15,70,"70 Tring Road Wendover, Aylesbury",HP22 6NX
HA15,74,"74 Tring Road Wendover, Aylesbury",HP22 6NX
HA15,76,"76 Tring Road Wendover, Aylesbury",HP22 6NX
HA15,78,"78 Tring Road Wendover, Aylesbury",HP22 6NX
HA15,80,"80 Tring Road Wendover, Aylesbury",HP22 6NX
HA15,90,"90 Tring Road Wendover, Aylesbury",HP22 6NX
HA15,92,"92 Tring Road Wendover, Aylesbury",HP22 6NX
HA15,100,"100 Tring Road Wendover, Aylesbury",HP22 6NX
HA15,104,"104 Tring Road Wendover, Aylesbury",HP22 6NX
HA15,106,"106 Tring Road Wendover, Aylesbury",HP22 6NX
HA15,108,"108 Tring Road Wendover, Aylesbury",HP22 6NX
HA15,114,"114 Tring Road Wendover, Aylesbury",HP22 6NX
HA15,38,"38 The Beeches Wendover, Aylesbury",HP22 6PB
HA15,49,"49 The Beeches Wendover, Aylesbury",HP22 6PB
HA15,54,"54 The Beeches Wendover, Aylesbury",HP22 6PB
HA15,64,"64 The Beeches Wendover, Aylesbury",HP22 6PB
HA15,1,"1 Church End Edlesborough, Dunstable",LU6 2EP
HA15,2,"2 Church End Edlesborough, Dunstable",LU6 2EP
HA15,5,"5 Church End Edlesborough, Dunstable",LU6 2EP
HA15,6,"6 Church End Edlesborough, Dunstable",LU6 2EP
HA15,7,"7 Church End Edlesborough, Dunstable",LU6 2EP
HA15,9,"9 Church End Edlesborough, Dunstable",LU6 2EP
HA15,125,"125 High Street Edlesborough, Dunstable",LU6 2ER
HA15,6,"6 Dove Street Stewkley, Leighton Buzzard",LU7 0HT
HA15,14,"14 Wantage Crescent Wing, Leighton Buzzard",LU7 0NH
HA15,32,"32 Wantage Crescent Wing, Leighton Buzzard",LU7 0NH
HA15,38,"38a Wantage Crescent Wing, Leighton Buzzard",LU7 0NH
HA15,38,"38b Wantage Crescent Wing, Leighton Buzzard",LU7 0NH
HA15,75,"75 High Street Cheddington, Leighton Buzzard",LU7 0RG
HA15,12,"12 New Street Cheddington, Leighton Buzzard",LU7 0RL
HA15,14,"14 New Street Cheddington, Leighton Buzzard",LU7 0RL
HA15,16,"16 New Street Cheddington, Leighton Buzzard",LU7 0RL
HA15,2,"2 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
HA15,4,"4 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
HA15,10,"10 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
HA15,11,"11 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
HA15,17,"17 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
HA15,19,"19 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
HA15,20,"20 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
HA15,23,"23 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
HA15,25,"25 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
HA15,26,"26 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
HA15,28,"28 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
HA15,31,"31 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
HA15,33,"33 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
HA15,36,"36 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
HA15,40,"40 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
HA15,4,"4 Barkham Close Cheddington, Leighton Buzzard",LU7 0RT
HA15,4,"4 Manor Road Cheddington, Leighton Buzzard",LU7 0RW
HA15,7,"7 Manor Road Cheddington, Leighton Buzzard",LU7 0RW
HA15,8,"8 Manor Road Cheddington, Leighton Buzzard",LU7 0RW
HA15,10,"10 Manor Road Cheddington, Leighton Buzzard",LU7 0RW
HA15,11,"11 Manor Road Cheddington, Leighton Buzzard",LU7 0RW
HA15,61,"61 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
HA15,69,"69 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
HA15,71,"71 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
HA15,75,"75 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
HA15,85,"85 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
HA15,87,"87 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
HA15,89,"89 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
HA15,95,"95 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
HA15,101,"101 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
HA15,103,"103 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
HA15,125,"125 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
HA15,129,"129 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
HA15,133,"133 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
HA15,141,"141 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
HA15,151,"151 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
HA15,48,"48 Station Road Ivinghoe, Leighton Buzzard",LU7 9EB
HA15,52,"52 Station Road Ivinghoe, Leighton Buzzard",LU7 9EB
HA15,54,"54 Station Road Ivinghoe, Leighton Buzzard",LU7 9EB
HA15,58,"58 Station Road Ivinghoe, Leighton Buzzard",LU7 9EB
HA15,1,"1 Maud Janes Close Ivinghoe, Leighton Buzzard",LU7 9ED
HA15,3,"3 Maud Janes Close Ivinghoe, Leighton Buzzard",LU7 9ED
HA15,12,"12 Maud Janes Close Ivinghoe, Leighton Buzzard",LU7 9ED
HA15,26,"26 Ladysmith Road Ivinghoe, Leighton Buzzard",LU7 9EE
HA15,24,"24 High Street Ivinghoe, Leighton Buzzard",LU7 9EX
HA15,26,"26 High Street Ivinghoe, Leighton Buzzard",LU7 9EX
HA15,28,"28 High Street Ivinghoe, Leighton Buzzard",LU7 9EX
HA15,30,"30 High Street Ivinghoe, Leighton Buzzard",LU7 9EX
HA15,32,"32 High Street Ivinghoe, Leighton Buzzard",LU7 9EX
HA15,3,"3 Stonebridge Road, Aylesbury",HP19 9LX
HA15,102,"102 Coventon Road, Aylesbury",HP19 9ND
HA15,83,"83 Priory Crescent, Aylesbury",HP19 9NY
HA15,103,"103 Priory Crescent, Aylesbury",HP19 9NY
HA15,83,"83 Weedon Road, Aylesbury",HP19 9PA
HA15,7,"7 Haines Close, Aylesbury",HP19 9TS
HA15,8,"8 Haines Close, Aylesbury",HP19 9TS
HA15,9,"9 Haines Close, Aylesbury",HP19 9TS
HA15,13,"13 Haines Close, Aylesbury",HP19 9TS
HA15,22,"22 Haines Close, Aylesbury",HP19 9TS
HA15,39,"39 Haines Close, Aylesbury",HP19 9TS
HA15,45,"45 Haines Close, Aylesbury",HP19 9TS
HA15,27,"27 Oakfield Road, Aylesbury",HP20 1LH
HA15,11,"11 Wingate Walk, Aylesbury",HP20 1LN
HA15,9,"9 Stanhope Road, Aylesbury",HP20 1LP
HA15,28,"28 Stanhope Road, Aylesbury",HP20 1LR
HA15,12,"12 Cleveland Road, Aylesbury",HP20 2AZ
HA15,20,"20 Cleveland Road, Aylesbury",HP20 2AZ
HA15,22,"22 Cleveland Road, Aylesbury",HP20 2AZ
HA15,7,"7 Bryanston Avenue, Aylesbury",HP20 2BA
HA15,17,"17 Bryanston Avenue, Aylesbury",HP20 2BA
HA15,36,"36 Bryanston Avenue, Aylesbury",HP20 2BA
HA15,38,"38 Bryanston Avenue, Aylesbury",HP20 2BA
HA15,6,"6 Matlock Road, Aylesbury",HP20 2BE
HA15,9,"9 Lisburn Path, Aylesbury",HP20 2BQ
HA15,15,"15 Lisburn Path, Aylesbury",HP20 2BQ
HA15,3,"3 Lansdowne Road, Aylesbury",HP20 2DJ
HA15,15,"15 Lansdowne Road, Aylesbury",HP20 2DJ
HA15,4,"4 Caversham Green, Aylesbury",HP20 2DL
HA15,1,"1 Davies Close, Aylesbury",HP20 2SH
HA15,62,"62 Stoke Road, Aylesbury",HP21 8BX
HA15,64,"64 Stoke Road, Aylesbury",HP21 8BX
HA15,78,"78 Stoke Road, Aylesbury",HP21 8BX
HA15,4,"4 Court Close, Aylesbury",HP21 8BY
HA15,7,"7 Clover Lane, Aylesbury",HP21 8DQ
HA15,25,"25 Clover Lane, Aylesbury",HP21 8DQ
HA15,31,"31 Clover Lane, Aylesbury",HP21 8DQ
HA15,53,"53 Birch Court, Aylesbury",HP21 8DS
HA15,59,"59 Birch Court, Aylesbury",HP21 8DS
HA15,74,"74 Thrasher Road, Aylesbury",HP21 8DX
HA15,2,"2 Vicarage Road, Aylesbury",HP21 8EU
HA15,8,"8 Vicarage Road, Aylesbury",HP21 8EU
HA15,126,"126 Penn Road, Aylesbury",HP21 8JS
HA15,128,"128 Penn Road, Aylesbury",HP21 8JS
HA15,140,"140 Penn Road, Aylesbury",HP21 8JS
HA15,144,"144 Penn Road, Aylesbury",HP21 8JS
HA15,146,"146 Penn Road, Aylesbury",HP21 8JS
HA15,4,"4 Montague Road, Aylesbury",HP21 8JT
HA15,132,"132 Prebendal Avenue, Aylesbury",HP21 8LF
HA15,134,"134 Prebendal Avenue, Aylesbury",HP21 8LF
HA15,138,"138 Prebendal Avenue, Aylesbury",HP21 8LF
HA15,140,"140 Prebendal Avenue, Aylesbury",HP21 8LF
HA15,144,"144 Prebendal Avenue, Aylesbury",HP21 8LF
HA15,15,"15 Oak Green, Aylesbury",HP21 8LJ
HA15,59,"59 Paterson Road, Aylesbury",HP21 8LW
HA15,37,"37 Thame Road, Aylesbury",HP21 8LX
HA15,95,"95 Thame Road, Aylesbury",HP21 8LY
HA15,3,"3 Edinburgh Place, Aylesbury",HP21 8NG
HA15,52,"52 Carrington Road, Aylesbury",HP21 8NL
HA15,9,"9 Hartwell End, Aylesbury",HP21 8NZ
HA15,12,"12 Hartwell End, Aylesbury",HP21 8NZ
HA15,21,"21 Hartwell End, Aylesbury",HP21 8PA
HA15,64,"64 Lavric Road, Aylesbury",HP21 8PF
HA15,8,"8 Cooks Lane Mursley, Milton Keynes",MK17 0RU
HA15,47,"47 Green End Great Brickhill, Milton Keynes",MK17 9AT
HA15,14,"14 Green End Great Brickhill, Milton Keynes",MK17 9AU
HA15,63,"63 Bourtonville, Buckingham",MK18 1AY
HA15,2,"2 Bath Lane Terrace, Buckingham",MK18 1DY
HA15,3,"3 Bath Lane Terrace, Buckingham",MK18 1DY
HA15,4,"4 Bath Lane Terrace, Buckingham",MK18 1DY
HA15,3,"3 Westfields, Buckingham",MK18 1DZ
HA15,5,"5 Westfields, Buckingham",MK18 1DZ
HA15,6,"6 Westfields, Buckingham",MK18 1DZ
HA15,8,"8 Westfields, Buckingham",MK18 1DZ
HA15,10,"10 Westfields, Buckingham",MK18 1DZ
HA15,13,"13 Westfields, Buckingham",MK18 1DZ
HA15,14,"14 Westfields, Buckingham",MK18 1DZ
HA15,15,"15 Westfields, Buckingham",MK18 1DZ
HA15,18,"18 Westfields, Buckingham",MK18 1DZ
HA15,19,"19 Westfields, Buckingham",MK18 1DZ
HA15,20,"20 Westfields, Buckingham",MK18 1DZ
HA15,21,"21 Westfields, Buckingham",MK18 1DZ
HA15,24,"24 Westfields, Buckingham",MK18 1DZ
HA15,27,"27 Westfields, Buckingham",MK18 1DZ
HA15,28,"28 Westfields, Buckingham",MK18 1DZ
HA15,29,"29 Westfields, Buckingham",MK18 1DZ
HA15,31,"31 Westfields, Buckingham",MK18 1DZ
HA15,32,"32 Westfields, Buckingham",MK18 1DZ
HA15,35,"35 Westfields, Buckingham",MK18 1DZ
HA15,49,"49 Westfields, Buckingham",MK18 1DZ
HA15,51,"51 Westfields, Buckingham",MK18 1DZ
HA15,53,"53 Westfields, Buckingham",MK18 1DZ
HA15,55,"55 Westfields, Buckingham",MK18 1DZ
HA15,57,"57 Westfields, Buckingham",MK18 1DZ
HA15,60,"60 Westfields, Buckingham",MK18 1DZ
HA15,2,"2 Grenville Road, Buckingham",MK18 1LR
HA15,118,"118 Western Avenue, Buckingham",MK18 1LS
HA15,5,"5 South Hall Maids Moreton, Buckingham",MK18 1QB
HA15,2,"2 Church Close Maids Moreton, Buckingham",MK18 1QG
HA15,5,"5 Church Close Maids Moreton, Buckingham",MK18 1QG
HA15,7,"7 Church Close Maids Moreton, Buckingham",MK18 1QG
HA15,1,"1 The Leys Main Street, Buckingham",MK18 1QT
HA15,31a,"31a Springfields Padbury, Buckingham",MK18 2AT
HA15,31b,"31b Springfields Padbury, Buckingham",MK18 2AT
HA15,1,"1 Arnolds Close Padbury, Buckingham",MK18 2BG
HA15,42,"42 Victory Road Steeple Claydon, Buckingham",MK18 2NY
HA15,50,"50 Victory Road Steeple Claydon, Buckingham",MK18 2NY
HA15,4,"4 Falklands Close Steeple Claydon, Buckingham",MK18 2PN
HA15,8,"8 Falklands Close Steeple Claydon, Buckingham",MK18 2PN
HA15,10,"10 Falklands Close Steeple Claydon, Buckingham",MK18 2PN
HA15,12,"12 Falklands Close Steeple Claydon, Buckingham",MK18 2PN
HA15,11,"11 Vicarage Lane Steeple Claydon, Buckingham",MK18 2PR
HA15,62,"62 Vicarage Lane Steeple Claydon, Buckingham",MK18 2PR
HA15,64,"64 Vicarage Lane Steeple Claydon, Buckingham",MK18 2PR
HA15,3,"3 Pound Close Steeple Claydon, Buckingham",MK18 2QL
HA15,4,"4 Pound Close Steeple Claydon, Buckingham",MK18 2QL
HA15,6,"6 Oak Leys Steeple Claydon, Buckingham",MK18 2RQ
HA15,8,"8 Oak Leys Steeple Claydon, Buckingham",MK18 2RQ
HA15,8,"8 Old Mill Furlong Winslow, Buckingham",MK18 3EX
HA15,23,"23 Old Mill Furlong Winslow, Buckingham",MK18 3EX
HA15,24,"24 Old Mill Furlong Winslow, Buckingham",MK18 3EX
HA15,25,"25 Old Mill Furlong Winslow, Buckingham",MK18 3EX
HA15,30,"30 Old Mill Furlong Winslow, Buckingham",MK18 3EX
HA15,32,"32 Old Mill Furlong Winslow, Buckingham",MK18 3EX
HA15,34,"34 Old Mill Furlong Winslow, Buckingham",MK18 3EX
HA15,1,"1 Roberts Road Haddenham, Aylesbury",HP17 8HH
HA15,6,"6 Roberts Road Haddenham, Aylesbury",HP17 8HH
HA15,11,"11 Roberts Road Haddenham, Aylesbury",HP17 8HH
HA15,15,"15 Roberts Road Haddenham, Aylesbury",HP17 8HH
HA15,17,"17 Roberts Road Haddenham, Aylesbury",HP17 8HH
HA15,18,"18 Roberts Road Haddenham, Aylesbury",HP17 8HH
HA15,38,"38 Roberts Road Haddenham, Aylesbury",HP17 8HH
HA15,3,"3 Harts Road Haddenham, Aylesbury",HP17 8HJ
HA15,9,"9 Harts Road Haddenham, Aylesbury",HP17 8HJ
HA15,11,"11 Harts Road Haddenham, Aylesbury",HP17 8HJ
HA15,16,"16 Harts Road Haddenham, Aylesbury",HP17 8HJ
HA15,18,"18 Harts Road Haddenham, Aylesbury",HP17 8HJ
HA15,22,"22 Harts Road Haddenham, Aylesbury",HP17 8HJ
HA15,2,"2 Willis Road Haddenham, Aylesbury",HP17 8HL
HA15,4,"4 Willis Road Haddenham, Aylesbury",HP17 8HL
HA15,5,"5 Willis Road Haddenham, Aylesbury",HP17 8HL
HA15,8,"8 Willis Road Haddenham, Aylesbury",HP17 8HL
HA15,20,"20 Willis Road Haddenham, Aylesbury",HP17 8HL
HA15,21,"21 Willis Road Haddenham, Aylesbury",HP17 8HL
HA15,22,"22 Willis Road Haddenham, Aylesbury",HP17 8HL
HA15,26,"26 Willis Road Haddenham, Aylesbury",HP17 8HL
HA15,29,"29 Willis Road Haddenham, Aylesbury",HP17 8HL
HA15,31,"31 Willis Road Haddenham, Aylesbury",HP17 8HL
HA15,33,"33 Willis Road Haddenham, Aylesbury",HP17 8HL
HA15,35,"35 Willis Road Haddenham, Aylesbury",HP17 8HL
HA15,37,"37 Willis Road Haddenham, Aylesbury",HP17 8HL
HA15,39,"39 Willis Road Haddenham, Aylesbury",HP17 8HL
HA15,5,"5 Woodways Haddenham, Aylesbury",HP17 8HW
HA15,7,"7 Woodways Haddenham, Aylesbury",HP17 8HW
HA15,13,"13 Woodways Haddenham, Aylesbury",HP17 8HW
HA15,19,"19 Woodways Haddenham, Aylesbury",HP17 8HW
HA15,1,"1 Woodlands Butte Furlong, Aylesbury",HP17 8JE
HA15,2,"2 Franklin Road Haddenham, Aylesbury",HP17 8LE
HA15,8,"8 Franklin Road Haddenham, Aylesbury",HP17 8LE
HA15,129,"129 Churchway Haddenham, Aylesbury",HP17 8LG
HA15,133,"133 Churchway Haddenham, Aylesbury",HP17 8LG
HA15,135,"135 Churchway Haddenham, Aylesbury",HP17 8LG
HA15,147,"147 Churchway Haddenham, Aylesbury",HP17 8LG
HA15,7,"7 Bishopstone Road Stone, Aylesbury",HP17 8QX
HA15,33,"33 Bishopstone Road Stone, Aylesbury",HP17 8QX
HA15,8,"8 Chiltern Avenue Stone, Aylesbury",HP17 8QY
HA15,20,"20 Chiltern Avenue Stone, Aylesbury",HP17 8QY
HA15,28,"28 Chiltern Avenue Stone, Aylesbury",HP17 8QY
HA15,32,"32 Chiltern Avenue Stone, Aylesbury",HP17 8QY
HA15,34,"34 Chiltern Avenue Stone, Aylesbury",HP17 8QY
HA15,46,"46 Chiltern Avenue Stone, Aylesbury",HP17 8QY
HA15,60,"60 Chiltern Avenue Stone, Aylesbury",HP17 8QY
HA15,62,"62 Chiltern Avenue Stone, Aylesbury",HP17 8QY
HA15,7,"7 Chiltern Avenue Stone, Aylesbury",HP17 8QZ
HA15,13,"13 Chiltern Avenue Stone, Aylesbury",HP17 8QZ
HA15,33,"33 Chiltern Avenue Stone, Aylesbury",HP17 8QZ
HA15,41,"41 Chiltern Avenue Stone, Aylesbury",HP17 8QZ
HA15,14,"14 Chiltern Close Stone, Aylesbury",HP17 8RA
HA15,17,"17 Chiltern Close Stone, Aylesbury",HP17 8RA
HA15,10,"10 Round Hill Stone, Aylesbury",HP17 8RD
HA15,16,"16 Round Hill Stone, Aylesbury",HP17 8RD
HA15,7,"7 Round Hill Stone, Aylesbury",HP17 8RE
HA15,17,"17 Round Hill Stone, Aylesbury",HP17 8RE
HA15,23,"23 Round Hill Stone, Aylesbury",HP17 8RE
HA15,59,"59 Bishopstone Road Stone, Aylesbury",HP17 8RX
HA15,1,"1 Bittenham Close Stone, Aylesbury",HP17 8RY
HA15,7,"7 Bittenham Close Stone, Aylesbury",HP17 8RY
HA15,1,"1 New Road Dinton, Aylesbury",HP17 8UU
HA15,3,"3 New Road Dinton, Aylesbury",HP17 8UU
HA15,8,"8 New Road Dinton, Aylesbury",HP17 8UU
HA15,1,"1 Bernard Close Cuddington, Aylesbury",HP18 0AJ
HA15,4,"4 Bernard Close Cuddington, Aylesbury",HP18 0AJ
HA15,7,"7 Bernard Close Cuddington, Aylesbury",HP18 0AJ
HA15,12,"12 Bernard Close Cuddington, Aylesbury",HP18 0AJ
HA15,19,"19 Bernard Close Cuddington, Aylesbury",HP18 0AJ
HA15,22,"22 Bernard Close Cuddington, Aylesbury",HP18 0AJ
HA15,34,"34 Bernard Close Cuddington, Aylesbury",HP18 0AJ
HA15,39,"39 Bernard Close Cuddington, Aylesbury",HP18 0AJ
HA15,41,"41 Bernard Close Cuddington, Aylesbury",HP18 0AJ
HA15,7,"7 Hillside Cottages Dadbrook, Aylesbury",HP18 0AQ
HA15,10,"10 Hillside Cottages Dadbrook, Aylesbury",HP18 0AQ
HA15,11,"11 Hillside Cottages Dadbrook, Aylesbury",HP18 0AQ
HA15,7,"7 Swan Hill Aylesbury Road, Aylesbury",HP18 0BE
HA15,10,"10 Swan Hill Aylesbury Road, Aylesbury",HP18 0BE
HA15,1,"1 Grove Way Waddesdon, Aylesbury",HP18 0LH
HA15,6,"6 Grove Way Waddesdon, Aylesbury",HP18 0LH
HA15,7,"7 Grove Way Waddesdon, Aylesbury",HP18 0LH
HA15,1,"1 Sheriff Cottages Quainton Road, Aylesbury",HP18 0LT
HA15,2,"2 Sheriff Cottages Quainton Road, Aylesbury",HP18 0LT
HA15,3,"3 Sheriff Cottages Quainton Road, Aylesbury",HP18 0LT
HA15,5,"5 Sheriff Cottages Quainton Road, Aylesbury",HP18 0LT
HA15,6,"6 Sheriff Cottages Quainton Road, Aylesbury",HP18 0LT
HA15,7,"7 Sheriff Cottages Quainton Road, Aylesbury",HP18 0LT
HA15,9,"9 Sheriff Cottages Quainton Road, Aylesbury",HP18 0LT
HA15,21,"21 Goss Avenue Waddesdon, Aylesbury",HP18 0LY
HA15,86,"86 Sharps Close Waddesdon, Aylesbury",HP18 0LZ
HA15,88,"88 Sharps Close Waddesdon, Aylesbury",HP18 0LZ
HA15,3,"3 Hilltop Long Crendon, Aylesbury",HP18 9AT
HA15,4,"4 Hilltop Long Crendon, Aylesbury",HP18 9AT
HA15,1A,"1a Hilltop Long Crendon, Aylesbury",HP18 9AT
HA15,3A,"3a Hilltop Long Crendon, Aylesbury",HP18 9AT
HA15,26,"26 Peascroft Long Crendon, Aylesbury",HP18 9AU
HA15,30,"30 Peascroft Long Crendon, Aylesbury",HP18 9AU
HA15,52,"52 Peascroft Long Crendon, Aylesbury",HP18 9AU
HA15,11,"11 Harroell Long Crendon, Aylesbury",HP18 9AY
HA15,13,"13 Harroell Long Crendon, Aylesbury",HP18 9AY
HA15,14,"14 Harroell Long Crendon, Aylesbury",HP18 9AY
HA15,2,"2 Abbot Ridge Long Crendon, Aylesbury",HP18 9AZ
HA15,14,"14 Abbot Ridge Long Crendon, Aylesbury",HP18 9AZ
HA15,18,"18 Abbot Ridge Long Crendon, Aylesbury",HP18 9AZ
HA15,26,"26 Abbot Ridge Long Crendon, Aylesbury",HP18 9AZ
HA15,5,"5 Meadowbank Close Long Crendon, Aylesbury",HP18 9DH
HA15,11,"11 Bonnersfield Long Crendon, Aylesbury",HP18 9DJ
HA15,14,"14 Bonnersfield Long Crendon, Aylesbury",HP18 9DJ
HA15,16,"16 Bonnersfield Long Crendon, Aylesbury",HP18 9DJ
HA15,26,"26 Bonnersfield Long Crendon, Aylesbury",HP18 9DJ
HA15,28,"28 Bonnersfield Long Crendon, Aylesbury",HP18 9DJ
HA15,29,"29 Bonnersfield Long Crendon, Aylesbury",HP18 9DJ
HA15,30,"30 Bonnersfield Long Crendon, Aylesbury",HP18 9DJ
HA15,32,"32 Bonnersfield Long Crendon, Aylesbury",HP18 9DJ
HA15,36,"36 Giffard Way Long Crendon, Aylesbury",HP18 9DN
HA15,45,"45 Giffard Way Long Crendon, Aylesbury",HP18 9DN
HA15,52,"52 Giffard Way Long Crendon, Aylesbury",HP18 9DN
HA15,10,"10 Coltman Avenue Long Crendon, Aylesbury",HP18 9DP
HA15,11,"11 Coltman Avenue Long Crendon, Aylesbury",HP18 9DP
HA15,12,"12 Coltman Avenue Long Crendon, Aylesbury",HP18 9DP
HA15,14,"14 Coltman Avenue Long Crendon, Aylesbury",HP18 9DP
HA15,16,"16 Coltman Avenue Long Crendon, Aylesbury",HP18 9DP
HA15,22,"22 Coltman Avenue Long Crendon, Aylesbury",HP18 9DP
HA15,25,"25 Coltman Avenue Long Crendon, Aylesbury",HP18 9DP
HA15,26,"26 Coltman Avenue Long Crendon, Aylesbury",HP18 9DP
HA15,27,"27 Coltman Avenue Long Crendon, Aylesbury",HP18 9DP
HA15,32,"32 Friars Furlong Long Crendon, Aylesbury",HP18 9DQ
HA15,4,"4 Highfield Long Crendon, Aylesbury",HP18 9DR
HA15,5,"5 Highfield Long Crendon, Aylesbury",HP18 9DR
HA15,8,"8 Highfield Long Crendon, Aylesbury",HP18 9DR
HA15,9,"9 Highfield Long Crendon, Aylesbury",HP18 9DR
HA15,10,"10 Highfield Long Crendon, Aylesbury",HP18 9DR
HA15,11,"11 Highfield Long Crendon, Aylesbury",HP18 9DR
HA15,14,"14 Highfield Long Crendon, Aylesbury",HP18 9DR
HA15,17,"17 Highfield Long Crendon, Aylesbury",HP18 9DR
HA15,18,"18 Highfield Long Crendon, Aylesbury",HP18 9DR
HA15,20,"20 Highfield Long Crendon, Aylesbury",HP18 9DR
HA15,23,"23 Highfield Long Crendon, Aylesbury",HP18 9DR
HA15,24,"24 Highfield Long Crendon, Aylesbury",HP18 9DR
HA15,14b,"14b Highfield Long Crendon, Aylesbury",HP18 9DR
HA15,4,"4 Giffard Way Long Crendon, Aylesbury",HP18 9DW
HA15,13,"13 Giffard Way Long Crendon, Aylesbury",HP18 9DW
HA15,14,"14 Giffard Way Long Crendon, Aylesbury",HP18 9DW
HA15,24,"24 St. Annes Road, Aylesbury",HP19 7RB
HA15,55,"55 St. Annes Road, Aylesbury",HP19 7RB
HA15,6,"6 Palmer Avenue, Aylesbury",HP19 8EF
HA15,18,"18 Palmer Avenue, Aylesbury",HP19 8EF
HA15,20,"20 Palmer Avenue, Aylesbury",HP19 8EF
HA15,24,"24 Palmer Avenue, Aylesbury",HP19 8EF
HA15,25,"25 Palmer Avenue, Aylesbury",HP19 8EF
HA15,1,"1 Gatehouse Road, Aylesbury",HP19 8EH
HA15,10,"10 Gatehouse Road, Aylesbury",HP19 8EH
HA15,12,"12 Gatehouse Road, Aylesbury",HP19 8EH
HA15,53,"53 Oxford Road, Aylesbury",HP19 8EQ
HA15,59,"59 Oxford Road, Aylesbury",HP19 8EQ
HA15,2,"2 Lander Road,Aylesbury,Bucks",HP19 9TT
HA15,30,"30 Lander Road,Aylesbury,Bucks",HP19 9TT
HA15,31,"31 Lander Road,Aylesbury,Bucks",HP19 9TT
HA15,32,"32 Lander Road,Aylesbury,Bucks",HP19 9TT
HA15,3,"3 Eeles Close,Aylesbury,Bucks",HP19 9TU
HA15,5,"5 Eeles Close,Aylesbury,Bucks",HP19 9TU
HA15,6,"6 Eeles Close,Aylesbury,Bucks",HP19 9TU
HA15,7,"7 Eeles Close,Aylesbury,Bucks",HP19 9TU
HA15,8,"8 Eeles Close,Aylesbury,Bucks",HP19 9TU
HA15,9,"9 Eeles Close,Aylesbury,Bucks",HP19 9TU
HA15,10,"10 Eeles Close,Aylesbury,Bucks",HP19 9TU
HA15,15,"15 Eeles Close,Aylesbury,Bucks",HP19 9TU
HA15,17,"17 Dicks Way,Aylesbury,Bucks",HP19 9UA
HA15,20,"20 Dicks Way,Aylesbury,Bucks",HP19 9UA
HA15,28,"28 Dicks Way,Aylesbury,Bucks",HP19 9UA
HA15,30,"30 Dicks Way,Aylesbury,Bucks",HP19 9UA
HA15,32,"32 Dicks Way,Aylesbury,Bucks",HP19 9UA
HA15,34,"34 Dicks Way,Aylesbury,Bucks",HP19 9UA
HA15,36,"36 Dicks Way,Aylesbury,Bucks",HP19 9UA
HA15,7,"7 Fletcher Close,Aylesbury,Bucks",HP19 9UB
HA15,8,"8 Fletcher Close,Aylesbury,Bucks",HP19 9UB
HA15,10,"10 Fletcher Close,Aylesbury,Bucks",HP19 9UB
HA15,11,"11 Fletcher Close,Aylesbury,Bucks",HP19 9UB
HA15,12,"12 Fletcher Close,Aylesbury,Bucks",HP19 9UB
HA15,25,"25 Fletcher Close,Aylesbury,Bucks",HP19 9UB
HA15,33,"33 Fletcher Close,Aylesbury,Bucks",HP19 9UB
HA15,34,"34 Fletcher Close,Aylesbury,Bucks",HP19 9UB
HA15,11,"11 Grimmer Close,Aylesbury,Bucks",HP19 9UD
HA15,14,"14 Grimmer Close,Aylesbury,Bucks",HP19 9UD
HA15,15,"15 Grimmer Close,Aylesbury,Bucks",HP19 9UD
HA15,23,"23 Grimmer Close,Aylesbury,Bucks",HP19 9UD
HA15,12,"12 Vincent Road,Aylesbury,Bucks",HP19 9UN
HA15,4,"4 Reading Close,Aylesbury,Bucks",HP19 9UW
HA15,7,"7 Reading Close,Aylesbury,Bucks",HP19 9UW
HA15,10,"10 Reading Close,Aylesbury,Bucks",HP19 9UW
HA15,2,"2 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
HA15,4,"4 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
HA15,6,"6 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
HA15,8,"8 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
HA15,10,"10 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
HA15,14,"14 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
HA15,16,"16 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
HA15,18,"18 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
HA15,20,"20 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
HA15,22,"22 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
HA15,24,"24 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
HA15,1,"1 Mary Mac Manus Drive, Milton Keynes",MK18 1UW
HA15,3,"3 Mary Mac Manus Drive, Milton Keynes",MK18 1UW
HA15,5,"5 Mary Mac Manus Drive, Milton Keynes",MK18 1UW
HA15,7,"7 Mary Mac Manus Drive, Milton Keynes",MK18 1UW
HA15,9,"9 Mary Mac Manus Drive, Milton Keynes",MK18 1UW
HA15,11,"11 Mary Mac Manus Drive, Milton Keynes",MK18 1UW
HA15,13,"13 Mary Mac Manus Drive, Milton Keynes",MK18 1UW
HA15,15,"15 Mary Mac Manus Drive, Milton Keynes",MK18 1UW
HA15,17,"17 Mary Mac Manus Drive, Milton Keynes",MK18 1UW
HA15,24,"24 St. Annes Road, Aylesbury",HP19 7RB
HA15,55,"55 St. Annes Road, Aylesbury",HP19 7RB
HA15,3,"3 Lansdowne Road, Aylesbury",HP20 2DJ
HA15,15,"15 Lansdowne Road, Aylesbury",HP20 2DJ
HA15,28,"28 Beechwood Way Aston Clinton, Aylesbury",HP22 5JP
HA15,11,"11 Lower Icknield Way Aston Clinton, Aylesbury",HP22 5JS
HA15,17,"17 Lower Icknield Way Aston Clinton, Aylesbury",HP22 5JS
HA15,5,"5 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
HA15,6,"6 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
HA15,8,"8 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
HA15,12,"12 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
HA15,13,"13 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
HA15,15,"15 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
HA15,16,"16 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
HA15,19,"19 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
HA15,21,"21 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
HA15,23,"23 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
HA15,13,"13 Beechwood Way Aston Clinton, Aylesbury",HP22 5JW
HA15,24,"24 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JX
HA15,26,"26 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JX
HA15,34,"34 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JX
HA15,39,"39 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JX
HA15,42,"42 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JX
HA15,44,"44 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JX
HA15,45,"45 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JX
HA15,89,"89 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JX
HA15,9,"9 Longcroft Aston Clinton, Aylesbury",HP22 5JZ
HA15,14,"14 Longcroft Aston Clinton, Aylesbury",HP22 5JZ
HA15,55,"55 Grenville Avenue Wendover, Aylesbury",HP22 6AJ
HA15,67,"67 Grenville Avenue Wendover, Aylesbury",HP22 6AJ
HA15,75,"75 Grenville Avenue Wendover, Aylesbury",HP22 6AJ
HA15,35,"35 Grenville Avenue Wendover, Aylesbury",HP22 6AQ
HA15,12,"12 Boddington Road Wendover, Aylesbury",HP22 6HY
HA15,16,"16 Boddington Road Wendover, Aylesbury",HP22 6HY
HA15,21,"21 Boddington Road Wendover, Aylesbury",HP22 6HY
HA15,35,"35 Boddington Road Wendover, Aylesbury",HP22 6HY
HA15,39,"39 Boddington Road Wendover, Aylesbury",HP22 6HY
HA15,5,"5 Boddington Road Wendover, Aylesbury",HP22 6HZ
HA15,1,"1a Lionel Avenue Wendover, Aylesbury",HP22 6LL
HA15,22,"22 Barley Close Weston Turville, Aylesbury",HP22 5SF
HA15,24,"24 Barley Close Weston Turville, Aylesbury",HP22 5SF
HA15,31,"31 Barley Close Weston Turville, Aylesbury",HP22 5SF
HA15,39,"39 Barley Close Weston Turville, Aylesbury",HP22 5SF
HA15,41,"41 Barley Close Weston Turville, Aylesbury",HP22 5SF
HA15,43,"43 Barley Close Weston Turville, Aylesbury",HP22 5SF
HA15,46,"46 Hampden Road Stoke Mandeville, Aylesbury",HP22 5TW
HA15,6,"6 Hampden Road Stoke Mandeville, Aylesbury",HP22 5UF
HA15,7,"7 Hampden Road Stoke Mandeville, Aylesbury",HP22 5UF
HA15,21,"21 Hampden Road Stoke Mandeville, Aylesbury",HP22 5UF
HA15,14,"14 Eskdale Road Stoke Mandeville, Aylesbury",HP22 5UJ
HA15,15,"15 Eskdale Road Stoke Mandeville, Aylesbury",HP22 5UJ
HA15,18,"18 Eskdale Road Stoke Mandeville, Aylesbury",HP22 5UJ
HA15,20,"20 Eskdale Road Stoke Mandeville, Aylesbury",HP22 5UJ
HA15,23,"23 Eskdale Road Stoke Mandeville, Aylesbury",HP22 5UJ
HA15,43,"43 Eskdale Road Stoke Mandeville, Aylesbury",HP22 5UJ
HA15,44,"44 Eskdale Road Stoke Mandeville, Aylesbury",HP22 5UJ
HA15,27,"27 Station Road Stoke Mandeville, Aylesbury",HP22 5UL
HA15,29,"29 Station Road Stoke Mandeville, Aylesbury",HP22 5UL
HA15,3,"3 Moor Park Wendover, Aylesbury",HP22 6AX
HA15,9,"9 Moor Park Wendover, Aylesbury",HP22 6AX
HA15,21,"21 Moor Park Wendover, Aylesbury",HP22 6AX
HA15,35,"35 Moor Park Wendover, Aylesbury",HP22 6AX
HA15,40,"40 Moor Park Wendover, Aylesbury",HP22 6AX
HA15,42,"42 Moor Park Wendover, Aylesbury",HP22 6AX
HA15,45,"45 Moor Park Wendover, Aylesbury",HP22 6AX
HA15,48,"48 Moor Park Wendover, Aylesbury",HP22 6AX
HA15,54,"54 Moor Park Wendover, Aylesbury",HP22 6AX
HA15,58,"58 Moor Park Wendover, Aylesbury",HP22 6AX
1 Housing Association No. Address Postcode
2 HA15 2 2 Lander Road HP19 9TT
3 HA15 4 4 Lander Road HP19 9TT
4 HA15 5 5 Lander Road HP19 9TT
5 HA15 12 12 Lander Road HP19 9TT
6 HA15 14 14 Lander Road HP19 9TT
7 HA15 18 18 Lander Road HP19 9TT
8 HA15 22 22 Lander Road HP19 9TT
9 HA15 1 1 Eeles Close HP19 9TU
10 HA15 2 2 Eeles Close HP19 9TU
11 HA15 3 3 Eeles Close HP19 9TU
12 HA15 12 12 Eeles Close HP19 9TU
13 HA15 15 15 Eeles Close HP19 9TU
14 HA15 2 2 Dicks Way HP19 9UA
15 HA15 4 4 Dicks Way HP19 9UA
16 HA15 5 5 Dicks Way HP19 9UA
17 HA15 6 6 Dicks Way HP19 9UA
18 HA15 8 8 Dicks Way HP19 9UA
19 HA15 9 9 Dicks Way HP19 9UA
20 HA15 14 14 Dicks Way HP19 9UA
21 HA15 15 15 Dicks Way HP19 9UA
22 HA15 17 17 Dicks Way HP19 9UA
23 HA15 20 20 Dicks Way HP19 9UA
24 HA15 26 26 Dicks Way HP19 9UA
25 HA15 28 28 Dicks Way HP19 9UA
26 HA15 4 4 Fletcher Close HP19 9UB
27 HA15 5 5 Fletcher Close HP19 9UB
28 HA15 24 24 Fletcher Close HP19 9UB
29 HA15 25 25 Fletcher Close HP19 9UB
30 HA15 27 27 Fletcher Close HP19 9UB
31 HA15 28 28 Fletcher Close HP19 9UB
32 HA15 29 29 Fletcher Close HP19 9UB
33 HA15 31 31 Fletcher Close HP19 9UB
34 HA15 32 32 Fletcher Close HP19 9UB
35 HA15 33 33 Fletcher Close HP19 9UB
36 HA15 34 34 Fletcher Close,Aylesbury,Bucks HP19 9UB
37 HA15 1 1 Grimmer Close HP19 9UD
38 HA15 11 11 Grimmer Close HP19 9UD
39 HA15 14 14 Grimmer Close HP19 9UD
40 HA15 15 15 Grimmer Close HP19 9UD
41 HA15 17 17 Grimmer Close HP19 9UD
42 HA15 18 18 Grimmer Close HP19 9UD
43 HA15 21 21 Grimmer Close HP19 9UD
44 HA15 23 23 Grimmer Close HP19 9UD
45 HA15 24 24 Grimmer Close HP19 9UD
46 HA15 28 28 Grimmer Close HP19 9UD
47 HA15 30 30 Grimmer Close HP19 9UD
48 HA15 1 1 Vincent Road HP19 9UN
49 HA15 6 6 Vincent Road HP19 9UN
50 HA15 10 10 Vincent Road HP19 9UN
51 HA15 12 12 Vincent Road HP19 9UN
52 HA15 13 13 Vincent Road HP19 9UN
53 HA15 16 16 Vincent Road HP19 9UN
54 HA15 21 21 Vincent Road HP19 9UN
55 HA15 24 24 Vincent Road HP19 9UN
56 HA15 26 26 Vincent Road HP19 9UN
57 HA15 27 27 Vincent Road HP19 9UN
58 HA15 32 32 Vincent Road HP19 9UN
59 HA15 1 1 Reading Close HP19 9UW
60 HA15 2 2 Reading Close HP19 9UW
61 HA15 3 3 Reading Close HP19 9UW
62 HA15 4 4 Reading Close HP19 9UW
63 HA15 5 5 Reading Close HP19 9UW
64 HA15 6 6 Reading Close HP19 9UW
65 HA15 7 7 Reading Close HP19 9UW
66 HA15 9 9 Reading Close HP19 9UW
67 HA15 10 10 Reading Close HP19 9UW
68 HA15 6 6 Mary Mac Manus Drive MK18 1UN
69 HA15 8 8 Mary Mac Manus Drive MK18 1UN
70 HA15 10 10 Mary Mac Manus Drive MK18 1UN
71 HA15 2 2 Rosebery Road Aston Clinton, Aylesbury HP22 5JY
72 HA15 7 7 Rosebery Road Aston Clinton, Aylesbury HP22 5JY
73 HA15 9 9 Rosebery Road Aston Clinton, Aylesbury HP22 5JY
74 HA15 11 11 Rosebery Road Aston Clinton, Aylesbury HP22 5JY
75 HA15 12 12 Rosebery Road Aston Clinton, Aylesbury HP22 5JY
76 HA15 16 16 Rosebery Road Aston Clinton, Aylesbury HP22 5JY
77 HA15 17 17 Rosebery Road Aston Clinton, Aylesbury HP22 5JY
78 HA15 26 26 Rosebery Road Aston Clinton, Aylesbury HP22 5JY
79 HA15 38 38 Rosebery Road Aston Clinton, Aylesbury HP22 5JY
80 HA15 41 41 Rosebery Road Aston Clinton, Aylesbury HP22 5JY
81 HA15 25 25 New Road Weston Turville, Aylesbury HP22 5RA
82 HA15 27 27 New Road Weston Turville, Aylesbury HP22 5RA
83 HA15 29 29 New Road Weston Turville, Aylesbury HP22 5RA
84 HA15 31 31 New Road Weston Turville, Aylesbury HP22 5RA
85 HA15 37 37 New Road Weston Turville, Aylesbury HP22 5RA
86 HA15 39 39 New Road Weston Turville, Aylesbury HP22 5RA
87 HA15 5 5 Walton Place Weston Turville, Aylesbury HP22 5RB
88 HA15 9 9 Walton Place Weston Turville, Aylesbury HP22 5RB
89 HA15 18 18 Walton Place Weston Turville, Aylesbury HP22 5RB
90 HA15 21 21 Walton Place Weston Turville, Aylesbury HP22 5RD
91 HA15 36 36 Walton Place Weston Turville, Aylesbury HP22 5RD
92 HA15 42 42 Walton Place Weston Turville, Aylesbury HP22 5RD
93 HA15 46 46 Walton Place Weston Turville, Aylesbury HP22 5RD
94 HA15 76 76 Worlds End Lane Weston Turville, Aylesbury HP22 5RX
95 HA15 78 78 Worlds End Lane Weston Turville, Aylesbury HP22 5RX
96 HA15 82 82 Worlds End Lane Weston Turville, Aylesbury HP22 5RX
97 HA15 84 84 Worlds End Lane Weston Turville, Aylesbury HP22 5RX
98 HA15 86 86 Worlds End Lane Weston Turville, Aylesbury HP22 5RX
99 HA15 88 88 Worlds End Lane Weston Turville, Aylesbury HP22 5RX
100 HA15 64 64 Halton Lane Wendover, Aylesbury HP22 6AZ
101 HA15 66 66 Halton Lane Wendover, Aylesbury HP22 6AZ
102 HA15 68 68 Halton Lane Wendover, Aylesbury HP22 6AZ
103 HA15 70 70 Halton Lane Wendover, Aylesbury HP22 6AZ
104 HA15 8 8 South Street Wendover, Aylesbury HP22 6EF
105 HA15 2 2 Barlow Road Wendover, Aylesbury HP22 6HP
106 HA15 4 4 Barlow Road Wendover, Aylesbury HP22 6HP
107 HA15 14 14 Barlow Road Wendover, Aylesbury HP22 6HP
108 HA15 15 15 Barlow Road Wendover, Aylesbury HP22 6HP
109 HA15 16 16 Barlow Road Wendover, Aylesbury HP22 6HP
110 HA15 28 28 Barlow Road Wendover, Aylesbury HP22 6HP
111 HA15 1 1 Woollerton Crescent Wendover, Aylesbury HP22 6HT
112 HA15 5 5 Woollerton Crescent Wendover, Aylesbury HP22 6HT
113 HA15 7 7 Woollerton Crescent Wendover, Aylesbury HP22 6HT
114 HA15 8 8 Woollerton Crescent Wendover, Aylesbury HP22 6HT
115 HA15 9 9 Woollerton Crescent Wendover, Aylesbury HP22 6HT
116 HA15 13 13 Woollerton Crescent Wendover, Aylesbury HP22 6HT
117 HA15 16 16 Woollerton Crescent Wendover, Aylesbury HP22 6HT
118 HA15 20 20 Woollerton Crescent Wendover, Aylesbury HP22 6HT
119 HA15 24 24 Woollerton Crescent Wendover, Aylesbury HP22 6HT
120 HA15 26 26 Woollerton Crescent Wendover, Aylesbury HP22 6HT
121 HA15 28 28 Woollerton Crescent Wendover, Aylesbury HP22 6HT
122 HA15 38 38 Woollerton Crescent Wendover, Aylesbury HP22 6HT
123 HA15 44 44 Woollerton Crescent Wendover, Aylesbury HP22 6HT
124 HA15 50 50 Woollerton Crescent Wendover, Aylesbury HP22 6HT
125 HA15 15 15 Hampden Road Wendover, Aylesbury HP22 6HU
126 HA15 18 18 Hampden Road Wendover, Aylesbury HP22 6HU
127 HA15 22 22 Hampden Road Wendover, Aylesbury HP22 6HU
128 HA15 26 26 Hampden Road Wendover, Aylesbury HP22 6HU
129 HA15 28 28 Hampden Road Wendover, Aylesbury HP22 6HU
130 HA15 25 25 Hampden Road Wendover, Aylesbury HP22 6HX
131 HA15 27 27 Hampden Road Wendover, Aylesbury HP22 6HX
132 HA15 31 31 Hampden Road Wendover, Aylesbury HP22 6HX
133 HA15 34 34 Hampden Road Wendover, Aylesbury HP22 6HX
134 HA15 36 36 Hampden Road Wendover, Aylesbury HP22 6HX
135 HA15 38 38 Hampden Road Wendover, Aylesbury HP22 6HX
136 HA15 5 5 Gainsborough Road, Aylesbury HP21 9AZ
137 HA15 1 1 Dart Close, Aylesbury HP21 9NP
138 HA15 1 1 Wingrave Road Aston Abbotts, Aylesbury HP22 4LT
139 HA15 3 3 Wingrave Road Aston Abbotts, Aylesbury HP22 4LT
140 HA15 5 5 Wingrave Road Aston Abbotts, Aylesbury HP22 4LT
141 HA15 82 82 Winslow Road Wingrave, Aylesbury HP22 4QB
142 HA15 84 84 Winslow Road Wingrave, Aylesbury HP22 4QB
143 HA15 106 106 Winslow Road Wingrave, Aylesbury HP22 4QB
144 HA15 125 125 Winslow Road Wingrave, Aylesbury HP22 4QB
145 HA15 19 19 Abbotts Way Wingrave, Aylesbury HP22 4QF
146 HA15 37 37 Abbotts Way Wingrave, Aylesbury HP22 4QF
147 HA15 41 41 Abbotts Way Wingrave, Aylesbury HP22 4QF
148 HA15 43 43 Abbotts Way Wingrave, Aylesbury HP22 4QF
149 HA15 2 2 Chiltern Road Wingrave, Aylesbury HP22 4QQ
150 HA15 5 5 Chiltern Road Wingrave, Aylesbury HP22 4QQ
151 HA15 10 10 Chiltern Road Wingrave, Aylesbury HP22 4QQ
152 HA15 12 12 Chiltern Road Wingrave, Aylesbury HP22 4QQ
153 HA15 19 19 Chiltern Road Wingrave, Aylesbury HP22 4QQ
154 HA15 21 21 Chiltern Road Wingrave, Aylesbury HP22 4QQ
155 HA15 22 22 Chiltern Road Wingrave, Aylesbury HP22 4QQ
156 HA15 31 31 Chiltern Road Wingrave, Aylesbury HP22 4QQ
157 HA15 32 32 Chiltern Road Wingrave, Aylesbury HP22 4QQ
158 HA15 33 33 Chiltern Road Wingrave, Aylesbury HP22 4QQ
159 HA15 34 34 Chiltern Road Wingrave, Aylesbury HP22 4QQ
160 HA15 35 35 Chiltern Road Wingrave, Aylesbury HP22 4QQ
161 HA15 37 37 Chiltern Road Wingrave, Aylesbury HP22 4QQ
162 HA15 38 38 Chiltern Road Wingrave, Aylesbury HP22 4QQ
163 HA15 40 40 Chiltern Road Wingrave, Aylesbury HP22 4QQ
164 HA15 42 42 Chiltern Road Wingrave, Aylesbury HP22 4QQ
165 HA15 23 23 Great Lane Bierton, Aylesbury HP22 5DE
166 HA15 25 25 Great Lane Bierton, Aylesbury HP22 5DE
167 HA15 35 35 Great Lane Bierton, Aylesbury HP22 5DE
168 HA15 37 37 Great Lane Bierton, Aylesbury HP22 5DE
169 HA15 61 61 Weston Road Aston Clinton, Aylesbury HP22 5EJ
170 HA15 65 65 Weston Road Aston Clinton, Aylesbury HP22 5EJ
171 HA15 67 67 Weston Road Aston Clinton, Aylesbury HP22 5EJ
172 HA15 69 69 Weston Road Aston Clinton, Aylesbury HP22 5EJ
173 HA15 28 28a Tring Road Wendover, Aylesbury HP22 6NT
174 HA15 38 38a Tring Road Wendover, Aylesbury HP22 6NT
175 HA15 14 14 Tring Road Wendover, Aylesbury HP22 6NT
176 HA15 34 34 Tring Road Wendover, Aylesbury HP22 6NT
177 HA15 36 36 Tring Road Wendover, Aylesbury HP22 6NT
178 HA15 64 64 Tring Road Wendover, Aylesbury HP22 6NX
179 HA15 68 68 Tring Road Wendover, Aylesbury HP22 6NX
180 HA15 70 70 Tring Road Wendover, Aylesbury HP22 6NX
181 HA15 74 74 Tring Road Wendover, Aylesbury HP22 6NX
182 HA15 76 76 Tring Road Wendover, Aylesbury HP22 6NX
183 HA15 78 78 Tring Road Wendover, Aylesbury HP22 6NX
184 HA15 80 80 Tring Road Wendover, Aylesbury HP22 6NX
185 HA15 90 90 Tring Road Wendover, Aylesbury HP22 6NX
186 HA15 92 92 Tring Road Wendover, Aylesbury HP22 6NX
187 HA15 100 100 Tring Road Wendover, Aylesbury HP22 6NX
188 HA15 104 104 Tring Road Wendover, Aylesbury HP22 6NX
189 HA15 106 106 Tring Road Wendover, Aylesbury HP22 6NX
190 HA15 108 108 Tring Road Wendover, Aylesbury HP22 6NX
191 HA15 114 114 Tring Road Wendover, Aylesbury HP22 6NX
192 HA15 38 38 The Beeches Wendover, Aylesbury HP22 6PB
193 HA15 49 49 The Beeches Wendover, Aylesbury HP22 6PB
194 HA15 54 54 The Beeches Wendover, Aylesbury HP22 6PB
195 HA15 64 64 The Beeches Wendover, Aylesbury HP22 6PB
196 HA15 1 1 Church End Edlesborough, Dunstable LU6 2EP
197 HA15 2 2 Church End Edlesborough, Dunstable LU6 2EP
198 HA15 5 5 Church End Edlesborough, Dunstable LU6 2EP
199 HA15 6 6 Church End Edlesborough, Dunstable LU6 2EP
200 HA15 7 7 Church End Edlesborough, Dunstable LU6 2EP
201 HA15 9 9 Church End Edlesborough, Dunstable LU6 2EP
202 HA15 125 125 High Street Edlesborough, Dunstable LU6 2ER
203 HA15 6 6 Dove Street Stewkley, Leighton Buzzard LU7 0HT
204 HA15 14 14 Wantage Crescent Wing, Leighton Buzzard LU7 0NH
205 HA15 32 32 Wantage Crescent Wing, Leighton Buzzard LU7 0NH
206 HA15 38 38a Wantage Crescent Wing, Leighton Buzzard LU7 0NH
207 HA15 38 38b Wantage Crescent Wing, Leighton Buzzard LU7 0NH
208 HA15 75 75 High Street Cheddington, Leighton Buzzard LU7 0RG
209 HA15 12 12 New Street Cheddington, Leighton Buzzard LU7 0RL
210 HA15 14 14 New Street Cheddington, Leighton Buzzard LU7 0RL
211 HA15 16 16 New Street Cheddington, Leighton Buzzard LU7 0RL
212 HA15 2 2 Sunnybank Cheddington, Leighton Buzzard LU7 0RN
213 HA15 4 4 Sunnybank Cheddington, Leighton Buzzard LU7 0RN
214 HA15 10 10 Sunnybank Cheddington, Leighton Buzzard LU7 0RN
215 HA15 11 11 Sunnybank Cheddington, Leighton Buzzard LU7 0RN
216 HA15 17 17 Sunnybank Cheddington, Leighton Buzzard LU7 0RN
217 HA15 19 19 Sunnybank Cheddington, Leighton Buzzard LU7 0RN
218 HA15 20 20 Sunnybank Cheddington, Leighton Buzzard LU7 0RN
219 HA15 23 23 Sunnybank Cheddington, Leighton Buzzard LU7 0RN
220 HA15 25 25 Sunnybank Cheddington, Leighton Buzzard LU7 0RN
221 HA15 26 26 Sunnybank Cheddington, Leighton Buzzard LU7 0RN
222 HA15 28 28 Sunnybank Cheddington, Leighton Buzzard LU7 0RN
223 HA15 31 31 Sunnybank Cheddington, Leighton Buzzard LU7 0RN
224 HA15 33 33 Sunnybank Cheddington, Leighton Buzzard LU7 0RN
225 HA15 36 36 Sunnybank Cheddington, Leighton Buzzard LU7 0RN
226 HA15 40 40 Sunnybank Cheddington, Leighton Buzzard LU7 0RN
227 HA15 4 4 Barkham Close Cheddington, Leighton Buzzard LU7 0RT
228 HA15 4 4 Manor Road Cheddington, Leighton Buzzard LU7 0RW
229 HA15 7 7 Manor Road Cheddington, Leighton Buzzard LU7 0RW
230 HA15 8 8 Manor Road Cheddington, Leighton Buzzard LU7 0RW
231 HA15 10 10 Manor Road Cheddington, Leighton Buzzard LU7 0RW
232 HA15 11 11 Manor Road Cheddington, Leighton Buzzard LU7 0RW
233 HA15 61 61 Yardley Avenue Pitstone, Leighton Buzzard LU7 9BD
234 HA15 69 69 Yardley Avenue Pitstone, Leighton Buzzard LU7 9BD
235 HA15 71 71 Yardley Avenue Pitstone, Leighton Buzzard LU7 9BD
236 HA15 75 75 Yardley Avenue Pitstone, Leighton Buzzard LU7 9BD
237 HA15 85 85 Yardley Avenue Pitstone, Leighton Buzzard LU7 9BD
238 HA15 87 87 Yardley Avenue Pitstone, Leighton Buzzard LU7 9BD
239 HA15 89 89 Yardley Avenue Pitstone, Leighton Buzzard LU7 9BD
240 HA15 95 95 Yardley Avenue Pitstone, Leighton Buzzard LU7 9BD
241 HA15 101 101 Yardley Avenue Pitstone, Leighton Buzzard LU7 9BD
242 HA15 103 103 Yardley Avenue Pitstone, Leighton Buzzard LU7 9BD
243 HA15 125 125 Yardley Avenue Pitstone, Leighton Buzzard LU7 9BD
244 HA15 129 129 Yardley Avenue Pitstone, Leighton Buzzard LU7 9BD
245 HA15 133 133 Yardley Avenue Pitstone, Leighton Buzzard LU7 9BD
246 HA15 141 141 Yardley Avenue Pitstone, Leighton Buzzard LU7 9BD
247 HA15 151 151 Yardley Avenue Pitstone, Leighton Buzzard LU7 9BD
248 HA15 48 48 Station Road Ivinghoe, Leighton Buzzard LU7 9EB
249 HA15 52 52 Station Road Ivinghoe, Leighton Buzzard LU7 9EB
250 HA15 54 54 Station Road Ivinghoe, Leighton Buzzard LU7 9EB
251 HA15 58 58 Station Road Ivinghoe, Leighton Buzzard LU7 9EB
252 HA15 1 1 Maud Janes Close Ivinghoe, Leighton Buzzard LU7 9ED
253 HA15 3 3 Maud Janes Close Ivinghoe, Leighton Buzzard LU7 9ED
254 HA15 12 12 Maud Janes Close Ivinghoe, Leighton Buzzard LU7 9ED
255 HA15 26 26 Ladysmith Road Ivinghoe, Leighton Buzzard LU7 9EE
256 HA15 24 24 High Street Ivinghoe, Leighton Buzzard LU7 9EX
257 HA15 26 26 High Street Ivinghoe, Leighton Buzzard LU7 9EX
258 HA15 28 28 High Street Ivinghoe, Leighton Buzzard LU7 9EX
259 HA15 30 30 High Street Ivinghoe, Leighton Buzzard LU7 9EX
260 HA15 32 32 High Street Ivinghoe, Leighton Buzzard LU7 9EX
261 HA15 3 3 Stonebridge Road, Aylesbury HP19 9LX
262 HA15 102 102 Coventon Road, Aylesbury HP19 9ND
263 HA15 83 83 Priory Crescent, Aylesbury HP19 9NY
264 HA15 103 103 Priory Crescent, Aylesbury HP19 9NY
265 HA15 83 83 Weedon Road, Aylesbury HP19 9PA
266 HA15 7 7 Haines Close, Aylesbury HP19 9TS
267 HA15 8 8 Haines Close, Aylesbury HP19 9TS
268 HA15 9 9 Haines Close, Aylesbury HP19 9TS
269 HA15 13 13 Haines Close, Aylesbury HP19 9TS
270 HA15 22 22 Haines Close, Aylesbury HP19 9TS
271 HA15 39 39 Haines Close, Aylesbury HP19 9TS
272 HA15 45 45 Haines Close, Aylesbury HP19 9TS
273 HA15 27 27 Oakfield Road, Aylesbury HP20 1LH
274 HA15 11 11 Wingate Walk, Aylesbury HP20 1LN
275 HA15 9 9 Stanhope Road, Aylesbury HP20 1LP
276 HA15 28 28 Stanhope Road, Aylesbury HP20 1LR
277 HA15 12 12 Cleveland Road, Aylesbury HP20 2AZ
278 HA15 20 20 Cleveland Road, Aylesbury HP20 2AZ
279 HA15 22 22 Cleveland Road, Aylesbury HP20 2AZ
280 HA15 7 7 Bryanston Avenue, Aylesbury HP20 2BA
281 HA15 17 17 Bryanston Avenue, Aylesbury HP20 2BA
282 HA15 36 36 Bryanston Avenue, Aylesbury HP20 2BA
283 HA15 38 38 Bryanston Avenue, Aylesbury HP20 2BA
284 HA15 6 6 Matlock Road, Aylesbury HP20 2BE
285 HA15 9 9 Lisburn Path, Aylesbury HP20 2BQ
286 HA15 15 15 Lisburn Path, Aylesbury HP20 2BQ
287 HA15 3 3 Lansdowne Road, Aylesbury HP20 2DJ
288 HA15 15 15 Lansdowne Road, Aylesbury HP20 2DJ
289 HA15 4 4 Caversham Green, Aylesbury HP20 2DL
290 HA15 1 1 Davies Close, Aylesbury HP20 2SH
291 HA15 62 62 Stoke Road, Aylesbury HP21 8BX
292 HA15 64 64 Stoke Road, Aylesbury HP21 8BX
293 HA15 78 78 Stoke Road, Aylesbury HP21 8BX
294 HA15 4 4 Court Close, Aylesbury HP21 8BY
295 HA15 7 7 Clover Lane, Aylesbury HP21 8DQ
296 HA15 25 25 Clover Lane, Aylesbury HP21 8DQ
297 HA15 31 31 Clover Lane, Aylesbury HP21 8DQ
298 HA15 53 53 Birch Court, Aylesbury HP21 8DS
299 HA15 59 59 Birch Court, Aylesbury HP21 8DS
300 HA15 74 74 Thrasher Road, Aylesbury HP21 8DX
301 HA15 2 2 Vicarage Road, Aylesbury HP21 8EU
302 HA15 8 8 Vicarage Road, Aylesbury HP21 8EU
303 HA15 126 126 Penn Road, Aylesbury HP21 8JS
304 HA15 128 128 Penn Road, Aylesbury HP21 8JS
305 HA15 140 140 Penn Road, Aylesbury HP21 8JS
306 HA15 144 144 Penn Road, Aylesbury HP21 8JS
307 HA15 146 146 Penn Road, Aylesbury HP21 8JS
308 HA15 4 4 Montague Road, Aylesbury HP21 8JT
309 HA15 132 132 Prebendal Avenue, Aylesbury HP21 8LF
310 HA15 134 134 Prebendal Avenue, Aylesbury HP21 8LF
311 HA15 138 138 Prebendal Avenue, Aylesbury HP21 8LF
312 HA15 140 140 Prebendal Avenue, Aylesbury HP21 8LF
313 HA15 144 144 Prebendal Avenue, Aylesbury HP21 8LF
314 HA15 15 15 Oak Green, Aylesbury HP21 8LJ
315 HA15 59 59 Paterson Road, Aylesbury HP21 8LW
316 HA15 37 37 Thame Road, Aylesbury HP21 8LX
317 HA15 95 95 Thame Road, Aylesbury HP21 8LY
318 HA15 3 3 Edinburgh Place, Aylesbury HP21 8NG
319 HA15 52 52 Carrington Road, Aylesbury HP21 8NL
320 HA15 9 9 Hartwell End, Aylesbury HP21 8NZ
321 HA15 12 12 Hartwell End, Aylesbury HP21 8NZ
322 HA15 21 21 Hartwell End, Aylesbury HP21 8PA
323 HA15 64 64 Lavric Road, Aylesbury HP21 8PF
324 HA15 8 8 Cooks Lane Mursley, Milton Keynes MK17 0RU
325 HA15 47 47 Green End Great Brickhill, Milton Keynes MK17 9AT
326 HA15 14 14 Green End Great Brickhill, Milton Keynes MK17 9AU
327 HA15 63 63 Bourtonville, Buckingham MK18 1AY
328 HA15 2 2 Bath Lane Terrace, Buckingham MK18 1DY
329 HA15 3 3 Bath Lane Terrace, Buckingham MK18 1DY
330 HA15 4 4 Bath Lane Terrace, Buckingham MK18 1DY
331 HA15 3 3 Westfields, Buckingham MK18 1DZ
332 HA15 5 5 Westfields, Buckingham MK18 1DZ
333 HA15 6 6 Westfields, Buckingham MK18 1DZ
334 HA15 8 8 Westfields, Buckingham MK18 1DZ
335 HA15 10 10 Westfields, Buckingham MK18 1DZ
336 HA15 13 13 Westfields, Buckingham MK18 1DZ
337 HA15 14 14 Westfields, Buckingham MK18 1DZ
338 HA15 15 15 Westfields, Buckingham MK18 1DZ
339 HA15 18 18 Westfields, Buckingham MK18 1DZ
340 HA15 19 19 Westfields, Buckingham MK18 1DZ
341 HA15 20 20 Westfields, Buckingham MK18 1DZ
342 HA15 21 21 Westfields, Buckingham MK18 1DZ
343 HA15 24 24 Westfields, Buckingham MK18 1DZ
344 HA15 27 27 Westfields, Buckingham MK18 1DZ
345 HA15 28 28 Westfields, Buckingham MK18 1DZ
346 HA15 29 29 Westfields, Buckingham MK18 1DZ
347 HA15 31 31 Westfields, Buckingham MK18 1DZ
348 HA15 32 32 Westfields, Buckingham MK18 1DZ
349 HA15 35 35 Westfields, Buckingham MK18 1DZ
350 HA15 49 49 Westfields, Buckingham MK18 1DZ
351 HA15 51 51 Westfields, Buckingham MK18 1DZ
352 HA15 53 53 Westfields, Buckingham MK18 1DZ
353 HA15 55 55 Westfields, Buckingham MK18 1DZ
354 HA15 57 57 Westfields, Buckingham MK18 1DZ
355 HA15 60 60 Westfields, Buckingham MK18 1DZ
356 HA15 2 2 Grenville Road, Buckingham MK18 1LR
357 HA15 118 118 Western Avenue, Buckingham MK18 1LS
358 HA15 5 5 South Hall Maids Moreton, Buckingham MK18 1QB
359 HA15 2 2 Church Close Maids Moreton, Buckingham MK18 1QG
360 HA15 5 5 Church Close Maids Moreton, Buckingham MK18 1QG
361 HA15 7 7 Church Close Maids Moreton, Buckingham MK18 1QG
362 HA15 1 1 The Leys Main Street, Buckingham MK18 1QT
363 HA15 31a 31a Springfields Padbury, Buckingham MK18 2AT
364 HA15 31b 31b Springfields Padbury, Buckingham MK18 2AT
365 HA15 1 1 Arnolds Close Padbury, Buckingham MK18 2BG
366 HA15 42 42 Victory Road Steeple Claydon, Buckingham MK18 2NY
367 HA15 50 50 Victory Road Steeple Claydon, Buckingham MK18 2NY
368 HA15 4 4 Falklands Close Steeple Claydon, Buckingham MK18 2PN
369 HA15 8 8 Falklands Close Steeple Claydon, Buckingham MK18 2PN
370 HA15 10 10 Falklands Close Steeple Claydon, Buckingham MK18 2PN
371 HA15 12 12 Falklands Close Steeple Claydon, Buckingham MK18 2PN
372 HA15 11 11 Vicarage Lane Steeple Claydon, Buckingham MK18 2PR
373 HA15 62 62 Vicarage Lane Steeple Claydon, Buckingham MK18 2PR
374 HA15 64 64 Vicarage Lane Steeple Claydon, Buckingham MK18 2PR
375 HA15 3 3 Pound Close Steeple Claydon, Buckingham MK18 2QL
376 HA15 4 4 Pound Close Steeple Claydon, Buckingham MK18 2QL
377 HA15 6 6 Oak Leys Steeple Claydon, Buckingham MK18 2RQ
378 HA15 8 8 Oak Leys Steeple Claydon, Buckingham MK18 2RQ
379 HA15 8 8 Old Mill Furlong Winslow, Buckingham MK18 3EX
380 HA15 23 23 Old Mill Furlong Winslow, Buckingham MK18 3EX
381 HA15 24 24 Old Mill Furlong Winslow, Buckingham MK18 3EX
382 HA15 25 25 Old Mill Furlong Winslow, Buckingham MK18 3EX
383 HA15 30 30 Old Mill Furlong Winslow, Buckingham MK18 3EX
384 HA15 32 32 Old Mill Furlong Winslow, Buckingham MK18 3EX
385 HA15 34 34 Old Mill Furlong Winslow, Buckingham MK18 3EX
386 HA15 1 1 Roberts Road Haddenham, Aylesbury HP17 8HH
387 HA15 6 6 Roberts Road Haddenham, Aylesbury HP17 8HH
388 HA15 11 11 Roberts Road Haddenham, Aylesbury HP17 8HH
389 HA15 15 15 Roberts Road Haddenham, Aylesbury HP17 8HH
390 HA15 17 17 Roberts Road Haddenham, Aylesbury HP17 8HH
391 HA15 18 18 Roberts Road Haddenham, Aylesbury HP17 8HH
392 HA15 38 38 Roberts Road Haddenham, Aylesbury HP17 8HH
393 HA15 3 3 Harts Road Haddenham, Aylesbury HP17 8HJ
394 HA15 9 9 Harts Road Haddenham, Aylesbury HP17 8HJ
395 HA15 11 11 Harts Road Haddenham, Aylesbury HP17 8HJ
396 HA15 16 16 Harts Road Haddenham, Aylesbury HP17 8HJ
397 HA15 18 18 Harts Road Haddenham, Aylesbury HP17 8HJ
398 HA15 22 22 Harts Road Haddenham, Aylesbury HP17 8HJ
399 HA15 2 2 Willis Road Haddenham, Aylesbury HP17 8HL
400 HA15 4 4 Willis Road Haddenham, Aylesbury HP17 8HL
401 HA15 5 5 Willis Road Haddenham, Aylesbury HP17 8HL
402 HA15 8 8 Willis Road Haddenham, Aylesbury HP17 8HL
403 HA15 20 20 Willis Road Haddenham, Aylesbury HP17 8HL
404 HA15 21 21 Willis Road Haddenham, Aylesbury HP17 8HL
405 HA15 22 22 Willis Road Haddenham, Aylesbury HP17 8HL
406 HA15 26 26 Willis Road Haddenham, Aylesbury HP17 8HL
407 HA15 29 29 Willis Road Haddenham, Aylesbury HP17 8HL
408 HA15 31 31 Willis Road Haddenham, Aylesbury HP17 8HL
409 HA15 33 33 Willis Road Haddenham, Aylesbury HP17 8HL
410 HA15 35 35 Willis Road Haddenham, Aylesbury HP17 8HL
411 HA15 37 37 Willis Road Haddenham, Aylesbury HP17 8HL
412 HA15 39 39 Willis Road Haddenham, Aylesbury HP17 8HL
413 HA15 5 5 Woodways Haddenham, Aylesbury HP17 8HW
414 HA15 7 7 Woodways Haddenham, Aylesbury HP17 8HW
415 HA15 13 13 Woodways Haddenham, Aylesbury HP17 8HW
416 HA15 19 19 Woodways Haddenham, Aylesbury HP17 8HW
417 HA15 1 1 Woodlands Butte Furlong, Aylesbury HP17 8JE
418 HA15 2 2 Franklin Road Haddenham, Aylesbury HP17 8LE
419 HA15 8 8 Franklin Road Haddenham, Aylesbury HP17 8LE
420 HA15 129 129 Churchway Haddenham, Aylesbury HP17 8LG
421 HA15 133 133 Churchway Haddenham, Aylesbury HP17 8LG
422 HA15 135 135 Churchway Haddenham, Aylesbury HP17 8LG
423 HA15 147 147 Churchway Haddenham, Aylesbury HP17 8LG
424 HA15 7 7 Bishopstone Road Stone, Aylesbury HP17 8QX
425 HA15 33 33 Bishopstone Road Stone, Aylesbury HP17 8QX
426 HA15 8 8 Chiltern Avenue Stone, Aylesbury HP17 8QY
427 HA15 20 20 Chiltern Avenue Stone, Aylesbury HP17 8QY
428 HA15 28 28 Chiltern Avenue Stone, Aylesbury HP17 8QY
429 HA15 32 32 Chiltern Avenue Stone, Aylesbury HP17 8QY
430 HA15 34 34 Chiltern Avenue Stone, Aylesbury HP17 8QY
431 HA15 46 46 Chiltern Avenue Stone, Aylesbury HP17 8QY
432 HA15 60 60 Chiltern Avenue Stone, Aylesbury HP17 8QY
433 HA15 62 62 Chiltern Avenue Stone, Aylesbury HP17 8QY
434 HA15 7 7 Chiltern Avenue Stone, Aylesbury HP17 8QZ
435 HA15 13 13 Chiltern Avenue Stone, Aylesbury HP17 8QZ
436 HA15 33 33 Chiltern Avenue Stone, Aylesbury HP17 8QZ
437 HA15 41 41 Chiltern Avenue Stone, Aylesbury HP17 8QZ
438 HA15 14 14 Chiltern Close Stone, Aylesbury HP17 8RA
439 HA15 17 17 Chiltern Close Stone, Aylesbury HP17 8RA
440 HA15 10 10 Round Hill Stone, Aylesbury HP17 8RD
441 HA15 16 16 Round Hill Stone, Aylesbury HP17 8RD
442 HA15 7 7 Round Hill Stone, Aylesbury HP17 8RE
443 HA15 17 17 Round Hill Stone, Aylesbury HP17 8RE
444 HA15 23 23 Round Hill Stone, Aylesbury HP17 8RE
445 HA15 59 59 Bishopstone Road Stone, Aylesbury HP17 8RX
446 HA15 1 1 Bittenham Close Stone, Aylesbury HP17 8RY
447 HA15 7 7 Bittenham Close Stone, Aylesbury HP17 8RY
448 HA15 1 1 New Road Dinton, Aylesbury HP17 8UU
449 HA15 3 3 New Road Dinton, Aylesbury HP17 8UU
450 HA15 8 8 New Road Dinton, Aylesbury HP17 8UU
451 HA15 1 1 Bernard Close Cuddington, Aylesbury HP18 0AJ
452 HA15 4 4 Bernard Close Cuddington, Aylesbury HP18 0AJ
453 HA15 7 7 Bernard Close Cuddington, Aylesbury HP18 0AJ
454 HA15 12 12 Bernard Close Cuddington, Aylesbury HP18 0AJ
455 HA15 19 19 Bernard Close Cuddington, Aylesbury HP18 0AJ
456 HA15 22 22 Bernard Close Cuddington, Aylesbury HP18 0AJ
457 HA15 34 34 Bernard Close Cuddington, Aylesbury HP18 0AJ
458 HA15 39 39 Bernard Close Cuddington, Aylesbury HP18 0AJ
459 HA15 41 41 Bernard Close Cuddington, Aylesbury HP18 0AJ
460 HA15 7 7 Hillside Cottages Dadbrook, Aylesbury HP18 0AQ
461 HA15 10 10 Hillside Cottages Dadbrook, Aylesbury HP18 0AQ
462 HA15 11 11 Hillside Cottages Dadbrook, Aylesbury HP18 0AQ
463 HA15 7 7 Swan Hill Aylesbury Road, Aylesbury HP18 0BE
464 HA15 10 10 Swan Hill Aylesbury Road, Aylesbury HP18 0BE
465 HA15 1 1 Grove Way Waddesdon, Aylesbury HP18 0LH
466 HA15 6 6 Grove Way Waddesdon, Aylesbury HP18 0LH
467 HA15 7 7 Grove Way Waddesdon, Aylesbury HP18 0LH
468 HA15 1 1 Sheriff Cottages Quainton Road, Aylesbury HP18 0LT
469 HA15 2 2 Sheriff Cottages Quainton Road, Aylesbury HP18 0LT
470 HA15 3 3 Sheriff Cottages Quainton Road, Aylesbury HP18 0LT
471 HA15 5 5 Sheriff Cottages Quainton Road, Aylesbury HP18 0LT
472 HA15 6 6 Sheriff Cottages Quainton Road, Aylesbury HP18 0LT
473 HA15 7 7 Sheriff Cottages Quainton Road, Aylesbury HP18 0LT
474 HA15 9 9 Sheriff Cottages Quainton Road, Aylesbury HP18 0LT
475 HA15 21 21 Goss Avenue Waddesdon, Aylesbury HP18 0LY
476 HA15 86 86 Sharps Close Waddesdon, Aylesbury HP18 0LZ
477 HA15 88 88 Sharps Close Waddesdon, Aylesbury HP18 0LZ
478 HA15 3 3 Hilltop Long Crendon, Aylesbury HP18 9AT
479 HA15 4 4 Hilltop Long Crendon, Aylesbury HP18 9AT
480 HA15 1A 1a Hilltop Long Crendon, Aylesbury HP18 9AT
481 HA15 3A 3a Hilltop Long Crendon, Aylesbury HP18 9AT
482 HA15 26 26 Peascroft Long Crendon, Aylesbury HP18 9AU
483 HA15 30 30 Peascroft Long Crendon, Aylesbury HP18 9AU
484 HA15 52 52 Peascroft Long Crendon, Aylesbury HP18 9AU
485 HA15 11 11 Harroell Long Crendon, Aylesbury HP18 9AY
486 HA15 13 13 Harroell Long Crendon, Aylesbury HP18 9AY
487 HA15 14 14 Harroell Long Crendon, Aylesbury HP18 9AY
488 HA15 2 2 Abbot Ridge Long Crendon, Aylesbury HP18 9AZ
489 HA15 14 14 Abbot Ridge Long Crendon, Aylesbury HP18 9AZ
490 HA15 18 18 Abbot Ridge Long Crendon, Aylesbury HP18 9AZ
491 HA15 26 26 Abbot Ridge Long Crendon, Aylesbury HP18 9AZ
492 HA15 5 5 Meadowbank Close Long Crendon, Aylesbury HP18 9DH
493 HA15 11 11 Bonnersfield Long Crendon, Aylesbury HP18 9DJ
494 HA15 14 14 Bonnersfield Long Crendon, Aylesbury HP18 9DJ
495 HA15 16 16 Bonnersfield Long Crendon, Aylesbury HP18 9DJ
496 HA15 26 26 Bonnersfield Long Crendon, Aylesbury HP18 9DJ
497 HA15 28 28 Bonnersfield Long Crendon, Aylesbury HP18 9DJ
498 HA15 29 29 Bonnersfield Long Crendon, Aylesbury HP18 9DJ
499 HA15 30 30 Bonnersfield Long Crendon, Aylesbury HP18 9DJ
500 HA15 32 32 Bonnersfield Long Crendon, Aylesbury HP18 9DJ
501 HA15 36 36 Giffard Way Long Crendon, Aylesbury HP18 9DN
502 HA15 45 45 Giffard Way Long Crendon, Aylesbury HP18 9DN
503 HA15 52 52 Giffard Way Long Crendon, Aylesbury HP18 9DN
504 HA15 10 10 Coltman Avenue Long Crendon, Aylesbury HP18 9DP
505 HA15 11 11 Coltman Avenue Long Crendon, Aylesbury HP18 9DP
506 HA15 12 12 Coltman Avenue Long Crendon, Aylesbury HP18 9DP
507 HA15 14 14 Coltman Avenue Long Crendon, Aylesbury HP18 9DP
508 HA15 16 16 Coltman Avenue Long Crendon, Aylesbury HP18 9DP
509 HA15 22 22 Coltman Avenue Long Crendon, Aylesbury HP18 9DP
510 HA15 25 25 Coltman Avenue Long Crendon, Aylesbury HP18 9DP
511 HA15 26 26 Coltman Avenue Long Crendon, Aylesbury HP18 9DP
512 HA15 27 27 Coltman Avenue Long Crendon, Aylesbury HP18 9DP
513 HA15 32 32 Friars Furlong Long Crendon, Aylesbury HP18 9DQ
514 HA15 4 4 Highfield Long Crendon, Aylesbury HP18 9DR
515 HA15 5 5 Highfield Long Crendon, Aylesbury HP18 9DR
516 HA15 8 8 Highfield Long Crendon, Aylesbury HP18 9DR
517 HA15 9 9 Highfield Long Crendon, Aylesbury HP18 9DR
518 HA15 10 10 Highfield Long Crendon, Aylesbury HP18 9DR
519 HA15 11 11 Highfield Long Crendon, Aylesbury HP18 9DR
520 HA15 14 14 Highfield Long Crendon, Aylesbury HP18 9DR
521 HA15 17 17 Highfield Long Crendon, Aylesbury HP18 9DR
522 HA15 18 18 Highfield Long Crendon, Aylesbury HP18 9DR
523 HA15 20 20 Highfield Long Crendon, Aylesbury HP18 9DR
524 HA15 23 23 Highfield Long Crendon, Aylesbury HP18 9DR
525 HA15 24 24 Highfield Long Crendon, Aylesbury HP18 9DR
526 HA15 14b 14b Highfield Long Crendon, Aylesbury HP18 9DR
527 HA15 4 4 Giffard Way Long Crendon, Aylesbury HP18 9DW
528 HA15 13 13 Giffard Way Long Crendon, Aylesbury HP18 9DW
529 HA15 14 14 Giffard Way Long Crendon, Aylesbury HP18 9DW
530 HA15 24 24 St. Annes Road, Aylesbury HP19 7RB
531 HA15 55 55 St. Annes Road, Aylesbury HP19 7RB
532 HA15 6 6 Palmer Avenue, Aylesbury HP19 8EF
533 HA15 18 18 Palmer Avenue, Aylesbury HP19 8EF
534 HA15 20 20 Palmer Avenue, Aylesbury HP19 8EF
535 HA15 24 24 Palmer Avenue, Aylesbury HP19 8EF
536 HA15 25 25 Palmer Avenue, Aylesbury HP19 8EF
537 HA15 1 1 Gatehouse Road, Aylesbury HP19 8EH
538 HA15 10 10 Gatehouse Road, Aylesbury HP19 8EH
539 HA15 12 12 Gatehouse Road, Aylesbury HP19 8EH
540 HA15 53 53 Oxford Road, Aylesbury HP19 8EQ
541 HA15 59 59 Oxford Road, Aylesbury HP19 8EQ
542 HA15 2 2 Lander Road,Aylesbury,Bucks HP19 9TT
543 HA15 30 30 Lander Road,Aylesbury,Bucks HP19 9TT
544 HA15 31 31 Lander Road,Aylesbury,Bucks HP19 9TT
545 HA15 32 32 Lander Road,Aylesbury,Bucks HP19 9TT
546 HA15 3 3 Eeles Close,Aylesbury,Bucks HP19 9TU
547 HA15 5 5 Eeles Close,Aylesbury,Bucks HP19 9TU
548 HA15 6 6 Eeles Close,Aylesbury,Bucks HP19 9TU
549 HA15 7 7 Eeles Close,Aylesbury,Bucks HP19 9TU
550 HA15 8 8 Eeles Close,Aylesbury,Bucks HP19 9TU
551 HA15 9 9 Eeles Close,Aylesbury,Bucks HP19 9TU
552 HA15 10 10 Eeles Close,Aylesbury,Bucks HP19 9TU
553 HA15 15 15 Eeles Close,Aylesbury,Bucks HP19 9TU
554 HA15 17 17 Dicks Way,Aylesbury,Bucks HP19 9UA
555 HA15 20 20 Dicks Way,Aylesbury,Bucks HP19 9UA
556 HA15 28 28 Dicks Way,Aylesbury,Bucks HP19 9UA
557 HA15 30 30 Dicks Way,Aylesbury,Bucks HP19 9UA
558 HA15 32 32 Dicks Way,Aylesbury,Bucks HP19 9UA
559 HA15 34 34 Dicks Way,Aylesbury,Bucks HP19 9UA
560 HA15 36 36 Dicks Way,Aylesbury,Bucks HP19 9UA
561 HA15 7 7 Fletcher Close,Aylesbury,Bucks HP19 9UB
562 HA15 8 8 Fletcher Close,Aylesbury,Bucks HP19 9UB
563 HA15 10 10 Fletcher Close,Aylesbury,Bucks HP19 9UB
564 HA15 11 11 Fletcher Close,Aylesbury,Bucks HP19 9UB
565 HA15 12 12 Fletcher Close,Aylesbury,Bucks HP19 9UB
566 HA15 25 25 Fletcher Close,Aylesbury,Bucks HP19 9UB
567 HA15 33 33 Fletcher Close,Aylesbury,Bucks HP19 9UB
568 HA15 34 34 Fletcher Close,Aylesbury,Bucks HP19 9UB
569 HA15 11 11 Grimmer Close,Aylesbury,Bucks HP19 9UD
570 HA15 14 14 Grimmer Close,Aylesbury,Bucks HP19 9UD
571 HA15 15 15 Grimmer Close,Aylesbury,Bucks HP19 9UD
572 HA15 23 23 Grimmer Close,Aylesbury,Bucks HP19 9UD
573 HA15 12 12 Vincent Road,Aylesbury,Bucks HP19 9UN
574 HA15 4 4 Reading Close,Aylesbury,Bucks HP19 9UW
575 HA15 7 7 Reading Close,Aylesbury,Bucks HP19 9UW
576 HA15 10 10 Reading Close,Aylesbury,Bucks HP19 9UW
577 HA15 2 2 Mary Mac Manus Drive, Milton Keynes MK18 1UN
578 HA15 4 4 Mary Mac Manus Drive, Milton Keynes MK18 1UN
579 HA15 6 6 Mary Mac Manus Drive, Milton Keynes MK18 1UN
580 HA15 8 8 Mary Mac Manus Drive, Milton Keynes MK18 1UN
581 HA15 10 10 Mary Mac Manus Drive, Milton Keynes MK18 1UN
582 HA15 14 14 Mary Mac Manus Drive, Milton Keynes MK18 1UN
583 HA15 16 16 Mary Mac Manus Drive, Milton Keynes MK18 1UN
584 HA15 18 18 Mary Mac Manus Drive, Milton Keynes MK18 1UN
585 HA15 20 20 Mary Mac Manus Drive, Milton Keynes MK18 1UN
586 HA15 22 22 Mary Mac Manus Drive, Milton Keynes MK18 1UN
587 HA15 24 24 Mary Mac Manus Drive, Milton Keynes MK18 1UN
588 HA15 1 1 Mary Mac Manus Drive, Milton Keynes MK18 1UW
589 HA15 3 3 Mary Mac Manus Drive, Milton Keynes MK18 1UW
590 HA15 5 5 Mary Mac Manus Drive, Milton Keynes MK18 1UW
591 HA15 7 7 Mary Mac Manus Drive, Milton Keynes MK18 1UW
592 HA15 9 9 Mary Mac Manus Drive, Milton Keynes MK18 1UW
593 HA15 11 11 Mary Mac Manus Drive, Milton Keynes MK18 1UW
594 HA15 13 13 Mary Mac Manus Drive, Milton Keynes MK18 1UW
595 HA15 15 15 Mary Mac Manus Drive, Milton Keynes MK18 1UW
596 HA15 17 17 Mary Mac Manus Drive, Milton Keynes MK18 1UW
597 HA15 24 24 St. Annes Road, Aylesbury HP19 7RB
598 HA15 55 55 St. Annes Road, Aylesbury HP19 7RB
599 HA15 3 3 Lansdowne Road, Aylesbury HP20 2DJ
600 HA15 15 15 Lansdowne Road, Aylesbury HP20 2DJ
601 HA15 28 28 Beechwood Way Aston Clinton, Aylesbury HP22 5JP
602 HA15 11 11 Lower Icknield Way Aston Clinton, Aylesbury HP22 5JS
603 HA15 17 17 Lower Icknield Way Aston Clinton, Aylesbury HP22 5JS
604 HA15 5 5 Beaconsfield Road Aston Clinton, Aylesbury HP22 5JU
605 HA15 6 6 Beaconsfield Road Aston Clinton, Aylesbury HP22 5JU
606 HA15 8 8 Beaconsfield Road Aston Clinton, Aylesbury HP22 5JU
607 HA15 12 12 Beaconsfield Road Aston Clinton, Aylesbury HP22 5JU
608 HA15 13 13 Beaconsfield Road Aston Clinton, Aylesbury HP22 5JU
609 HA15 15 15 Beaconsfield Road Aston Clinton, Aylesbury HP22 5JU
610 HA15 16 16 Beaconsfield Road Aston Clinton, Aylesbury HP22 5JU
611 HA15 19 19 Beaconsfield Road Aston Clinton, Aylesbury HP22 5JU
612 HA15 21 21 Beaconsfield Road Aston Clinton, Aylesbury HP22 5JU
613 HA15 23 23 Beaconsfield Road Aston Clinton, Aylesbury HP22 5JU
614 HA15 13 13 Beechwood Way Aston Clinton, Aylesbury HP22 5JW
615 HA15 24 24 Beaconsfield Road Aston Clinton, Aylesbury HP22 5JX
616 HA15 26 26 Beaconsfield Road Aston Clinton, Aylesbury HP22 5JX
617 HA15 34 34 Beaconsfield Road Aston Clinton, Aylesbury HP22 5JX
618 HA15 39 39 Beaconsfield Road Aston Clinton, Aylesbury HP22 5JX
619 HA15 42 42 Beaconsfield Road Aston Clinton, Aylesbury HP22 5JX
620 HA15 44 44 Beaconsfield Road Aston Clinton, Aylesbury HP22 5JX
621 HA15 45 45 Beaconsfield Road Aston Clinton, Aylesbury HP22 5JX
622 HA15 89 89 Beaconsfield Road Aston Clinton, Aylesbury HP22 5JX
623 HA15 9 9 Longcroft Aston Clinton, Aylesbury HP22 5JZ
624 HA15 14 14 Longcroft Aston Clinton, Aylesbury HP22 5JZ
625 HA15 55 55 Grenville Avenue Wendover, Aylesbury HP22 6AJ
626 HA15 67 67 Grenville Avenue Wendover, Aylesbury HP22 6AJ
627 HA15 75 75 Grenville Avenue Wendover, Aylesbury HP22 6AJ
628 HA15 35 35 Grenville Avenue Wendover, Aylesbury HP22 6AQ
629 HA15 12 12 Boddington Road Wendover, Aylesbury HP22 6HY
630 HA15 16 16 Boddington Road Wendover, Aylesbury HP22 6HY
631 HA15 21 21 Boddington Road Wendover, Aylesbury HP22 6HY
632 HA15 35 35 Boddington Road Wendover, Aylesbury HP22 6HY
633 HA15 39 39 Boddington Road Wendover, Aylesbury HP22 6HY
634 HA15 5 5 Boddington Road Wendover, Aylesbury HP22 6HZ
635 HA15 1 1a Lionel Avenue Wendover, Aylesbury HP22 6LL
636 HA15 22 22 Barley Close Weston Turville, Aylesbury HP22 5SF
637 HA15 24 24 Barley Close Weston Turville, Aylesbury HP22 5SF
638 HA15 31 31 Barley Close Weston Turville, Aylesbury HP22 5SF
639 HA15 39 39 Barley Close Weston Turville, Aylesbury HP22 5SF
640 HA15 41 41 Barley Close Weston Turville, Aylesbury HP22 5SF
641 HA15 43 43 Barley Close Weston Turville, Aylesbury HP22 5SF
642 HA15 46 46 Hampden Road Stoke Mandeville, Aylesbury HP22 5TW
643 HA15 6 6 Hampden Road Stoke Mandeville, Aylesbury HP22 5UF
644 HA15 7 7 Hampden Road Stoke Mandeville, Aylesbury HP22 5UF
645 HA15 21 21 Hampden Road Stoke Mandeville, Aylesbury HP22 5UF
646 HA15 14 14 Eskdale Road Stoke Mandeville, Aylesbury HP22 5UJ
647 HA15 15 15 Eskdale Road Stoke Mandeville, Aylesbury HP22 5UJ
648 HA15 18 18 Eskdale Road Stoke Mandeville, Aylesbury HP22 5UJ
649 HA15 20 20 Eskdale Road Stoke Mandeville, Aylesbury HP22 5UJ
650 HA15 23 23 Eskdale Road Stoke Mandeville, Aylesbury HP22 5UJ
651 HA15 43 43 Eskdale Road Stoke Mandeville, Aylesbury HP22 5UJ
652 HA15 44 44 Eskdale Road Stoke Mandeville, Aylesbury HP22 5UJ
653 HA15 27 27 Station Road Stoke Mandeville, Aylesbury HP22 5UL
654 HA15 29 29 Station Road Stoke Mandeville, Aylesbury HP22 5UL
655 HA15 3 3 Moor Park Wendover, Aylesbury HP22 6AX
656 HA15 9 9 Moor Park Wendover, Aylesbury HP22 6AX
657 HA15 21 21 Moor Park Wendover, Aylesbury HP22 6AX
658 HA15 35 35 Moor Park Wendover, Aylesbury HP22 6AX
659 HA15 40 40 Moor Park Wendover, Aylesbury HP22 6AX
660 HA15 42 42 Moor Park Wendover, Aylesbury HP22 6AX
661 HA15 45 45 Moor Park Wendover, Aylesbury HP22 6AX
662 HA15 48 48 Moor Park Wendover, Aylesbury HP22 6AX
663 HA15 54 54 Moor Park Wendover, Aylesbury HP22 6AX
664 HA15 58 58 Moor Park Wendover, Aylesbury HP22 6AX

View file

@ -0,0 +1,499 @@
Housing Association,No.,Address,Postcode
HA 32,1,SHERWOOD COURT,HU114DF
HA 32,2,SHERWOOD COURT,HU114DF
HA 32,3,SHERWOOD COURT,HU114DF
HA 32,4,SHERWOOD COURT,HU114DF
HA 32,5,SHERWOOD COURT,HU114DF
HA 32,7,SHERWOOD COURT,HU114DF
HA 32,8,SHERWOOD COURT,HU114DF
HA 32,9,SHERWOOD COURT,HU114DF
HA 32,10,SHERWOOD COURT,HU114DF
HA 32,27,Seaton Grove,HU4 6HF
HA 32,29,Seaton Grove,HU4 6HF
HA 32,31,Seaton Grove,HU4 6HF
HA 32,33,Seaton Grove,HU4 6HF
HA 32,35,Seaton Grove,HU4 6HF
HA 32,39,Seaton Grove,HU4 6HF
HA 32,41,Seaton Grove,HU4 6HF
HA 32,43,Seaton Grove,HU4 6HF
HA 32,7,Norton Grove,HU4 6HG
HA 32,9,Norton Grove,HU4 6HG
HA 32,11,Norton Grove,HU4 6HG
HA 32,15,Norton Grove,HU4 6HG
HA 32,17,Norton Grove,HU4 6HG
HA 32,19,Norton Grove,HU4 6HG
HA 32,21,Norton Grove,HU4 6HG
HA 32,28,Coxwold,HU4 6HH
HA 32,30,Coxwold,HU4 6HH
HA 32,32,Coxwold,HU4 6HH
HA 32,34,Coxwold,HU4 6HH
HA 32,36,Coxwold,HU4 6HH
HA 32,38,Coxwold,HU4 6HH
HA 32,40,Coxwold,HU4 6HH
HA 32,42,Coxwold,HU4 6HH
HA 32,44,Coxwold,HU4 6HH
HA 32,971,HESSLE ROAD,HU4 6QG
HA 32,973,HESSLE ROAD,HU4 6QG
HA 32,975,HESSLE ROAD,HU4 6QG
HA 32,977,HESSLE ROAD,HU4 6QG
HA 32,981,HESSLE ROAD,HU4 6QG
HA 32,983,HESSLE ROAD,HU4 6QG
HA 32,1,Hessle Road,HU4 6RS
HA 32,2,Hessle Road,HU4 6RS
HA 32,3,Hessle Road,HU4 6RS
HA 32,4,Hessle Road,HU4 6RS
HA 32,5,Hessle Road,HU4 6RS
HA 32,6,Hessle Road,HU4 6RS
HA 32,7,Hessle Road,HU4 6RS
HA 32,8,Hessle Road,HU4 6RS
HA 32,9,Hessle Road,HU4 6RS
HA 32,10,Hessle Road,HU4 6RS
HA 32,11,Hessle Road,HU4 6RS
HA 32,12,Hessle Road,HU4 6RS
HA 32,14,Hessle Road,HU4 6RS
HA 32,15,Hessle Road,HU4 6RS
HA 32,16,Hessle Road,HU4 6RS
HA 32,17,Hessle Road,HU4 6RS
HA 32,18,Hessle Road,HU4 6RS
HA 32,19,Hessle Road,HU4 6RS
HA 32,20,Hessle Road,HU4 6RS
HA 32,21,Hessle Road,HU4 6RS
HA 32,22,Hessle Road,HU4 6RS
HA 32,23,Hessle Road,HU4 6RS
HA 32,24,Hessle Road,HU4 6RS
HA 32,25,Hessle Road,HU4 6RS
HA 32,26,Hessle Road,HU4 6RS
HA 32,27,Hessle Road,HU4 6RS
HA 32,28,Hessle Road,HU4 6RS
HA 32,29,Hessle Road,HU4 6RS
HA 32,30,Hessle Road,HU4 6RS
HA 32,31,Hessle Road,HU4 6RS
HA 32,32,Hessle Road,HU4 6RS
HA 32,33,Hessle Road,HU4 6RS
HA 32,34,Hessle Road,HU4 6RS
HA 32,35,Hessle Road,HU4 6RS
HA 32,36,Hessle Road,HU4 6RS
HA 32,37,Hessle Road,HU4 6RS
HA 32,46,FORESTER WAY,HU4 6SR
HA 32,48,FORESTER WAY,HU4 6SR
HA 32,50,FORESTER WAY,HU4 6SR
HA 32,54,FORESTER WAY,HU4 6SR
HA 32,56,FORESTER WAY,HU4 6SR
HA 32,62,FORESTER WAY,HU4 6SR
HA 32,64,FORESTER WAY,HU4 6SR
HA 32,66,FORESTER WAY,HU4 6SR
HA 32,68,FORESTER WAY,HU4 6SR
HA 32,70,FORESTER WAY,HU4 6SR
HA 32,15,SUMMERGROVES WAY,HU4 6SZ
HA 32,1,WALNUT TREE WAY,HU4 6TG
HA 32,2,WALNUT TREE WAY,HU4 6TG
HA 32,3,WALNUT TREE WAY,HU4 6TG
HA 32,4,WALNUT TREE WAY,HU4 6TG
HA 32,7,WALNUT TREE WAY,HU4 6TG
HA 32,8,WALNUT TREE WAY,HU4 6TG
HA 32,9,WALNUT TREE WAY,HU4 6TG
HA 32,291,Cottingham Road,HU5 4AT
HA 32,293,Cottingham Road,HU5 4AT
HA 32,295,Cottingham Road,HU5 4AT
HA 32,297,Cottingham Road,HU5 4AT
HA 32,299,Cottingham Road,HU5 4AT
HA 32,301,Cottingham Road,HU5 4AT
HA 32,303,Cottingham Road,HU5 4AT
HA 32,305,Cottingham Road,HU5 4AT
HA 32,307,Cottingham Road,HU5 4AT
HA 32,309,Cottingham Road,HU5 4AT
HA 32,1,Edith Cavell Court,HU5 4BA
HA 32,2,Edith Cavell Court,HU5 4BA
HA 32,3,Edith Cavell Court,HU5 4BA
HA 32,4,Edith Cavell Court,HU5 4BA
HA 32,5,Edith Cavell Court,HU5 4BA
HA 32,6,Edith Cavell Court,HU5 4BA
HA 32,7,Edith Cavell Court,HU5 4BA
HA 32,8,Edith Cavell Court,HU5 4BA
HA 32,9,Edith Cavell Court,HU5 4BA
HA 32,10,Edith Cavell Court,HU5 4BA
HA 32,11,Edith Cavell Court,HU5 4BA
HA 32,12,Edith Cavell Court,HU5 4BA
HA 32,106,Barringhton Avenue,HU5 4BE
HA 32,112,Barringhton Avenue,HU5 4BE
HA 32,114,Barringhton Avenue,HU5 4BE
HA 32,116,Barringhton Avenue,HU5 4BE
HA 32,118,Barringhton Avenue,HU5 4BE
HA 32,120,Barringhton Avenue,HU5 4BE
HA 32,122,Barringhton Avenue,HU5 4BE
HA 32,124,Barringhton Avenue,HU5 4BE
HA 32,126,Barringhton Avenue,HU5 4BE
HA 32,1,Florence Nightingale Court,HU5 4BW
HA 32,2,Florence Nightingale Court,HU5 4BW
HA 32,3,Florence Nightingale Court,HU5 4BW
HA 32,4,Florence Nightingale Court,HU5 4BW
HA 32,5,Florence Nightingale Court,HU5 4BW
HA 32,6,Florence Nightingale Court,HU5 4BW
HA 32,7,Florence Nightingale Court,HU5 4BW
HA 32,8,Florence Nightingale Court,HU5 4BW
HA 32,9,Florence Nightingale Court,HU5 4BW
HA 32,10,Florence Nightingale Court,HU5 4BW
HA 32,11,Florence Nightingale Court,HU5 4BW
HA 32,12,Florence Nightingale Court,HU5 4BW
HA 32,14,Florence Nightingale Court,HU5 4BW
HA 32,15,Florence Nightingale Court,HU5 4BW
HA 32,17,Florence Nightingale Court,HU5 4BW
HA 32,19,Florence Nightingale Court,HU5 4BW
HA 32,12,Green Close,HU6 8DA
HA 32,44,Green Close,HU6 8DA
HA 32,49,Green Close,HU6 8DA
HA 32,50,Green Close,HU6 8DA
HA 32,14,Ashbury Court,HU6 8DY
HA 32,38,Westgarth Avenue,HU6 8LS
HA 32,46,WESTGARTH AVENUE,HU6 8LS
HA 32,48,WESTGARTH AVENUE,HU6 8LS
HA 32,54,Westgarth Avenue,HU6 8LS
HA 32,10,BEAUTIMAN COURT,HU6 8LX
HA 32,1,Rosey Row,HU9 1HF
HA 32,2,Rosey Row,HU9 1HF
HA 32,3,Rosey Row,HU9 1HF
HA 32,4,Rosey Row,HU9 1HF
HA 32,5,Rosey Row,HU9 1HF
HA 32,6,Rosey Row,HU9 1HF
HA 32,7,Rosey Row,HU9 1HF
HA 32,8,Rosey Row,HU9 1HF
HA 32,9,Rosey Row,HU9 1HF
HA 32,10,Rosey Row,HU9 1HF
HA 32,11,Rosey Row,HU9 1HF
HA 32,12,Rosey Row,HU9 1HF
HA 32,14,Rosey Row,HU9 1HF
HA 32,15,Rosey Row,HU9 1HF
HA 32,16,Rosey Row,HU9 1HF
HA 32,17,Rosey Row,HU9 1HF
HA 32,18,Rosey Row,HU9 1HF
HA 32,19,Rosey Row,HU9 1HF
HA 32,20,Rosey Row,HU9 1HF
HA 32,21,Rosey Row,HU9 1HF
HA 32,24,Steynburg Street,HU9 2PF
HA 32,26,Steynburg Street,HU9 2PF
HA 32,28,Steynburg Street,HU9 2PF
HA 32,30,Steynburg Street,HU9 2PF
HA 32,36,Steynburg Street,HU9 2PF
HA 32,38,Steynburg Street,HU9 2PF
HA 32,40,Steynburg Street,HU9 2PF
HA 32,42,Steynburg Street,HU9 2PF
HA 32,19,Rustenburg,HU9 2PT
HA 32,21,Rustenburg,HU9 2PT
HA 32,23,Rustenburg,HU9 2PT
HA 32,25,Rustenburg,HU9 2PT
HA 32,27,Rustenburg,HU9 2PT
HA 32,29,Rustenburg,HU9 2PT
HA 32,31,Rustenburg,HU9 2PT
HA 32,33,Rustenburg,HU9 2PT
HA 32,35,Rustenburg,HU9 2PT
HA 32,37,Rustenburg,HU9 2PT
HA 32,55,Rustenburg,HU9 2PT
HA 32,57,Rustenburg,HU9 2PT
HA 32,59,Rustenburg,HU9 2PT
HA 32,61,Rustenburg,HU9 2PT
HA 32,3,The Broadway,HU9 3JH
HA 32,5,THE BROADWAY,HU9 3JH
HA 32,7,The Broadway,HU9 3JH
HA 32,9,The Broadway,HU9 3JH
HA 32,11,The Broadway,HU9 3JH
HA 32,1,BOWLING CIRCLE,HU9 3JL
HA 32,3,BOWLING CIRCLE,HU9 3JL
HA 32,5,BOWLING CIRCLE,HU9 3JL
HA 32,7,BOWLING CIRCLE,HU9 3JL
HA 32,9,BOWLING CIRCLE,HU9 3JL
HA 32,1,MAJESTIC COURT,HU9 3JY
HA 32,2,MAJESTIC COURT,HU9 3JY
HA 32,3,MAJESTIC COURT,HU9 3JY
HA 32,4,MAJESTIC COURT,HU9 3JY
HA 32,5,MAJESTIC COURT,HU9 3JY
HA 32,6,MAJESTIC COURT,HU9 3JY
HA 32,7,MAJESTIC COURT,HU9 3JY
HA 32,8,MAJESTIC COURT,HU9 3JY
HA 32,9,MAJESTIC COURT,HU9 3JY
HA 32,10,MAJESTIC COURT,HU9 3JY
HA 32,11,MAJESTIC COURT,HU9 3JY
HA 32,12,MAJESTIC COURT,HU9 3JY
HA 32,14,MAJESTIC COURT,HU9 3JY
HA 32,15,Majestic Court,HU9 3JY
HA 32,16,MAJESTIC COURT,HU9 3JY
HA 32,1,ROYALE COURT,HU9 3JZ
HA 32,2,ROYALE COURT,HU9 3JZ
HA 32,3,ROYALE COURT,HU9 3JZ
HA 32,4,ROYALE COURT,HU9 3JZ
HA 32,5,ROYALE COURT,HU9 3JZ
HA 32,6,ROYALE COURT,HU9 3JZ
HA 32,7,ROYALE COURT,HU9 3JZ
HA 32,8,ROYALE COURT,HU9 3JZ
HA 32,9,ROYALE COURT,HU9 3JZ
HA 32,10,ROYALE COURT,HU9 3JZ
HA 32,11,ROYALE COURT,HU9 3JZ
HA 32,12,ROYALE COURT,HU9 3JZ
HA 32,14,ROYALE COURT,HU9 3JZ
HA 32,16,ROYALE COURT,HU9 3JZ
HA 32,17,ROYALE COURT,HU9 3JZ
HA 32,18,ROYALE COURT,HU9 3JZ
HA 32,19,ROYALE COURT,HU9 3JZ
HA 32,20,ROYALE COURT,HU9 3JZ
HA 32,21,ROYALE COURT,HU9 3JZ
HA 32,22,ROYALE COURT,HU9 3JZ
HA 32,23,ROYALE COURT,HU9 3JZ
HA 32,24,ROYALE COURT,HU9 3JZ
HA 32,25,ROYALE COURT,HU9 3JZ
HA 32,26,ROYALE COURT,HU9 3JZ
HA 32,12A,ROYALE COURT,HU9 3JZ
HA 32,79,MAYBURY ROAD,HU9 3LB
HA 32,1,HEBRIDES CLOSE,HU9 3LF
HA 32,2,HEBRIDES CLOSE,HU9 3LF
HA 32,3,HEBRIDES CLOSE,HU9 3LF
HA 32,4,HEBRIDES CLOSE,HU9 3LF
HA 32,5,HEBRIDES CLOSE,HU9 3LF
HA 32,6,HEBRIDES CLOSE,HU9 3LF
HA 32,7,HEBRIDES CLOSE,HU9 3LF
HA 32,8,HEBRIDES CLOSE,HU9 3LF
HA 32,9,HEBRIDES CLOSE,HU9 3LF
HA 32,10,HEBRIDES CLOSE,HU9 3LF
HA 32,11,HEBRIDES CLOSE,HU9 3LF
HA 32,14,Hebrides Close,HU9 3LF
HA 32,15,HEBRIDES CLOSE,HU9 3LF
HA 32,16,HEBRIDES CLOSE,HU9 3LF
HA 32,17,HEBRIDES CLOSE,HU9 3LF
HA 32,18,HEBRIDES CLOSE,HU9 3LF
HA 32,19,HEBRIDES CLOSE,HU9 3LF
HA 32,20,HEBRIDES CLOSE,HU9 3LF
HA 32,21,HEBRIDES CLOSE,HU9 3LF
HA 32,22,HEBRIDES CLOSE,HU9 3LF
HA 32,23,HEBRIDES CLOSE,HU9 3LF
HA 32,24,HEBRIDES CLOSE,HU9 3LF
HA 32,25,HEBRIDES CLOSE,HU9 3LF
HA 32,27,HEBRIDES CLOSE,HU9 3LF
HA 32,28,HEBRIDES CLOSE,HU9 3LF
HA 32,29,HEBRIDES CLOSE,HU9 3LF
HA 32,30,HEBRIDES CLOSE,HU9 3LF
HA 32,31,HEBRIDES CLOSE,HU9 3LF
HA 32,32,HEBRIDES CLOSE,HU9 3LF
HA 32,33,HEBRIDES CLOSE,HU9 3LF
HA 32,34,HEBRIDES CLOSE,HU9 3LF
HA 32,35,HEBRIDES CLOSE,HU9 3LF
HA 32,36,HEBRIDES CLOSE,HU9 3LF
HA 32,39,HEBRIDES CLOSE,HU9 3LF
HA 32,40,HEBRIDES CLOSE,HU9 3LF
HA 32,41,HEBRIDES CLOSE,HU9 3LF
HA 32,42,HEBRIDES CLOSE,HU9 3LF
HA 32,2,CROMARTY CLOSE,HU9 3LG
HA 32,4,CROMARTY CLOSE,HU9 3LG
HA 32,6,CROMARTY CLOSE,HU9 3LG
HA 32,8,CROMARTY CLOSE,HU9 3LG
HA 32,10,CROMARTY CLOSE,HU9 3LG
HA 32,12,CROMARTY CLOSE,HU9 3LG
HA 32,14,CROMARTY CLOSE,HU9 3LG
HA 32,16,CROMARTY CLOSE,HU9 3LG
HA 32,18,CROMARTY CLOSE,HU9 3LG
HA 32,20,CROMARTY CLOSE,HU9 3LG
HA 32,22,CROMARTY CLOSE,HU9 3LG
HA 32,24,CROMARTY CLOSE,HU9 3LG
HA 32,26,CROMARTY CLOSE,HU9 3LG
HA 32,28,CROMARTY CLOSE,HU9 3LG
HA 32,30,CROMARTY CLOSE,HU9 3LG
HA 32,32,CROMARTY CLOSE,HU9 3LG
HA 32,34,CROMARTY CLOSE,HU9 3LG
HA 32,36,CROMARTY CLOSE,HU9 3LG
HA 32,40,CROMARTY CLOSE,HU9 3LG
HA 32,42,CROMARTY CLOSE,HU9 3LG
HA 32,44,CROMARTY CLOSE,HU9 3LG
HA 32,46,CROMARTY CLOSE,HU9 3LG
HA 32,48,CROMARTY CLOSE,HU9 3LG
HA 32,48,CROMARTY CLOSE,HU9 3LG
HA 32,50,CROMARTY CLOSE,HU9 3LG
HA 32,52,CROMARTY CLOSE,HU9 3LG
HA 32,54,CROMARTY CLOSE,HU9 3LG
HA 32,56,CROMARTY CLOSE,HU9 3LG
HA 32,58,CROMARTY CLOSE,HU9 3LG
HA 32,60,CROMARTY CLOSE,HU9 3LG
HA 32,62,CROMARTY CLOSE,HU9 3LG
HA 32,64,CROMARTY CLOSE,HU9 3LG
HA 32,66,CROMARTY CLOSE,HU9 3LG
HA 32,68,CROMARTY CLOSE,HU9 3LG
HA 32,1,RONALDSWAY CLOSE,HU9 3LH
HA 32,2,RONALDSWAY CLOSE,HU9 3LH
HA 32,3,RONALDSWAY CLOSE,HU9 3LH
HA 32,3,"MALIN LODGE, RONALDSWAY CLOSE",HU9 3LH
HA 32,4,"MALIN LODGE, RONALDSWAY CLOSE",HU9 3LH
HA 32,6,"MALIN LODGE, RONALDSWAY CLOSE",HU9 3LH
HA 32,9,"MALIN LODGE, RONALDSWAY CLOSE",HU9 3LH
HA 32,10,"MALIN LODGE, RONALDSWAY CLOSE",HU9 3LH
HA 32,15,"MALIN LODGE, RONALDSWAY CLOSE",HU9 3LH
HA 32,17,"MALIN LODGE, RONALDSWAY CLOSE",HU9 3LH
HA 32,18,"MALIN LODGE, RONALDSWAY CLOSE",HU9 3LH
HA 32,7,BROADWAY DRIVE,HU9 3PA
HA 32,9,BROADWAY DRIVE,HU9 3PA
HA 32,11,BROADWAY DRIVE,HU9 3PA
HA 32,15,Broadway Drive,HU9 3PA
HA 32,17,Broadway Drive,HU9 3PA
HA 32,19,Broadway Drive,HU9 3PA
HA 32,21,Broadway Drive,HU9 3PA
HA 32,23,Broadway Drive,HU9 3PA
HA 32,25,Broadway Drive,HU9 3PA
HA 32,27,Broadway Drive,HU9 3PA
HA 32,29,Broadway Drive,HU9 3PA
HA 32,31,Broadway Drive,HU9 3PA
HA 32,33,Broadway Drive,HU9 3PA
HA 32,35,Broadway Drive,HU9 3PA
HA 32,37,BROADWAY DRIVE,HU9 3PA
HA 32,39,BROADWAY DRIVE,HU9 3PA
HA 32,41,Broadway Drive,HU9 3PA
HA 32,43,BROADWAY DRIVE,HU9 3PA
HA 32,45,BROADWAY DRIVE,HU9 3PA
HA 32,47,BROADWAY DRIVE,HU9 3PA
HA 32,49,BROADWAY DRIVE,HU9 3PA
HA 32,2,Broadway Drive,HU9 3PB
HA 32,4,Broadway Drive,HU9 3PB
HA 32,6,Broadway Drive,HU9 3PB
HA 32,8,Broadway Drive,HU9 3PB
HA 32,10,Broadway Drive,HU9 3PB
HA 32,12,Broadway Drive,HU9 3PB
HA 32,14,Broadway Drive,HU9 3PB
HA 32,16,Broadway Drive,HU9 3PB
HA 32,18,Broadway Drive,HU9 3PB
HA 32,20,Broadway Drive,HU9 3PB
HA 32,22,Broadway Drive,HU9 3PB
HA 32,26,Broadway Drive,HU9 3PB
HA 32,28,Broadway Drive,HU9 3PB
HA 32,28,ADA HOLMES CIRCLE,HU9 3PB
HA 32,30,Broadway Drive,HU9 3PB
HA 32,32,Broadway Drive,HU9 3PB
HA 32,34,Broadway Drive,HU9 3PB
HA 32,36,Broadway Drive,HU9 3PB
HA 32,38,Broadway Drive,HU9 3PB
HA 32,40,Broadway Drive,HU9 3PB
HA 32,42,Broadway Drive,HU9 3PB
HA 32,44,Broadway Drive,HU9 3PB
HA 32,46,Broadway Drive,HU9 3PB
HA 32,48,Broadway Drive,HU9 3PB
HA 32,52,Broadway Drive,HU9 3PB
HA 32,56,Broadway Drive,HU9 3PB
HA 32,58,Broadway Drive,HU9 3PB
HA 32,60,Broadway Drive,HU9 3PB
HA 32,55,RUTHERGLEN DRIVE,HU9 3PF
HA 32,57,RUTHERGLEN DRIVE,HU9 3PF
HA 32,59,RUTHERGLEN DRIVE,HU9 3PF
HA 32,1,IMPERIAL COURT,HU9 3PG
HA 32,3,IMPERIAL COURT,HU9 3PG
HA 32,4,IMPERIAL COURT,HU9 3PG
HA 32,5,IMPERIAL COURT,HU9 3PG
HA 32,6,IMPERIAL COURT,HU9 3PG
HA 32,7,IMPERIAL COURT,HU9 3PG
HA 32,8,IMPERIAL COURT,HU9 3PG
HA 32,9,IMPERIAL COURT,HU9 3PG
HA 32,10,IMPERIAL COURT,HU9 3PG
HA 32,10,SCHUBERT CLOSE,HU9 3PL
HA 32,27,SCHUBERT CLOSE,HU9 3PL
HA 32,28,SCHUBERT CLOSE,HU9 3PL
HA 32,32,SCHUBERT CLOSE,HU9 3PL
HA 32,1,Broadway Manor,HU9 3PN
HA 32,1,Broadway Cottages,HU9 3PN
HA 32,2,Broadway Manor,HU9 3PN
HA 32,2,Broadway Cottages,HU9 3PN
HA 32,3,Broadway Cottages,HU9 3PN
HA 32,6,Broadway Manor,HU9 3PN
HA 32,8,Broadway Manor,HU9 3PN
HA 32,17,Broadway Manor,HU9 3PN
HA 32,18,Broadway Manor,HU9 3PN
HA 32,19,Broadway Manor,HU9 3PN
HA 32,20,Broadway Manor,HU9 3PN
HA 32,24,Broadway Manor,HU9 3PN
HA 32,31,Broadway Manor,HU9 3PN
HA 32,35,Broadway Manor,HU9 3PN
HA 32,36,Broadway Manor,HU9 3PN
HA 32,12A,Broadway Manor,HU9 3PN
HA 32,1,FAROES CLOSE,HU9 4AN
HA 32,2,Feroes Close,HU9 4AN
HA 32,3,FAROES CLOSE,HU9 4AN
HA 32,4,FAROES CLOSE,HU9 4AN
HA 32,5,FAROES CLOSE,HU9 4AN
HA 32,6,FAROES CLOSE,HU9 4AN
HA 32,7,FAROES CLOSE,HU9 4AN
HA 32,9,FAROES CLOSE,HU9 4AN
HA 32,10,FAROES CLOSE,HU9 4AN
HA 32,11,FAROES CLOSE,HU9 4AN
HA 32,12,FAROES CLOSE,HU9 4AN
HA 32,14,FAROES CLOSE,HU9 4AN
HA 32,15,FAROES CLOSE,HU9 4AN
HA 32,16,FAROES CLOSE,HU9 4AN
HA 32,17,FAROES CLOSE,HU9 4AN
HA 32,18,FAROES CLOSE,HU9 4AN
HA 32,19,FAROES CLOSE,HU9 4AN
HA 32,81,MAYBURY ROAD,HU93LB
HA 32,1,ZIEGFELD COURT,HU93PH
HA 32,2,ZIEGFELD COURT,HU93PH
HA 32,3,ZIEGFELD COURT,HU93PH
HA 32,4,ZIEGFELD COURT,HU93PH
HA 32,5,ZIEGFELD COURT,HU93PH
HA 32,6,ZIEGFELD COURT,HU93PH
HA 32,7,ZIEGFELD COURT,HU93PH
HA 32,8,ZIEGFELD COURT,HU93PH
HA 32,9,ZIEGFELD COURT,HU93PH
HA 32,1,GOLDEN COURT,HU93PJ
HA 32,2,GOLDEN COURT,HU93PJ
HA 32,3,GOLDEN COURT,HU93PJ
HA 32,4,GOLDEN COURT,HU93PJ
HA 32,5,GOLDEN COURT,HU93PJ
HA 32,6,GOLDEN COURT,HU93PJ
HA 32,7,GOLDEN COURT,HU93PJ
HA 32,8,GOLDEN COURT,HU93PJ
HA 32,10,GOLDEN COURT,HU93PJ
HA 32,11,GOLDEN COURT,HU93PJ
HA 32,12,GOLDEN COURT,HU93PJ
HA 32,14,GOLDEN COURT,HU93PJ
HA 32,15,GOLDEN COURT,HU93PJ
HA 32,16,GOLDEN COURT,HU93PJ
HA 32,17,GOLDEN COURT,HU93PJ
HA 32,18,GOLDEN COURT,HU93PJ
HA 32,19,GOLDEN COURT,HU93PJ
HA 32,20,GOLDEN COURT,HU93PJ
HA 32,22,GOLDEN COURT,HU93PJ
HA 32,23,GOLDEN COURT,HU93PJ
HA 32,24,GOLDEN COURT,HU93PJ
HA 32,15,ROYALE COURT,HU9 3JZ
HA 32,6,SHERWOOD COURT,HU114DF
HA 32,979,HESSLE ROAD,HU4 6QG
HA 32,985,HESSLE ROAD,HU4 6QG
HA 32,2,BUSH CLOSE,HU4 6SP
HA 32,11,BUSH CLOSE,HU4 6SP
HA 32,16,BUSH CLOSE,HU4 6SP
HA 32,52,FORESTER WAY,HU4 6SR
HA 32,72,FORESTER WAY,HU4 6SR
HA 32,74,FORESTER WAY,HU4 6SR
HA 32,3,SUMMERGROVES WAY,HU4 6SZ
HA 32,5,WALNUT TREE WAY,HU4 6TG
HA 32,6,WALNUT TREE WAY,HU4 6TG
HA 32,417,Endike Lane,HU6 8AG
HA 32,5,Ashbury Court,HU6 8DA
HA 32,9,Ashbury Court,HU6 8DA
HA 32,12,Ashbury Court,HU6 8DA
HA 32,28,Green Close,HU6 8DA
HA 32,34,Green Close,HU6 8DA
HA 32,51,Green Close,HU6 8DA
HA 32,259,Endike Lane,HU6 8DX
HA 32,261,Endike Lane,HU6 8DX
HA 32,17,Ashbury Court,HU6 8DY
HA 32,20,Ashbury Court,HU6 8DY
HA 32,30,Westgarth Avenue,HU6 8LS
HA 32,45,Westgarth Avenue,HU6 8LS
HA 32,65,Westgarth Avenue,HU6 8LS
HA 32,12,BEAUTIMAN COURT,HU6 8LX
HA 32,1,THE BROADWAY,HU9 3JH
HA 32,12,HEBRIDES CLOSE,HU9 3LF
HA 32,26,HEBRIDES CLOSE,HU9 3LF
HA 32,37,HEBRIDES CLOSE,HU9 3LF
HA 32,38,HEBRIDES CLOSE,HU9 3LF
HA 32,24,Broadway Drive,HU9 3PB
HA 32,50,Broadway Drive,HU9 3PB
HA 32,54,Broadway Drive,HU9 3PB
HA 32,2,IMPERIAL COURT,HU9 3PG
HA 32,5,SCHUBERT CLOSE,HU9 3PL
HA 32,8,SCHUBERT CLOSE,HU9 3PL
HA 32,19,SCHUBERT CLOSE,HU9 3PL
HA 32,34,SCHUBERT CLOSE,HU9 3PL
HA 32,8,FAROES CLOSE,HU9 4AN
HA 32,9,GOLDEN COURT,HU93PJ
HA 32,21,GOLDEN COURT,HU93PJ
1 Housing Association No. Address Postcode
2 HA 32 1 SHERWOOD COURT HU114DF
3 HA 32 2 SHERWOOD COURT HU114DF
4 HA 32 3 SHERWOOD COURT HU114DF
5 HA 32 4 SHERWOOD COURT HU114DF
6 HA 32 5 SHERWOOD COURT HU114DF
7 HA 32 7 SHERWOOD COURT HU114DF
8 HA 32 8 SHERWOOD COURT HU114DF
9 HA 32 9 SHERWOOD COURT HU114DF
10 HA 32 10 SHERWOOD COURT HU114DF
11 HA 32 27 Seaton Grove HU4 6HF
12 HA 32 29 Seaton Grove HU4 6HF
13 HA 32 31 Seaton Grove HU4 6HF
14 HA 32 33 Seaton Grove HU4 6HF
15 HA 32 35 Seaton Grove HU4 6HF
16 HA 32 39 Seaton Grove HU4 6HF
17 HA 32 41 Seaton Grove HU4 6HF
18 HA 32 43 Seaton Grove HU4 6HF
19 HA 32 7 Norton Grove HU4 6HG
20 HA 32 9 Norton Grove HU4 6HG
21 HA 32 11 Norton Grove HU4 6HG
22 HA 32 15 Norton Grove HU4 6HG
23 HA 32 17 Norton Grove HU4 6HG
24 HA 32 19 Norton Grove HU4 6HG
25 HA 32 21 Norton Grove HU4 6HG
26 HA 32 28 Coxwold HU4 6HH
27 HA 32 30 Coxwold HU4 6HH
28 HA 32 32 Coxwold HU4 6HH
29 HA 32 34 Coxwold HU4 6HH
30 HA 32 36 Coxwold HU4 6HH
31 HA 32 38 Coxwold HU4 6HH
32 HA 32 40 Coxwold HU4 6HH
33 HA 32 42 Coxwold HU4 6HH
34 HA 32 44 Coxwold HU4 6HH
35 HA 32 971 HESSLE ROAD HU4 6QG
36 HA 32 973 HESSLE ROAD HU4 6QG
37 HA 32 975 HESSLE ROAD HU4 6QG
38 HA 32 977 HESSLE ROAD HU4 6QG
39 HA 32 981 HESSLE ROAD HU4 6QG
40 HA 32 983 HESSLE ROAD HU4 6QG
41 HA 32 1 Hessle Road HU4 6RS
42 HA 32 2 Hessle Road HU4 6RS
43 HA 32 3 Hessle Road HU4 6RS
44 HA 32 4 Hessle Road HU4 6RS
45 HA 32 5 Hessle Road HU4 6RS
46 HA 32 6 Hessle Road HU4 6RS
47 HA 32 7 Hessle Road HU4 6RS
48 HA 32 8 Hessle Road HU4 6RS
49 HA 32 9 Hessle Road HU4 6RS
50 HA 32 10 Hessle Road HU4 6RS
51 HA 32 11 Hessle Road HU4 6RS
52 HA 32 12 Hessle Road HU4 6RS
53 HA 32 14 Hessle Road HU4 6RS
54 HA 32 15 Hessle Road HU4 6RS
55 HA 32 16 Hessle Road HU4 6RS
56 HA 32 17 Hessle Road HU4 6RS
57 HA 32 18 Hessle Road HU4 6RS
58 HA 32 19 Hessle Road HU4 6RS
59 HA 32 20 Hessle Road HU4 6RS
60 HA 32 21 Hessle Road HU4 6RS
61 HA 32 22 Hessle Road HU4 6RS
62 HA 32 23 Hessle Road HU4 6RS
63 HA 32 24 Hessle Road HU4 6RS
64 HA 32 25 Hessle Road HU4 6RS
65 HA 32 26 Hessle Road HU4 6RS
66 HA 32 27 Hessle Road HU4 6RS
67 HA 32 28 Hessle Road HU4 6RS
68 HA 32 29 Hessle Road HU4 6RS
69 HA 32 30 Hessle Road HU4 6RS
70 HA 32 31 Hessle Road HU4 6RS
71 HA 32 32 Hessle Road HU4 6RS
72 HA 32 33 Hessle Road HU4 6RS
73 HA 32 34 Hessle Road HU4 6RS
74 HA 32 35 Hessle Road HU4 6RS
75 HA 32 36 Hessle Road HU4 6RS
76 HA 32 37 Hessle Road HU4 6RS
77 HA 32 46 FORESTER WAY HU4 6SR
78 HA 32 48 FORESTER WAY HU4 6SR
79 HA 32 50 FORESTER WAY HU4 6SR
80 HA 32 54 FORESTER WAY HU4 6SR
81 HA 32 56 FORESTER WAY HU4 6SR
82 HA 32 62 FORESTER WAY HU4 6SR
83 HA 32 64 FORESTER WAY HU4 6SR
84 HA 32 66 FORESTER WAY HU4 6SR
85 HA 32 68 FORESTER WAY HU4 6SR
86 HA 32 70 FORESTER WAY HU4 6SR
87 HA 32 15 SUMMERGROVES WAY HU4 6SZ
88 HA 32 1 WALNUT TREE WAY HU4 6TG
89 HA 32 2 WALNUT TREE WAY HU4 6TG
90 HA 32 3 WALNUT TREE WAY HU4 6TG
91 HA 32 4 WALNUT TREE WAY HU4 6TG
92 HA 32 7 WALNUT TREE WAY HU4 6TG
93 HA 32 8 WALNUT TREE WAY HU4 6TG
94 HA 32 9 WALNUT TREE WAY HU4 6TG
95 HA 32 291 Cottingham Road HU5 4AT
96 HA 32 293 Cottingham Road HU5 4AT
97 HA 32 295 Cottingham Road HU5 4AT
98 HA 32 297 Cottingham Road HU5 4AT
99 HA 32 299 Cottingham Road HU5 4AT
100 HA 32 301 Cottingham Road HU5 4AT
101 HA 32 303 Cottingham Road HU5 4AT
102 HA 32 305 Cottingham Road HU5 4AT
103 HA 32 307 Cottingham Road HU5 4AT
104 HA 32 309 Cottingham Road HU5 4AT
105 HA 32 1 Edith Cavell Court HU5 4BA
106 HA 32 2 Edith Cavell Court HU5 4BA
107 HA 32 3 Edith Cavell Court HU5 4BA
108 HA 32 4 Edith Cavell Court HU5 4BA
109 HA 32 5 Edith Cavell Court HU5 4BA
110 HA 32 6 Edith Cavell Court HU5 4BA
111 HA 32 7 Edith Cavell Court HU5 4BA
112 HA 32 8 Edith Cavell Court HU5 4BA
113 HA 32 9 Edith Cavell Court HU5 4BA
114 HA 32 10 Edith Cavell Court HU5 4BA
115 HA 32 11 Edith Cavell Court HU5 4BA
116 HA 32 12 Edith Cavell Court HU5 4BA
117 HA 32 106 Barringhton Avenue HU5 4BE
118 HA 32 112 Barringhton Avenue HU5 4BE
119 HA 32 114 Barringhton Avenue HU5 4BE
120 HA 32 116 Barringhton Avenue HU5 4BE
121 HA 32 118 Barringhton Avenue HU5 4BE
122 HA 32 120 Barringhton Avenue HU5 4BE
123 HA 32 122 Barringhton Avenue HU5 4BE
124 HA 32 124 Barringhton Avenue HU5 4BE
125 HA 32 126 Barringhton Avenue HU5 4BE
126 HA 32 1 Florence Nightingale Court HU5 4BW
127 HA 32 2 Florence Nightingale Court HU5 4BW
128 HA 32 3 Florence Nightingale Court HU5 4BW
129 HA 32 4 Florence Nightingale Court HU5 4BW
130 HA 32 5 Florence Nightingale Court HU5 4BW
131 HA 32 6 Florence Nightingale Court HU5 4BW
132 HA 32 7 Florence Nightingale Court HU5 4BW
133 HA 32 8 Florence Nightingale Court HU5 4BW
134 HA 32 9 Florence Nightingale Court HU5 4BW
135 HA 32 10 Florence Nightingale Court HU5 4BW
136 HA 32 11 Florence Nightingale Court HU5 4BW
137 HA 32 12 Florence Nightingale Court HU5 4BW
138 HA 32 14 Florence Nightingale Court HU5 4BW
139 HA 32 15 Florence Nightingale Court HU5 4BW
140 HA 32 17 Florence Nightingale Court HU5 4BW
141 HA 32 19 Florence Nightingale Court HU5 4BW
142 HA 32 12 Green Close HU6 8DA
143 HA 32 44 Green Close HU6 8DA
144 HA 32 49 Green Close HU6 8DA
145 HA 32 50 Green Close HU6 8DA
146 HA 32 14 Ashbury Court HU6 8DY
147 HA 32 38 Westgarth Avenue HU6 8LS
148 HA 32 46 WESTGARTH AVENUE HU6 8LS
149 HA 32 48 WESTGARTH AVENUE HU6 8LS
150 HA 32 54 Westgarth Avenue HU6 8LS
151 HA 32 10 BEAUTIMAN COURT HU6 8LX
152 HA 32 1 Rosey Row HU9 1HF
153 HA 32 2 Rosey Row HU9 1HF
154 HA 32 3 Rosey Row HU9 1HF
155 HA 32 4 Rosey Row HU9 1HF
156 HA 32 5 Rosey Row HU9 1HF
157 HA 32 6 Rosey Row HU9 1HF
158 HA 32 7 Rosey Row HU9 1HF
159 HA 32 8 Rosey Row HU9 1HF
160 HA 32 9 Rosey Row HU9 1HF
161 HA 32 10 Rosey Row HU9 1HF
162 HA 32 11 Rosey Row HU9 1HF
163 HA 32 12 Rosey Row HU9 1HF
164 HA 32 14 Rosey Row HU9 1HF
165 HA 32 15 Rosey Row HU9 1HF
166 HA 32 16 Rosey Row HU9 1HF
167 HA 32 17 Rosey Row HU9 1HF
168 HA 32 18 Rosey Row HU9 1HF
169 HA 32 19 Rosey Row HU9 1HF
170 HA 32 20 Rosey Row HU9 1HF
171 HA 32 21 Rosey Row HU9 1HF
172 HA 32 24 Steynburg Street HU9 2PF
173 HA 32 26 Steynburg Street HU9 2PF
174 HA 32 28 Steynburg Street HU9 2PF
175 HA 32 30 Steynburg Street HU9 2PF
176 HA 32 36 Steynburg Street HU9 2PF
177 HA 32 38 Steynburg Street HU9 2PF
178 HA 32 40 Steynburg Street HU9 2PF
179 HA 32 42 Steynburg Street HU9 2PF
180 HA 32 19 Rustenburg HU9 2PT
181 HA 32 21 Rustenburg HU9 2PT
182 HA 32 23 Rustenburg HU9 2PT
183 HA 32 25 Rustenburg HU9 2PT
184 HA 32 27 Rustenburg HU9 2PT
185 HA 32 29 Rustenburg HU9 2PT
186 HA 32 31 Rustenburg HU9 2PT
187 HA 32 33 Rustenburg HU9 2PT
188 HA 32 35 Rustenburg HU9 2PT
189 HA 32 37 Rustenburg HU9 2PT
190 HA 32 55 Rustenburg HU9 2PT
191 HA 32 57 Rustenburg HU9 2PT
192 HA 32 59 Rustenburg HU9 2PT
193 HA 32 61 Rustenburg HU9 2PT
194 HA 32 3 The Broadway HU9 3JH
195 HA 32 5 THE BROADWAY HU9 3JH
196 HA 32 7 The Broadway HU9 3JH
197 HA 32 9 The Broadway HU9 3JH
198 HA 32 11 The Broadway HU9 3JH
199 HA 32 1 BOWLING CIRCLE HU9 3JL
200 HA 32 3 BOWLING CIRCLE HU9 3JL
201 HA 32 5 BOWLING CIRCLE HU9 3JL
202 HA 32 7 BOWLING CIRCLE HU9 3JL
203 HA 32 9 BOWLING CIRCLE HU9 3JL
204 HA 32 1 MAJESTIC COURT HU9 3JY
205 HA 32 2 MAJESTIC COURT HU9 3JY
206 HA 32 3 MAJESTIC COURT HU9 3JY
207 HA 32 4 MAJESTIC COURT HU9 3JY
208 HA 32 5 MAJESTIC COURT HU9 3JY
209 HA 32 6 MAJESTIC COURT HU9 3JY
210 HA 32 7 MAJESTIC COURT HU9 3JY
211 HA 32 8 MAJESTIC COURT HU9 3JY
212 HA 32 9 MAJESTIC COURT HU9 3JY
213 HA 32 10 MAJESTIC COURT HU9 3JY
214 HA 32 11 MAJESTIC COURT HU9 3JY
215 HA 32 12 MAJESTIC COURT HU9 3JY
216 HA 32 14 MAJESTIC COURT HU9 3JY
217 HA 32 15 Majestic Court HU9 3JY
218 HA 32 16 MAJESTIC COURT HU9 3JY
219 HA 32 1 ROYALE COURT HU9 3JZ
220 HA 32 2 ROYALE COURT HU9 3JZ
221 HA 32 3 ROYALE COURT HU9 3JZ
222 HA 32 4 ROYALE COURT HU9 3JZ
223 HA 32 5 ROYALE COURT HU9 3JZ
224 HA 32 6 ROYALE COURT HU9 3JZ
225 HA 32 7 ROYALE COURT HU9 3JZ
226 HA 32 8 ROYALE COURT HU9 3JZ
227 HA 32 9 ROYALE COURT HU9 3JZ
228 HA 32 10 ROYALE COURT HU9 3JZ
229 HA 32 11 ROYALE COURT HU9 3JZ
230 HA 32 12 ROYALE COURT HU9 3JZ
231 HA 32 14 ROYALE COURT HU9 3JZ
232 HA 32 16 ROYALE COURT HU9 3JZ
233 HA 32 17 ROYALE COURT HU9 3JZ
234 HA 32 18 ROYALE COURT HU9 3JZ
235 HA 32 19 ROYALE COURT HU9 3JZ
236 HA 32 20 ROYALE COURT HU9 3JZ
237 HA 32 21 ROYALE COURT HU9 3JZ
238 HA 32 22 ROYALE COURT HU9 3JZ
239 HA 32 23 ROYALE COURT HU9 3JZ
240 HA 32 24 ROYALE COURT HU9 3JZ
241 HA 32 25 ROYALE COURT HU9 3JZ
242 HA 32 26 ROYALE COURT HU9 3JZ
243 HA 32 12A ROYALE COURT HU9 3JZ
244 HA 32 79 MAYBURY ROAD HU9 3LB
245 HA 32 1 HEBRIDES CLOSE HU9 3LF
246 HA 32 2 HEBRIDES CLOSE HU9 3LF
247 HA 32 3 HEBRIDES CLOSE HU9 3LF
248 HA 32 4 HEBRIDES CLOSE HU9 3LF
249 HA 32 5 HEBRIDES CLOSE HU9 3LF
250 HA 32 6 HEBRIDES CLOSE HU9 3LF
251 HA 32 7 HEBRIDES CLOSE HU9 3LF
252 HA 32 8 HEBRIDES CLOSE HU9 3LF
253 HA 32 9 HEBRIDES CLOSE HU9 3LF
254 HA 32 10 HEBRIDES CLOSE HU9 3LF
255 HA 32 11 HEBRIDES CLOSE HU9 3LF
256 HA 32 14 Hebrides Close HU9 3LF
257 HA 32 15 HEBRIDES CLOSE HU9 3LF
258 HA 32 16 HEBRIDES CLOSE HU9 3LF
259 HA 32 17 HEBRIDES CLOSE HU9 3LF
260 HA 32 18 HEBRIDES CLOSE HU9 3LF
261 HA 32 19 HEBRIDES CLOSE HU9 3LF
262 HA 32 20 HEBRIDES CLOSE HU9 3LF
263 HA 32 21 HEBRIDES CLOSE HU9 3LF
264 HA 32 22 HEBRIDES CLOSE HU9 3LF
265 HA 32 23 HEBRIDES CLOSE HU9 3LF
266 HA 32 24 HEBRIDES CLOSE HU9 3LF
267 HA 32 25 HEBRIDES CLOSE HU9 3LF
268 HA 32 27 HEBRIDES CLOSE HU9 3LF
269 HA 32 28 HEBRIDES CLOSE HU9 3LF
270 HA 32 29 HEBRIDES CLOSE HU9 3LF
271 HA 32 30 HEBRIDES CLOSE HU9 3LF
272 HA 32 31 HEBRIDES CLOSE HU9 3LF
273 HA 32 32 HEBRIDES CLOSE HU9 3LF
274 HA 32 33 HEBRIDES CLOSE HU9 3LF
275 HA 32 34 HEBRIDES CLOSE HU9 3LF
276 HA 32 35 HEBRIDES CLOSE HU9 3LF
277 HA 32 36 HEBRIDES CLOSE HU9 3LF
278 HA 32 39 HEBRIDES CLOSE HU9 3LF
279 HA 32 40 HEBRIDES CLOSE HU9 3LF
280 HA 32 41 HEBRIDES CLOSE HU9 3LF
281 HA 32 42 HEBRIDES CLOSE HU9 3LF
282 HA 32 2 CROMARTY CLOSE HU9 3LG
283 HA 32 4 CROMARTY CLOSE HU9 3LG
284 HA 32 6 CROMARTY CLOSE HU9 3LG
285 HA 32 8 CROMARTY CLOSE HU9 3LG
286 HA 32 10 CROMARTY CLOSE HU9 3LG
287 HA 32 12 CROMARTY CLOSE HU9 3LG
288 HA 32 14 CROMARTY CLOSE HU9 3LG
289 HA 32 16 CROMARTY CLOSE HU9 3LG
290 HA 32 18 CROMARTY CLOSE HU9 3LG
291 HA 32 20 CROMARTY CLOSE HU9 3LG
292 HA 32 22 CROMARTY CLOSE HU9 3LG
293 HA 32 24 CROMARTY CLOSE HU9 3LG
294 HA 32 26 CROMARTY CLOSE HU9 3LG
295 HA 32 28 CROMARTY CLOSE HU9 3LG
296 HA 32 30 CROMARTY CLOSE HU9 3LG
297 HA 32 32 CROMARTY CLOSE HU9 3LG
298 HA 32 34 CROMARTY CLOSE HU9 3LG
299 HA 32 36 CROMARTY CLOSE HU9 3LG
300 HA 32 40 CROMARTY CLOSE HU9 3LG
301 HA 32 42 CROMARTY CLOSE HU9 3LG
302 HA 32 44 CROMARTY CLOSE HU9 3LG
303 HA 32 46 CROMARTY CLOSE HU9 3LG
304 HA 32 48 CROMARTY CLOSE HU9 3LG
305 HA 32 48 CROMARTY CLOSE HU9 3LG
306 HA 32 50 CROMARTY CLOSE HU9 3LG
307 HA 32 52 CROMARTY CLOSE HU9 3LG
308 HA 32 54 CROMARTY CLOSE HU9 3LG
309 HA 32 56 CROMARTY CLOSE HU9 3LG
310 HA 32 58 CROMARTY CLOSE HU9 3LG
311 HA 32 60 CROMARTY CLOSE HU9 3LG
312 HA 32 62 CROMARTY CLOSE HU9 3LG
313 HA 32 64 CROMARTY CLOSE HU9 3LG
314 HA 32 66 CROMARTY CLOSE HU9 3LG
315 HA 32 68 CROMARTY CLOSE HU9 3LG
316 HA 32 1 RONALDSWAY CLOSE HU9 3LH
317 HA 32 2 RONALDSWAY CLOSE HU9 3LH
318 HA 32 3 RONALDSWAY CLOSE HU9 3LH
319 HA 32 3 MALIN LODGE, RONALDSWAY CLOSE HU9 3LH
320 HA 32 4 MALIN LODGE, RONALDSWAY CLOSE HU9 3LH
321 HA 32 6 MALIN LODGE, RONALDSWAY CLOSE HU9 3LH
322 HA 32 9 MALIN LODGE, RONALDSWAY CLOSE HU9 3LH
323 HA 32 10 MALIN LODGE, RONALDSWAY CLOSE HU9 3LH
324 HA 32 15 MALIN LODGE, RONALDSWAY CLOSE HU9 3LH
325 HA 32 17 MALIN LODGE, RONALDSWAY CLOSE HU9 3LH
326 HA 32 18 MALIN LODGE, RONALDSWAY CLOSE HU9 3LH
327 HA 32 7 BROADWAY DRIVE HU9 3PA
328 HA 32 9 BROADWAY DRIVE HU9 3PA
329 HA 32 11 BROADWAY DRIVE HU9 3PA
330 HA 32 15 Broadway Drive HU9 3PA
331 HA 32 17 Broadway Drive HU9 3PA
332 HA 32 19 Broadway Drive HU9 3PA
333 HA 32 21 Broadway Drive HU9 3PA
334 HA 32 23 Broadway Drive HU9 3PA
335 HA 32 25 Broadway Drive HU9 3PA
336 HA 32 27 Broadway Drive HU9 3PA
337 HA 32 29 Broadway Drive HU9 3PA
338 HA 32 31 Broadway Drive HU9 3PA
339 HA 32 33 Broadway Drive HU9 3PA
340 HA 32 35 Broadway Drive HU9 3PA
341 HA 32 37 BROADWAY DRIVE HU9 3PA
342 HA 32 39 BROADWAY DRIVE HU9 3PA
343 HA 32 41 Broadway Drive HU9 3PA
344 HA 32 43 BROADWAY DRIVE HU9 3PA
345 HA 32 45 BROADWAY DRIVE HU9 3PA
346 HA 32 47 BROADWAY DRIVE HU9 3PA
347 HA 32 49 BROADWAY DRIVE HU9 3PA
348 HA 32 2 Broadway Drive HU9 3PB
349 HA 32 4 Broadway Drive HU9 3PB
350 HA 32 6 Broadway Drive HU9 3PB
351 HA 32 8 Broadway Drive HU9 3PB
352 HA 32 10 Broadway Drive HU9 3PB
353 HA 32 12 Broadway Drive HU9 3PB
354 HA 32 14 Broadway Drive HU9 3PB
355 HA 32 16 Broadway Drive HU9 3PB
356 HA 32 18 Broadway Drive HU9 3PB
357 HA 32 20 Broadway Drive HU9 3PB
358 HA 32 22 Broadway Drive HU9 3PB
359 HA 32 26 Broadway Drive HU9 3PB
360 HA 32 28 Broadway Drive HU9 3PB
361 HA 32 28 ADA HOLMES CIRCLE HU9 3PB
362 HA 32 30 Broadway Drive HU9 3PB
363 HA 32 32 Broadway Drive HU9 3PB
364 HA 32 34 Broadway Drive HU9 3PB
365 HA 32 36 Broadway Drive HU9 3PB
366 HA 32 38 Broadway Drive HU9 3PB
367 HA 32 40 Broadway Drive HU9 3PB
368 HA 32 42 Broadway Drive HU9 3PB
369 HA 32 44 Broadway Drive HU9 3PB
370 HA 32 46 Broadway Drive HU9 3PB
371 HA 32 48 Broadway Drive HU9 3PB
372 HA 32 52 Broadway Drive HU9 3PB
373 HA 32 56 Broadway Drive HU9 3PB
374 HA 32 58 Broadway Drive HU9 3PB
375 HA 32 60 Broadway Drive HU9 3PB
376 HA 32 55 RUTHERGLEN DRIVE HU9 3PF
377 HA 32 57 RUTHERGLEN DRIVE HU9 3PF
378 HA 32 59 RUTHERGLEN DRIVE HU9 3PF
379 HA 32 1 IMPERIAL COURT HU9 3PG
380 HA 32 3 IMPERIAL COURT HU9 3PG
381 HA 32 4 IMPERIAL COURT HU9 3PG
382 HA 32 5 IMPERIAL COURT HU9 3PG
383 HA 32 6 IMPERIAL COURT HU9 3PG
384 HA 32 7 IMPERIAL COURT HU9 3PG
385 HA 32 8 IMPERIAL COURT HU9 3PG
386 HA 32 9 IMPERIAL COURT HU9 3PG
387 HA 32 10 IMPERIAL COURT HU9 3PG
388 HA 32 10 SCHUBERT CLOSE HU9 3PL
389 HA 32 27 SCHUBERT CLOSE HU9 3PL
390 HA 32 28 SCHUBERT CLOSE HU9 3PL
391 HA 32 32 SCHUBERT CLOSE HU9 3PL
392 HA 32 1 Broadway Manor HU9 3PN
393 HA 32 1 Broadway Cottages HU9 3PN
394 HA 32 2 Broadway Manor HU9 3PN
395 HA 32 2 Broadway Cottages HU9 3PN
396 HA 32 3 Broadway Cottages HU9 3PN
397 HA 32 6 Broadway Manor HU9 3PN
398 HA 32 8 Broadway Manor HU9 3PN
399 HA 32 17 Broadway Manor HU9 3PN
400 HA 32 18 Broadway Manor HU9 3PN
401 HA 32 19 Broadway Manor HU9 3PN
402 HA 32 20 Broadway Manor HU9 3PN
403 HA 32 24 Broadway Manor HU9 3PN
404 HA 32 31 Broadway Manor HU9 3PN
405 HA 32 35 Broadway Manor HU9 3PN
406 HA 32 36 Broadway Manor HU9 3PN
407 HA 32 12A Broadway Manor HU9 3PN
408 HA 32 1 FAROES CLOSE HU9 4AN
409 HA 32 2 Feroes Close HU9 4AN
410 HA 32 3 FAROES CLOSE HU9 4AN
411 HA 32 4 FAROES CLOSE HU9 4AN
412 HA 32 5 FAROES CLOSE HU9 4AN
413 HA 32 6 FAROES CLOSE HU9 4AN
414 HA 32 7 FAROES CLOSE HU9 4AN
415 HA 32 9 FAROES CLOSE HU9 4AN
416 HA 32 10 FAROES CLOSE HU9 4AN
417 HA 32 11 FAROES CLOSE HU9 4AN
418 HA 32 12 FAROES CLOSE HU9 4AN
419 HA 32 14 FAROES CLOSE HU9 4AN
420 HA 32 15 FAROES CLOSE HU9 4AN
421 HA 32 16 FAROES CLOSE HU9 4AN
422 HA 32 17 FAROES CLOSE HU9 4AN
423 HA 32 18 FAROES CLOSE HU9 4AN
424 HA 32 19 FAROES CLOSE HU9 4AN
425 HA 32 81 MAYBURY ROAD HU93LB
426 HA 32 1 ZIEGFELD COURT HU93PH
427 HA 32 2 ZIEGFELD COURT HU93PH
428 HA 32 3 ZIEGFELD COURT HU93PH
429 HA 32 4 ZIEGFELD COURT HU93PH
430 HA 32 5 ZIEGFELD COURT HU93PH
431 HA 32 6 ZIEGFELD COURT HU93PH
432 HA 32 7 ZIEGFELD COURT HU93PH
433 HA 32 8 ZIEGFELD COURT HU93PH
434 HA 32 9 ZIEGFELD COURT HU93PH
435 HA 32 1 GOLDEN COURT HU93PJ
436 HA 32 2 GOLDEN COURT HU93PJ
437 HA 32 3 GOLDEN COURT HU93PJ
438 HA 32 4 GOLDEN COURT HU93PJ
439 HA 32 5 GOLDEN COURT HU93PJ
440 HA 32 6 GOLDEN COURT HU93PJ
441 HA 32 7 GOLDEN COURT HU93PJ
442 HA 32 8 GOLDEN COURT HU93PJ
443 HA 32 10 GOLDEN COURT HU93PJ
444 HA 32 11 GOLDEN COURT HU93PJ
445 HA 32 12 GOLDEN COURT HU93PJ
446 HA 32 14 GOLDEN COURT HU93PJ
447 HA 32 15 GOLDEN COURT HU93PJ
448 HA 32 16 GOLDEN COURT HU93PJ
449 HA 32 17 GOLDEN COURT HU93PJ
450 HA 32 18 GOLDEN COURT HU93PJ
451 HA 32 19 GOLDEN COURT HU93PJ
452 HA 32 20 GOLDEN COURT HU93PJ
453 HA 32 22 GOLDEN COURT HU93PJ
454 HA 32 23 GOLDEN COURT HU93PJ
455 HA 32 24 GOLDEN COURT HU93PJ
456 HA 32 15 ROYALE COURT HU9 3JZ
457 HA 32 6 SHERWOOD COURT HU114DF
458 HA 32 979 HESSLE ROAD HU4 6QG
459 HA 32 985 HESSLE ROAD HU4 6QG
460 HA 32 2 BUSH CLOSE HU4 6SP
461 HA 32 11 BUSH CLOSE HU4 6SP
462 HA 32 16 BUSH CLOSE HU4 6SP
463 HA 32 52 FORESTER WAY HU4 6SR
464 HA 32 72 FORESTER WAY HU4 6SR
465 HA 32 74 FORESTER WAY HU4 6SR
466 HA 32 3 SUMMERGROVES WAY HU4 6SZ
467 HA 32 5 WALNUT TREE WAY HU4 6TG
468 HA 32 6 WALNUT TREE WAY HU4 6TG
469 HA 32 417 Endike Lane HU6 8AG
470 HA 32 5 Ashbury Court HU6 8DA
471 HA 32 9 Ashbury Court HU6 8DA
472 HA 32 12 Ashbury Court HU6 8DA
473 HA 32 28 Green Close HU6 8DA
474 HA 32 34 Green Close HU6 8DA
475 HA 32 51 Green Close HU6 8DA
476 HA 32 259 Endike Lane HU6 8DX
477 HA 32 261 Endike Lane HU6 8DX
478 HA 32 17 Ashbury Court HU6 8DY
479 HA 32 20 Ashbury Court HU6 8DY
480 HA 32 30 Westgarth Avenue HU6 8LS
481 HA 32 45 Westgarth Avenue HU6 8LS
482 HA 32 65 Westgarth Avenue HU6 8LS
483 HA 32 12 BEAUTIMAN COURT HU6 8LX
484 HA 32 1 THE BROADWAY HU9 3JH
485 HA 32 12 HEBRIDES CLOSE HU9 3LF
486 HA 32 26 HEBRIDES CLOSE HU9 3LF
487 HA 32 37 HEBRIDES CLOSE HU9 3LF
488 HA 32 38 HEBRIDES CLOSE HU9 3LF
489 HA 32 24 Broadway Drive HU9 3PB
490 HA 32 50 Broadway Drive HU9 3PB
491 HA 32 54 Broadway Drive HU9 3PB
492 HA 32 2 IMPERIAL COURT HU9 3PG
493 HA 32 5 SCHUBERT CLOSE HU9 3PL
494 HA 32 8 SCHUBERT CLOSE HU9 3PL
495 HA 32 19 SCHUBERT CLOSE HU9 3PL
496 HA 32 34 SCHUBERT CLOSE HU9 3PL
497 HA 32 8 FAROES CLOSE HU9 4AN
498 HA 32 9 GOLDEN COURT HU93PJ
499 HA 32 21 GOLDEN COURT HU93PJ

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,665 @@
import numpy as np
import pandas as pd
# New per-sale rates, applied to *forecasted* (not yet completed) sales when
# estimating future revenue; completed sales are valued with the historical
# rates derived from past invoices instead.
# NOTE(review): presumably GBP per completed install, in line with the "£"
# figures quoted for the historical rates further down - confirm.
ECO4_NEW_RATES = 1710
GBIS_NEW_RATES = 600
def app():
# Load in the excel
nov_ha_data = pd.read_excel(
'etl/eligibility/ha_15_32/ALL HA FIGURES AND ASSIGNED INSTALLERS 21.11.2023 with sales data.xlsx',
)
# Drop rows where HA name is null
nov_ha_data = nov_ha_data.dropna(subset=["HA Name"])
nov_ha_data["ha_number"] = nov_ha_data["HA Name"].str.extract(r"(\d+)").astype(int)
nov_ha_data = nov_ha_data.sort_values("ha_number", ascending=True)
variance_explanations = pd.read_excel(
'etl/eligibility/ha_15_32/ALL HA FIGURES AND ASSIGNED INSTALLERS 21.11.2023 with sales data.xlsx',
sheet_name="Variance explanations"
)
september_figures = pd.read_excel(
"etl/eligibility/ha_15_32/ALL HA FIGURES AND ASSIGNED INSTALLERS SEP 23 UPDATE (2).xlsx",
sheet_name="HA Stats"
)
historical_invoices = pd.read_excel(
"etl/eligibility/ha_15_32/ALL HA FIGURES AND ASSIGNED INSTALLERS 21.11.2023 with sales data.xlsx",
sheet_name="Jul 22 to Oct 23"
)
# Drop rows where installer rates is null
historical_invoices = historical_invoices[~pd.isnull(historical_invoices["INSTALLER RATES"])]
historical_invoices = historical_invoices[historical_invoices["INSTALLER RATES"] != "NA "]
# By Scheme, take a weighted mean of the INSTALLER RATES, weighted on the number of rows
n_invoices = historical_invoices.groupby(["Scheme", "INSTALLER RATES"])["Invoice number"].count().reset_index()
n_invoices = n_invoices[n_invoices["Scheme"].isin(["Eco 4", "GBIS"])]
historical_scheme_rates = n_invoices.groupby("Scheme").apply(
lambda x: np.average(x["INSTALLER RATES"], weights=x["Invoice number"])
).reset_index().rename(columns={0: "Historical rates"})
# We keep only the rows whose Sales figure is greater than 0
sales_data = nov_ha_data[nov_ha_data["Sales"] > 0]
# We now need to adjust sales data depending on the variance explanations
sales_data = sales_data.merge(
variance_explanations[["HA", 'Which figure is correct']],
how="left",
left_on="ha_number",
right_on="HA"
)
def adjust_sales(row):
    """
    Pick the sales figure to use for a single row, based on its variance explanation.

    :param row: A mapping-like row (e.g. a pandas Series) providing "Which figure is correct", "Sales" and
        "No. of Tech surveys complete"
    :return: The tech-survey count when the explanation says the HA facts & figures are correct, otherwise the
        "Sales" value (also used when no explanation is recorded)
    :raises ValueError: If the explanation is a value this function does not recognise
    """
    verdict = row["Which figure is correct"]

    # No explanation recorded for this row, so the sales figure stands as it is
    if pd.isnull(verdict):
        return row["Sales"]

    # The only case where the sales figure is overridden
    if verdict == "HA facts & figures":
        return row['No. of Tech surveys complete']

    # Every remaining recognised explanation keeps the sales figure
    keeps_sales_figure = (
        "Billed amount",
        "Both correct, HA facts and figures includes November",
        "Both correct",
    )
    if verdict in keeps_sales_figure:
        return row["Sales"]

    raise ValueError(f"Unknown value for 'Which figure is correct': {row['Which figure is correct']}")
# Apply the adjustment row by row to obtain the variance-adjusted sales figure
sales_data["adjusted_sales"] = sales_data.apply(lambda row: adjust_sales(row), axis=1)
# We therefore adjust GBIS and ECO4 sales data based on adjusted sales
sales_data["adjusted_eco4_sales"] = sales_data["No. of Tech surveys complete - Eco 4"] / sales_data["Sales"] * \
sales_data["adjusted_sales"]
sales_data["adjusted_gbis_sales"] = sales_data["No. of Tech surveys complete - GBIS"] / sales_data["Sales"] * \
sales_data["adjusted_sales"]
sales_data["cancellation_rate"] = (sales_data["Sales"] - sales_data["adjusted_sales"]) / sales_data["Sales"]
# The difference between the adjusted sales and the actual sales is the cancellation
cancellations = (sales_data["adjusted_sales"].sum() - sales_data["Sales"].sum()) / sales_data["Sales"].sum()
# Given the cancellations, we can now adjust the expected remaining surveys
sales_data["No. of Tech surveys remaining"] = sales_data["No. of Tech surveys remaining"] * (
1 - sales_data["cancellation_rate"]
)
# We now merge on the expected values for September
sales_data = sales_data.merge(
september_figures[["Redacted HA", "ECO4", "GBIS"]].rename(
columns={"Redacted HA": "HA Name", "ECO4": "Sept Expected ECO4", "GBIS": "Sept Expected GBIS"}
),
how="left",
on="HA Name",
)
sales_data["Sept Expected ECO4"] = sales_data["Sept Expected ECO4"].fillna(0)
sales_data["Sept Expected GBIS"] = sales_data["Sept Expected GBIS"].fillna(0)
# We calculate the ECO4 and GBIS conversion rates with the adjusted numbers
sales_data["ECO4 Conversion"] = sales_data["adjusted_eco4_sales"] / sales_data["adjusted_sales"]
sales_data["GBIS Conversion"] = sales_data["adjusted_gbis_sales"] / sales_data["adjusted_sales"]
# We now calculate the expected remaining ECO4 and GBIS sales
# We take the number of remaining surveys and multiply by the conversion rate for each scheme, which tells us
# how many more we should expect to see
sales_data["Expected Remaining ECO4"] = sales_data["No. of Tech surveys remaining"] * sales_data["ECO4 Conversion"]
sales_data["Expected Remaining GBIS"] = sales_data["No. of Tech surveys remaining"] * sales_data["GBIS Conversion"]
# We now produce a forecasted ECO4 and GBIS sales figure
sales_data["Forecasted ECO4 Sales"] = sales_data["adjusted_eco4_sales"] + sales_data["Expected Remaining ECO4"]
sales_data["Forecasted GBIS Sales"] = sales_data["adjusted_gbis_sales"] + sales_data["Expected Remaining GBIS"]
# Take the columns we're interested in
# HA # Properties Sept ECO4 Figures Sept GBIS Figures Nov Total Sales Nov ECO4 Sales Nov GBIS Sales
# Remaining Surveys ECO4 conversion GBIS conversion Forecasted ECO4 Sales Forecasted GBIS sales ECO4 Change
# GBIS Change
sales_data_formatted = sales_data[[
"HA Name",
"ASSET LIST no.",
"Sept Expected ECO4",
"Sept Expected GBIS",
"adjusted_sales",
"adjusted_eco4_sales",
"adjusted_gbis_sales",
"No. of Tech surveys remaining",
"ECO4 Conversion",
"GBIS Conversion",
"Forecasted ECO4 Sales",
"Forecasted GBIS Sales"
]].rename(
columns={
"adjusted_sales": "Oct Total Sales (adjusted for variance)",
"adjusted_eco4_sales": "Oct ECO4 Sales (adjusted for variance)",
"adjusted_gbis_sales": "Oct GBIS Sales (adjusted for variance)",
"No. of Tech surveys remaining": "Remaining Surveys",
}
)
# Convert columns which should be integers to integers
for col in ["ASSET LIST no.", "Remaining Surveys", "Sept Expected ECO4", "Sept Expected GBIS",
"Oct Total Sales (adjusted for variance)", "Oct ECO4 Sales (adjusted for variance)",
"Oct GBIS Sales (adjusted for variance)", "Forecasted ECO4 Sales", "Forecasted GBIS Sales"]:
sales_data_formatted[col] = sales_data_formatted[col].fillna(0)
sales_data_formatted[col] = sales_data_formatted[col].astype(int)
# Remove HA 17 because this was EPCs only. We also remove HA33 because they do not have access to the full portfolio
sales_data_formatted = sales_data_formatted[
~sales_data_formatted["HA Name"].isin(["HA 17", "HA 33"])
]
# September expected ECO4 and GBIS
sept_expected_eco4 = sales_data_formatted["Sept Expected ECO4"].sum()
sept_expected_gbis = sales_data_formatted["Sept Expected GBIS"].sum()
# Completed so far
oct_eco4_sales = sales_data_formatted["Oct ECO4 Sales (adjusted for variance)"].sum()
oct_gbis_sales = sales_data_formatted["Oct GBIS Sales (adjusted for variance)"].sum()
# Forecasted figures
forecasted_eco4_sales = sales_data_formatted["Forecasted ECO4 Sales"].sum()
forecasted_gbis_sales = sales_data_formatted["Forecasted GBIS Sales"].sum()
# Expected remaining sales
expected_remaining_eco4_sales = forecasted_eco4_sales - oct_eco4_sales
expected_remaining_gbis_sales = forecasted_gbis_sales - oct_gbis_sales
# Forecast change vs September
forecasted_eco4_change = 100 * (forecasted_eco4_sales - sept_expected_eco4) / sept_expected_eco4
forecasted_gbis_change = 100 * (forecasted_gbis_sales - sept_expected_gbis) / sept_expected_gbis
aggregates = pd.DataFrame(
columns=["Scheme", "Sept Expected", "Oct Completed", "Forecasted Remaining Sales", "Forecasted Total Sales",
"Forecasted Change vs Sept"],
data=[
["ECO4", sept_expected_eco4, oct_eco4_sales, expected_remaining_eco4_sales, forecasted_eco4_sales,
forecasted_eco4_change],
["GBIS", sept_expected_gbis, oct_gbis_sales, expected_remaining_gbis_sales, forecasted_gbis_sales,
forecasted_gbis_change],
]
)
# Multiply by historical rates to get revenue
# For ECO4, this is ~£1456 and for GBIS it's ~£432
historical_gbis_price = historical_scheme_rates[
historical_scheme_rates["Scheme"] == "GBIS"
]["Historical rates"].iloc[0]
historical_eco4_price = historical_scheme_rates[
historical_scheme_rates["Scheme"] == "Eco 4"
]["Historical rates"].iloc[0]
aggregates["Sept Expected Revenue"] = np.where(
aggregates["Scheme"] == "ECO4",
aggregates["Sept Expected"] * historical_eco4_price,
aggregates["Sept Expected"] * historical_gbis_price
)
aggregates["Completed Revenue"] = np.where(
aggregates["Scheme"] == "ECO4",
aggregates["Oct Completed"] * historical_eco4_price,
aggregates["Oct Completed"] * historical_gbis_price
)
# We use the new rates for the forecasted revenue
aggregates["Forecasted Remaining Revenue"] = np.where(
aggregates["Scheme"] == "ECO4",
aggregates["Forecasted Remaining Sales"] * ECO4_NEW_RATES,
aggregates["Forecasted Remaining Sales"] * GBIS_NEW_RATES
)
# We also calculate the forecasted remaining revenue at the original price
aggregates["Forecasted Remaining Revenue (original price)"] = np.where(
aggregates["Scheme"] == "ECO4",
aggregates["Forecasted Remaining Sales"] * historical_eco4_price,
aggregates["Forecasted Remaining Sales"] * historical_gbis_price
)
aggregates["Forecasted Revenue"] = aggregates["Completed Revenue"] + aggregates["Forecasted Remaining Revenue"]
# Forecasted revenue with original price
aggregates["Forecasted Revenue (original price)"] = (
aggregates["Completed Revenue"] + aggregates["Forecasted Remaining Revenue (original price)"]
)
# Create a totals row which sums up the two rows
forecasted_change_vs_sept = 100 * (
aggregates["Forecasted Total Sales"].sum() - aggregates["Sept Expected"].sum()
) / aggregates["Sept Expected"].sum()
aggregates = pd.concat(
[
aggregates,
pd.DataFrame(
[
["Total", aggregates["Sept Expected"].sum(), aggregates["Oct Completed"].sum(),
aggregates["Forecasted Remaining Sales"].sum(), aggregates["Forecasted Total Sales"].sum(),
forecasted_change_vs_sept,
aggregates["Sept Expected Revenue"].sum(), aggregates["Completed Revenue"].sum(),
aggregates["Forecasted Remaining Revenue"].sum(),
aggregates["Forecasted Remaining Revenue (original price)"].sum(),
aggregates["Forecasted Revenue"].sum(),
aggregates["Forecasted Revenue (original price)"].sum(),
]
],
columns=aggregates.columns
)
]
)
# For each property in the asset list, we now calculate an average conversion rate to ECO4 and GBIS
# We do this by taking the forecasted sales values for each schemes and dividing by the number of properties
number_properties = sales_data_formatted["ASSET LIST no."].sum()
eco4_conversion_rate = forecasted_eco4_sales / number_properties
gbis_conversion_rate = forecasted_gbis_sales / number_properties
# We also attribute a future value per property
future_eco4_value = ECO4_NEW_RATES * eco4_conversion_rate
future_gbis_value = GBIS_NEW_RATES * gbis_conversion_rate
# We also calculate a revenue figure for the old rates
historical_eco4_value = historical_eco4_price * eco4_conversion_rate
historical_gbis_value = historical_gbis_price * gbis_conversion_rate
# For the HAs that have not begun selling, we estimate the value of the projects
# We start with some problem HAs
# HA 7, HA 24, HA 25
# These HAs have no sales data, so we use the expected figures
problem_has_data = nov_ha_data[
(nov_ha_data["HA Name"].isin(["HA 7", "HA 24", "HA 25"]))
].copy()
# Merge on the september expected figures
problem_has_data = problem_has_data.merge(
september_figures[["Redacted HA", "ECO4", "GBIS"]].rename(
columns={"Redacted HA": "HA Name", "ECO4": "Sept Expected ECO4", "GBIS": "Sept Expected GBIS"}
),
how="left",
on="HA Name",
)
# Fill NAs
problem_has_data["Sept Expected ECO4"] = problem_has_data["Sept Expected ECO4"].fillna(0)
problem_has_data["Sept Expected GBIS"] = problem_has_data["Sept Expected GBIS"].fillna(0)
# We now calculate the expected ECO4 and GBIS sales based on the average conversion rates
problem_has_data["Expected ECO4 Sales"] = problem_has_data["ASSET LIST no."] * eco4_conversion_rate
problem_has_data["Expected GBIS Sales"] = problem_has_data["ASSET LIST no."] * gbis_conversion_rate
# Filter just on columns we're interested in
problem_has_data = problem_has_data[[
"HA Name",
"ASSET LIST no.",
"Sept Expected ECO4",
"Sept Expected GBIS",
"ECO4",
"GBIS",
"Expected ECO4 Sales",
"Expected GBIS Sales"
]].rename(
columns={
"ECO4": "Nov Expected ECO4",
"GBIS": "Nov Expected GBIS",
}
)
# Fill NAs
problem_has_data["Nov Expected ECO4"] = problem_has_data["Nov Expected ECO4"].fillna(0)
problem_has_data["Nov Expected GBIS"] = problem_has_data["Nov Expected GBIS"].fillna(0)
# We calculate HA level Sept, Nov expected revenue, based on historical rates and then forecasted revenue
problem_has_data["Sept Expected ECO4 Value"] = problem_has_data["Sept Expected ECO4"] * historical_eco4_price
problem_has_data["Sept Expected GBIS Value"] = problem_has_data["Sept Expected GBIS"] * historical_gbis_price
problem_has_data["Nov Expected ECO4 Value"] = problem_has_data["Nov Expected ECO4"] * historical_eco4_price
problem_has_data["Nov Expected GBIS Value"] = problem_has_data["Nov Expected GBIS"] * historical_gbis_price
problem_has_data["Forecasted ECO4 Revenue"] = problem_has_data["ASSET LIST no."] * future_eco4_value
problem_has_data["Forecasted GBIS Revenue"] = problem_has_data["ASSET LIST no."] * future_gbis_value
# Totals
problem_has_data["Sept Expected Total Value"] = problem_has_data["Sept Expected ECO4 Value"] + \
problem_has_data["Sept Expected GBIS Value"]
problem_has_data["Nov Expected Total Value"] = problem_has_data["Nov Expected ECO4 Value"] + \
problem_has_data["Nov Expected GBIS Value"]
problem_has_data["Forecasted Total Revenue"] = problem_has_data["Forecasted ECO4 Revenue"] + \
problem_has_data["Forecasted GBIS Revenue"]
# We calculate a total expected value for September, November and then forecasted
problem_has_expected_eco4_value = problem_has_data["Sept Expected ECO4"].sum() * historical_eco4_price
problem_has_expected_gbis_value = problem_has_data["Sept Expected GBIS"].sum() * historical_gbis_price
problem_has_expected_total_value = problem_has_expected_eco4_value + problem_has_expected_gbis_value
problem_has_nov_eco4_value = problem_has_data["Nov Expected ECO4"].sum() * historical_eco4_price
problem_has_nov_gbis_value = problem_has_data["Nov Expected GBIS"].sum() * historical_gbis_price
problem_has_nov_total_value = problem_has_nov_eco4_value + problem_has_nov_gbis_value
forecasted_eco4_value = problem_has_data["ASSET LIST no."].sum() * future_eco4_value
forecasted_gbis_value = problem_has_data["ASSET LIST no."].sum() * future_gbis_value
problem_has_forecasted_total_value = forecasted_eco4_value + forecasted_gbis_value
problem_has_summary = pd.DataFrame(
columns=["Scheme", "Sept Expected", "Nov Expected", "Forecasted"],
data=[
["ECO4", problem_has_expected_eco4_value, problem_has_nov_eco4_value, forecasted_eco4_value],
["GBIS", problem_has_expected_gbis_value, problem_has_nov_gbis_value, forecasted_gbis_value],
["Total", problem_has_expected_total_value, problem_has_nov_total_value, problem_has_forecasted_total_value]
]
)
# We now also estimate the value of the remaining HAs based on historical sales performance and new rates
# We take the HAs that are not in the formatted sales data (which already excludes HA 17 and HA 33)
remaining_has = nov_ha_data[
~nov_ha_data["HA Name"].isin(sales_data_formatted["HA Name"])
].copy()
# Merge on the september expected figures
remaining_has = remaining_has.merge(
september_figures[["Redacted HA", "ECO4", "GBIS"]].rename(
columns={"Redacted HA": "HA Name", "ECO4": "Sept Expected ECO4", "GBIS": "Sept Expected GBIS"}
),
how="left",
on="HA Name",
)
# We update the asset list size for HA 33, because they do not have access to the full portfolio
remaining_has.loc[remaining_has["HA Name"] == "HA 33", "ASSET LIST no."] = 20699
# We also remove HA 17
remaining_has = remaining_has[~remaining_has["HA Name"].isin(["HA 17"])]
# We now calculate the expected ECO4 and GBIS sales based on the average conversion rates
remaining_has["Expected ECO4 Sales"] = remaining_has["ASSET LIST no."] * eco4_conversion_rate
remaining_has["Expected GBIS Sales"] = remaining_has["ASSET LIST no."] * gbis_conversion_rate
# Filter just on columns we're interested in
remaining_has = remaining_has[[
"HA Name",
"ASSET LIST no.",
"Sept Expected ECO4",
"Sept Expected GBIS",
"ECO4",
"GBIS",
]].rename(
columns={
"ECO4": "Nov Expected ECO4",
"GBIS": "Nov Expected GBIS",
}
)
remaining_has = remaining_has.fillna(0)
# We take just HAs that had an initial september expectation for ECO4 or GBIS, or that now have a Nov expectation
remaining_has = remaining_has[
(remaining_has["Sept Expected ECO4"] > 0) | (remaining_has["Sept Expected GBIS"] > 0) |
(remaining_has["Nov Expected ECO4"] > 0) | (remaining_has["Nov Expected GBIS"] > 0)
]
# Expected sales based on asset list size and conversion rate
remaining_has["Forecasted Sales ECO4"] = remaining_has["ASSET LIST no."] * eco4_conversion_rate
remaining_has["Forecasted Sales GBIS"] = remaining_has["ASSET LIST no."] * gbis_conversion_rate
# Calculate the total expected value for September and November
remaining_has["Sept Expected ECO4 Value"] = remaining_has["Sept Expected ECO4"] * historical_eco4_price
remaining_has["Sept Expected GBIS Value"] = remaining_has["Sept Expected GBIS"] * historical_gbis_price
remaining_has["Nov Expected ECO4 Value"] = remaining_has["Nov Expected ECO4"] * historical_eco4_price
remaining_has["Nov Expected GBIS Value"] = remaining_has["Nov Expected GBIS"] * historical_gbis_price
# Calculate forecasted revenue
remaining_has["Forecasted ECO4 Revenue"] = remaining_has["ASSET LIST no."] * future_eco4_value
remaining_has["Forecasted GBIS Revenue"] = remaining_has["ASSET LIST no."] * future_gbis_value
# We also calculate forecasted revenue with the original price
remaining_has["Forecasted ECO4 Revenue (original price)"] = remaining_has["ASSET LIST no."] * historical_eco4_value
remaining_has["Forecasted GBIS Revenue (original price)"] = remaining_has["ASSET LIST no."] * historical_gbis_value
# Calculate totals for each scheme
remaining_has_september_eco4_sales = remaining_has["Sept Expected ECO4"].sum()
remaining_has_september_gbis_sales = remaining_has["Sept Expected GBIS"].sum()
remaining_has_november_eco4_sales = remaining_has["Nov Expected ECO4"].sum()
remaining_has_november_gbis_sales = remaining_has["Nov Expected GBIS"].sum()
remaining_has_forecasted_eco4_sales = remaining_has["Forecasted Sales ECO4"].sum()
remaining_has_forecasted_gbis_sales = remaining_has["Forecasted Sales GBIS"].sum()
remaining_has_september_eco4_value = remaining_has["Sept Expected ECO4 Value"].sum()
remaining_has_september_gbis_value = remaining_has["Sept Expected GBIS Value"].sum()
remaining_has_november_eco4_value = remaining_has["Nov Expected ECO4 Value"].sum()
remaining_has_november_gbis_value = remaining_has["Nov Expected GBIS Value"].sum()
remaining_has_forecasted_eco4_value = remaining_has["Forecasted ECO4 Revenue"].sum()
remaining_has_forecasted_gbis_value = remaining_has["Forecasted GBIS Revenue"].sum()
remaining_has_forecasted_eco4_value_original_price = remaining_has["Forecasted ECO4 Revenue (original price)"].sum()
remaining_has_forecasted_gbis_value_original_price = remaining_has["Forecasted GBIS Revenue (original price)"].sum()
# Calculate the change in forecasted sales against the September expected sales
remaining_has_foecast_change_eco4 = 100 * (
remaining_has["Forecasted Sales ECO4"].sum() - remaining_has["Sept Expected ECO4"].sum()
) / remaining_has["Sept Expected ECO4"].sum()
remaining_has_foecast_change_gbis = 100 * (
remaining_has["Forecasted Sales GBIS"].sum() - remaining_has["Sept Expected GBIS"].sum()
) / remaining_has["Sept Expected GBIS"].sum()
# Total change
remaining_has_foecast_change_total = 100 * (
remaining_has["Forecasted Sales ECO4"].sum() + remaining_has["Forecasted Sales GBIS"].sum() -
remaining_has["Sept Expected ECO4"].sum() - remaining_has["Sept Expected GBIS"].sum()
) / (remaining_has["Sept Expected ECO4"].sum() + remaining_has["Sept Expected GBIS"].sum())
asset_list_size = remaining_has["ASSET LIST no."].sum()
# Create a summary table of the rest with the totals for ECO4, GBIS and then a total row
remaining_has_aggregate = pd.DataFrame(
columns=["Scheme", "Asset List Size", "Sept Expected Sales", "Nov Expected Sales", "Forecasted Sales",
"Forecasted Change vs Sept",
"Sept Expected Value", "Nov Expected Value", "Forecasted Value", "Forecasted Value (original price)"],
data=[
[
"ECO4", asset_list_size, remaining_has_september_eco4_sales, remaining_has_november_eco4_sales,
remaining_has_forecasted_eco4_sales, remaining_has_foecast_change_eco4,
remaining_has_september_eco4_value,
remaining_has_november_eco4_value, remaining_has_forecasted_eco4_value,
remaining_has_forecasted_eco4_value_original_price
],
[
"GBIS", asset_list_size, remaining_has_september_gbis_sales, remaining_has_november_gbis_sales,
remaining_has_forecasted_gbis_sales, remaining_has_foecast_change_gbis,
remaining_has_september_gbis_value,
remaining_has_november_gbis_value, remaining_has_forecasted_gbis_value,
remaining_has_forecasted_gbis_value_original_price
],
[
"Total", asset_list_size, remaining_has_september_eco4_sales + remaining_has_september_gbis_sales,
remaining_has_november_eco4_sales + remaining_has_november_gbis_sales,
remaining_has_forecasted_eco4_sales + remaining_has_forecasted_gbis_sales,
remaining_has_foecast_change_total,
remaining_has_september_eco4_value + remaining_has_september_gbis_value,
remaining_has_november_eco4_value + remaining_has_november_gbis_value,
remaining_has_forecasted_eco4_value + remaining_has_forecasted_gbis_value,
remaining_has_forecasted_eco4_value_original_price +
remaining_has_forecasted_gbis_value_original_price
]
]
)
# Calculate pipeline value
pipeline_value = aggregates[["Scheme", "Completed Revenue", "Forecasted Remaining Revenue"]].merge(
remaining_has_aggregate[["Scheme", "Forecasted Value"]].rename(
columns={"Forecasted Value": "Forecasted Revenue, Unconfirmed HAs"}
), how="inner", on="Scheme"
)
# Calculate the total
pipeline_value["Total Value"] = (
pipeline_value["Completed Revenue"] + pipeline_value["Forecasted Remaining Revenue"] + pipeline_value[
"Forecasted Revenue, Unconfirmed HAs"]
)
# TODO: Insert model figures
model_results = pd.DataFrame(
[
{
# This one, we don't have sales data
"HA Name": "HA 15",
"Model Expected Additional ECO4 (unit level)": None,
"Model Expected Total ECO4 (unit level)": 296,
"Model Expected Additional GBIS (unit level)": None,
"Model Expected Total GBIS (unit level)": 209,
},
{
"HA Name": "HA 16",
# Old before re-run
# "Model Expected Additional ECO4 (unit level)": 418,
# "Model Expected Total ECO4 (unit level)": 1820,
# "Model Expected Additional GBIS (unit level)": 576,
# "Model Expected Total GBIS (unit level)": 612,
# IN the partial sales data, WFT have completed 1407 ECO4, 36 GBIS
"Model Expected Additional ECO4 (unit level)": 411 + 342 + 235,
"Model Expected Total ECO4 (unit level)": 1407 + 411 + 342 + 235,
"Model Expected Additional GBIS (unit level)": 223,
"Model Expected Total GBIS (unit level)": 36 + 223,
},
{
"HA Name": "HA 24",
"Model Expected Additional ECO4 (unit level)": 224,
"Model Expected Total ECO4 (unit level)": 848,
"Model Expected Additional GBIS (unit level)": 552,
"Model Expected Total GBIS (unit level)": 552,
},
{
"HA Name": "HA 25",
"Model Expected Additional ECO4 (unit level)": None,
"Model Expected Total ECO4 (unit level)": 1709 + 59,
"Model Expected Additional GBIS (unit level)": None,
"Model Expected Total GBIS (unit level)": 2004 + 107,
}
]
)
sales_data_formatted["Remaining ECO4 Sales"] = (
sales_data_formatted["Forecasted ECO4 Sales"] - sales_data_formatted["Oct ECO4 Sales (adjusted for variance)"]
)
sales_data_formatted["Remaining GBIS Sales"] = (
sales_data_formatted["Forecasted GBIS Sales"] - sales_data_formatted["Oct GBIS Sales (adjusted for variance)"]
)
sales_data_formatted["Completed ECO4 Revenue"] = (sales_data_formatted[
"Oct ECO4 Sales (adjusted for variance)"] *
historical_eco4_price)
sales_data_formatted["Completed GBIS Revenue"] = (sales_data_formatted[
"Oct GBIS Sales (adjusted for variance)"] *
historical_gbis_price)
ha_subset_with_sales = ["HA 15", "HA 16", "HA 24"]
has_subset_with_sales_value = sales_data_formatted[
sales_data_formatted["HA Name"].isin(ha_subset_with_sales)
].copy()[
[
"HA Name",
"Oct ECO4 Sales (adjusted for variance)",
"Oct GBIS Sales (adjusted for variance)",
"Remaining ECO4 Sales",
"Remaining GBIS Sales",
"Forecasted ECO4 Sales",
"Forecasted GBIS Sales",
"Completed ECO4 Revenue",
"Completed GBIS Revenue"
]
]
has_subset_with_sales_value["Remaining ECO4 Revenue"] = has_subset_with_sales_value[
"Remaining ECO4 Sales"] * ECO4_NEW_RATES
has_subset_with_sales_value["Remaining GBIS Revenue"] = has_subset_with_sales_value[
"Remaining GBIS Sales"] * GBIS_NEW_RATES
has_subset_with_sales_value["Remaining Total Revenue"] = (
has_subset_with_sales_value["Remaining ECO4 Revenue"] + has_subset_with_sales_value["Remaining GBIS Revenue"]
)
model_results["Model Expected Additional ECO4 Revenue"] = (
model_results["Model Expected Additional ECO4 (unit level)"] * ECO4_NEW_RATES
)
model_results["Model Expected Additional GBIS revenue"] = (
model_results["Model Expected Additional GBIS (unit level)"] * GBIS_NEW_RATES
)
model_results["Model Expected Additional Total Revenue"] = (
model_results["Model Expected Additional ECO4 Revenue"] + model_results[
"Model Expected Additional GBIS revenue"]
)
# Show more columns with pandas
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
# Look at HA 16
ha16_model = model_results[model_results["HA Name"] == "HA 16"]
has_subset_with_sales_value[has_subset_with_sales_value["HA Name"] == "HA 16"]
# WFT: For HA 16: 4,598,190 ECO4, 57,000 GBIS
# Model:
# Look at HA 24
ha24_model = model_results[model_results["HA Name"] == "HA 24"]
has_subset_with_sales_value[has_subset_with_sales_value["HA Name"] == "HA 24"]
# Look at HA 15
ha15_data = has_subset_with_sales_value[has_subset_with_sales_value["HA Name"] == "HA 15"]
ha15_portfolio_value = ha15_data["Completed ECO4 Revenue"] + ha15_data[
"Completed GBIS Revenue"] + ha15_data["Remaining Total Revenue"]
# # This doesn't have sales data so in the model analysis, we just value the ha as a whole
ha15_model = model_results[model_results["HA Name"] == "HA 15"]
ha15_value = ha15_model["Model Expected Total ECO4 (unit level)"].iloc[0] * ECO4_NEW_RATES + \
ha15_model["Model Expected Total GBIS (unit level)"].iloc[0] * GBIS_NEW_RATES
model_results["Expected ECO4 Revenue"] = model_results["Model Expected Total ECO4 (unit level)"] * ECO4_NEW_RATES
model_results["Expected GBIS Revenue"] = model_results["Model Expected Total GBIS (unit level)"] * GBIS_NEW_RATES
model_results["Expected Total Revenue"] = model_results["Expected ECO4 Revenue"] + model_results[
"Expected GBIS Revenue"]
model_results[model_results["HA Name"].isin(["HA 15"])]
# We now create a final excel with all of the data
# We want:
# 1) aggregates
# 2) sales_data_formatted
# 3) remaining_has_aggregate
# 4) remaining_has
# 5) problem_has_summary
# Function to get the maximum column width
def get_col_widths(dataframe):
    """
    Work out how wide each column needs to be to display a dataframe without truncation
    :param dataframe: The dataframe that is going to be written out
    :return: List of widths in characters. The FIRST element is the width of the index (longest of the index
        values and the index name), followed by one width per column (longest of the column label and the
        column's values, all compared as strings). Callers that write the dataframe without its index must
        skip the first element.
    """
    # First we find the maximum length of the index column
    idx_max = max([len(str(s)) for s in dataframe.index.values] + [len(str(dataframe.index.name))])

    col_widths = []
    # Columns are accessed by position so that duplicated column labels do not return a whole dataframe
    for position, label in enumerate(dataframe.columns):
        value_lengths = dataframe.iloc[:, position].astype(str).map(len)
        # An empty column has no maximum (it would be NaN), so fall back to the label width only
        longest_value = 0 if value_lengths.empty else int(value_lengths.max())
        # Labels are not guaranteed to be strings (e.g. integer column names)
        col_widths.append(max(longest_value, len(str(label))))
    return [idx_max] + col_widths
# Create a Pandas Excel writer using XlsxWriter as the engine
with pd.ExcelWriter('HA Pipeline Analysis.xlsx', engine='xlsxwriter') as writer:
    # Write each dataframe to a different worksheet without the index
    for df, sheet in [(aggregates, 'Forecasted Sales'),
                      (sales_data_formatted, 'Sales Data'),
                      (remaining_has_aggregate, 'Remaining HAs Value'),
                      (remaining_has, 'Remaining HAs data'),
                      (pipeline_value, 'Pipeline Value'),
                      (problem_has_summary, 'Problem HAs Analysis'),
                      (problem_has_data, 'Problem HAs Data')
                      ]:
        df.to_excel(writer, sheet_name=sheet, index=False)
        # Auto-adjust columns' width
        # get_col_widths returns the index width first. The index is not written to the sheet (index=False),
        # so we skip that first entry, otherwise every width would be applied one column too early
        for i, width in enumerate(get_col_widths(df)[1:]):
            writer.sheets[sheet].set_column(i, i, width)

View file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,113 @@
import openpyxl
import pandas as pd
import numpy as np
def get_excel_survey_list(workbook_path, worksheet_name=None):
    """
    Read a survey worksheet into a dataframe, keeping track of the fill colour of each row
    :param workbook_path: Path of the excel workbook to open
    :param worksheet_name: Name of the worksheet to read. When None, the workbook's active sheet is used
    :return: Dataframe holding the sheet's rows (the first row provides the column names) together with a
        "row_colour" column. The row colour is the colour index of the fill of the row's first cell, or None
        when that index is '00000000'
    """
    workbook = openpyxl.load_workbook(workbook_path)
    sheet = workbook.active if worksheet_name is None else workbook[worksheet_name]

    header = [cell.value for cell in sheet[1]]
    values = []
    colours = []
    # Data starts on the second row, since the first row is assumed to hold the headers
    for cells in sheet.iter_rows(min_row=2, values_only=False):
        values.append([cell.value for cell in cells])
        fill_index = cells[0].fill.start_color.index
        colours.append(None if fill_index == '00000000' else fill_index)

    survey_frame = pd.DataFrame(values, columns=header)
    survey_frame["row_colour"] = colours
    return survey_frame
def load_data():
    """
    Load the HA 16, HA 24 and HA 25 survey lists and combine them into a single table of survey outcomes
    :return: Dataframe with the columns "HA", "survey_status" and "cancellation_reason", containing only the
        rows whose status is neither "no update" nor "loft remaining"
    """
    # HA 16 and HA 24 are ECO4 survey lists, HA 25 is an ECO3 list read from its "CAVITY" worksheet
    ha16 = get_excel_survey_list('etl/eligibility/ha_15_32/HESTIA- HA 16 ECO4 SURVEY LIST.xlsx')
    ha24 = get_excel_survey_list('etl/eligibility/ha_15_32/HESTIA - HA 24 ECO4 SURVEY LIST.xlsx')
    ha25 = get_excel_survey_list(
        'etl/eligibility/ha_15_32/HESTIA - HA 25 ECO3 SURVEY LIST.xlsx', worksheet_name="CAVITY"
    )
    # Drop the columns that do not contain a single value
    ha25 = ha25.dropna(axis=1, how='all')

    # HA 16 and HA 24 describe the installation status in a column, which we map onto a standard set of values
    ha16_status_names = {
        "NO UPDATE - CHECKED 2.10.23": "no update",
        "NO UPDATE - CHECKED 18.12.23": "no update",
        "INSTALLED": "installed",
        "CANCELLED": "cancelled",
        "LOFT STILL TO BE INSTALLED": "loft remaining",
    }
    ha24_status_names = {
        "NO UPDATE - CHECKED 21.11.23": "no update",
        "NO UPDATE - CHECKED 18.12.23": "no update",
        "INSTALLED": "installed",
        "CANCELLED": "cancelled",
        "LOFT STILL TO BE INSTALLED": "loft remaining",
        "SEE NOTES >>": "see notes",
    }
    ha16["survey_status"] = ha16["INSTALLED OR CANCELLED"].replace(ha16_status_names)
    ha24["survey_status"] = ha24["INSTALLED OR CANCELLED"].replace(ha24_status_names)

    # HA 25 has no status column, the status is derived from the colour of the row instead
    ha25_colour = ha25["row_colour"]
    ha25["survey_status"] = np.select(
        [
            ha25_colour == "FF7030A0",
            ha25_colour == "FF92D050",
            ha25_colour == "FFFF0000",
            ha25_colour == "FFFFFF00",
            ha25_colour == "FF38FD23",
        ],
        ["installed", "installed", "cancelled", "filler row - drop", "installed"],
        default="unknown",
    )
    ha25 = ha25[ha25["survey_status"] != "filler row - drop"]

    # Cancellation reasons come from the installer notes, HA 25 does not provide any
    notes_column = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS"
    ha16["cancellation_reason"] = ha16[notes_column].copy()
    ha24["cancellation_reason"] = ha24[notes_column].copy()
    ha25["cancellation_reason"] = "No reason provided"

    # Label each list with its HA and stack the shared columns
    shared_columns = ["HA", "survey_status", "cancellation_reason"]
    frames = []
    for ha_name, frame in (("HA 16", ha16), ("HA 24", ha24), ("HA 25", ha25)):
        frame["HA"] = ha_name
        frames.append(frame[shared_columns])
    combined = pd.concat(frames)

    # Only rows with a confirmed status are kept
    unconfirmed = combined["survey_status"].isin(["no update", "loft remaining"])
    return combined[~unconfirmed]
def app():
    """
    Entry point for the analysis of the cancellation data provided by warmfront
    :return:
    """
    # The data covers jobs that completed invasive surveys but where the installer could not conclude the work
    cancellations = load_data()

View file

@ -0,0 +1,647 @@
import os
import msgpack
import openpyxl
from pathlib import Path
from datetime import datetime
import pandas as pd
import numpy as np
from utils.s3 import read_from_s3
from utils.logger import setup_logger
from dotenv import load_dotenv
from utils.s3 import read_dataframe_from_s3_parquet
from tqdm import tqdm
from backend.SearchEpc import SearchEpc
from etl.eligibility.Eligibility import Eligibility
from etl.eligibility.ha_15_32.app import prepare_model_data_row
from etl.epc.DataProcessor import DataProcessor
from etl.epc.settings import COLUMNS_TO_MERGE_ON
from backend.ml_models.api import ModelApi
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from recommendations.recommendation_utils import calculate_cavity_age
from recommendation_utils import convert_thickness_to_numeric
import re
ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
logger = setup_logger()
# The .env file must be loaded BEFORE any environment variable is read, otherwise a token that is only
# defined in the .env file would be read as None
load_dotenv(ENV_FILE)
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
# Known misspellings / wrong street types in the survey list, as (text in survey list, text in asset list).
# They are applied in order to the lower-cased street name, and the order matters: the specific
# ", fitton hill" entries have to run before the generic one.
_STREET_NAME_CORRECTIONS = (
    ("eccels", "eccles"),
    ("chatley, road", "chatley road"),
    ("vaughen", "Vaughan"),
    ("cresent", "crescent"),
    ("plantation road", "plantation avenue"),
    ("how clough drive", "howclough drive"),
    ("brockhurst lane", "brookhurst lane"),
    ("biirch road", "birch road"),
    ("hadson road", "hodson road"),
    ("harbonne avennue", "narbonne avenue"),
    ("cumberland road, cadishead", "cumberland avenue, cadishead"),
    ("aston field drive", "ashton field drive"),
    ("wedgewood road", "wedgwood road"),
    ("hamilton close", "hamilton avenue"),
    ("lichens crescent, fitton hill", "lichens crescent"),
    ("south croft, fitton hill", "south croft"),
    (", fitton hill", ""),
    ("firtree dr", "fir tree avenue"),
    ("hawthorne road", "hawthorn crescent"),
    ("rein lee avenue", "reins lee avenue"),
    ("westerhill road", "wester hill road"),
    ("st martins road", "saint martins road"),
    ("timperley avenue", "timperley close"),
    ("eastwood road", "eastwood avenue"),
    ("new road", "new street"),
    ("grassmere road", "grasmere road"),
    ("hulton road", "hulton avenue"),
    ("beechfield avenue", "beechfield road"),
    ("princess avenue", "princes avenue"),
    ("edge ford crecent", "edge fold crescent"),
    ("conniston avenue", "coniston avenue"),
    ("blackthorne crescent", "blackthorn crescent"),
    ("wellstock road", "wellstock lane"),
    ("brackley avenue", "brackley street"),
    ("brook avenue swinton", "brook avenue, swinton"),
    ("green avenue swinton", "green avenue, swinton"),
    ("grasmere avenue wardley", "grasmere avenue, wardley"),
    ("mardale avenue wardle", "mardale avenue, wardle"),
    ("carleach grove", "cartleach Grove"),
    ("arbour grove", "arbor Grove"),
)


def _read_active_sheet(workbook_path):
    """
    Read the active sheet of an excel workbook, keeping track of the fill colour of each row
    :param workbook_path: Path of the workbook to open
    :return: Tuple of (values of the first row, list of the values of every following row, list of row colours).
        A row colour is the colour index of the fill of the row's first cell, or None when that index is
        '00000000'
    """
    sheet = openpyxl.load_workbook(workbook_path).active
    header = [cell.value for cell in sheet[1]]
    rows = []
    colours = []
    # Data starts on the second row, since the first row is assumed to hold the headers
    for cells in sheet.iter_rows(min_row=2, values_only=False):
        rows.append([cell.value for cell in cells])
        colour_index = cells[0].fill.start_color.index
        colours.append(colour_index if colour_index != '00000000' else None)
    return header, rows, colours


def load_data():
    """
    Load the HA 16 asset list and the HA 16 ECO4 survey list, and match every survey row to an asset
    :return: Tuple of (data, survey_list). data is the asset list left-joined with the matched survey rows, with
        a boolean "warmfront_identified" column that is True for assets that appear in the survey list.
        survey_list is the cleaned survey list, including the generated "survey_key"
    :raises ValueError: If a survey row cannot be narrowed down to exactly one row of the asset list
    """
    # This asset list is spread across two sheets, which we need to combine
    asset_list_filenames = [
        "HESTIA - HA 16 ASSET LIST PART 1 OF 2.xlsx",
        "HESTIA - HA 16 ASSET LIST PART 2 OF 2.xlsx",
    ]
    # Prepare lists to collect rows data and their colors
    rows_data = []
    rows_colors = []
    colnames = []
    for asset_list_filename in asset_list_filenames:
        sheet_colnames, sheet_rows, sheet_colors = _read_active_sheet(
            f'etl/eligibility/ha_15_32/{asset_list_filename}'
        )
        colnames.append(sheet_colnames)
        rows_data.extend(sheet_rows)
        rows_colors.extend(sheet_colors)

    # The header of the first workbook is used for the rows of both workbooks
    asset_list = pd.DataFrame(rows_data, columns=colnames[0])
    # Remove None columns
    asset_list = asset_list.iloc[:, 0:12]
    asset_list['row_color'] = rows_colors
    asset_list["row_colour_name"] = np.where(
        asset_list["row_color"] == "FFFF0000", "red",
        np.where(asset_list["row_color"] == "FF92D050", "green", "yellow")
    )

    # Split up the address on commas, which is useful for matching later
    split_addresses = asset_list['Address'].str.split(',', expand=True)
    split_addresses.columns = ['temp', 'address2', 'address3', 'address4', 'address5']
    asset_list = pd.concat([asset_list, split_addresses], axis=1)
    # There is no commas separating house number and address 1
    split_addresses2 = asset_list['temp'].str.split(' ', expand=True)
    split_addresses2.columns = ['HouseNo', 'part1', 'part2', "part3", "part4"]
    # We could re-concatenate but we only care about HouseNo for the moment
    asset_list = pd.concat([asset_list, split_addresses2[["HouseNo"]]], axis=1)

    # We now read in the survey list
    survey_colnames, survey_rows, survey_colors = _read_active_sheet(
        'etl/eligibility/ha_15_32/HESTIA- HA 16 ECO4 SURVEY LIST.xlsx'
    )
    survey_list = pd.DataFrame(survey_rows, columns=survey_colnames)
    # For the survey list, we don't need the colours, since there is a column called "INSTALLED OR CANCELLED"
    # which describes the status of the property
    survey_list["row_colour"] = survey_colors
    survey_list["survey_key"] = [f"survey_{i}" for i in range(len(survey_list))]

    # Tidy up the street/block name a bit
    street = survey_list["Street / Block Name"].str.replace("/", ", ").str.lower()
    # Replace the standalone word "rd" with "road" (this also covers "reeds rd")
    street = street.str.replace(r'\brd\b', 'road', regex=True)
    # Replace " , " with ", "
    street = street.str.replace(" , ", ', ')
    # Fix "{place} ,{place}" with "{place}, {place}"
    street = street.str.replace(r'\s*,\s*', ', ', regex=True)
    # Strip whitespace
    survey_list["Street / Block Name"] = street.str.strip()

    # Correct errors in the postcodes
    survey_list["Post Code"] = np.where(
        survey_list["Post Code"] == "M38 0SA",
        "M38 9SA",
        survey_list["Post Code"]
    )
    survey_list["Post Code"] = np.where(
        (survey_list["Street / Block Name"] == "nelson drive") & (survey_list["Post Code"] == "M44 5JE"),
        "M44 5JF",
        survey_list["Post Code"]
    )

    # Correct errors in the street names
    street = survey_list["Street / Block Name"]
    for survey_spelling, asset_spelling in _STREET_NAME_CORRECTIONS:
        street = street.str.replace(survey_spelling, asset_spelling)
    survey_list["Street / Block Name"] = street

    # Replacement for clively avenue 66-68
    survey_list["NO."] = np.where(
        survey_list["NO."] == "66-68",
        "66",
        survey_list["NO."]
    )

    # We now need to merge the survey list onto the asset list
    # Could be easier just to do a search on each row, even though it's much slower
    # The lower-cased asset addresses do not change between survey rows, so they are computed once
    asset_addresses = asset_list["Address"].str.lower()
    matched = []
    for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):
        house_number = row["NO."]
        if isinstance(house_number, str):
            house_number = house_number.lower()
        house_number = str(house_number)

        # Filter on the first line of the address, then narrow down step by step until one asset is left
        df = asset_list[asset_addresses.str.contains(row["Street / Block Name"].lower())].copy()
        df = df[df["Address"].str.lower().str.contains(house_number)]
        if df.shape[0] != 1:
            df = df[df["HouseNo"] == house_number]
        if df.shape[0] != 1:
            df = df[df["Postcode"].str.lower().str.contains(row["Post Code"].lower())]
        if df.shape[0] != 1:
            raise ValueError(
                f"Investigate: survey row {row['survey_key']} ({row['NO.']} {row['Street / Block Name']}, "
                f"{row['Post Code']}) matched {df.shape[0]} asset list rows, expected exactly 1"
            )

        matched.append(
            {
                "survey_key": row["survey_key"],
                "matched_address": df["Address"].values[0],
                "survey_house_no": row["NO."],
                "survey_street_name": row["Street / Block Name"],
                "survey_postcode": row["Post Code"],
                "survey_status": row["INSTALLED OR CANCELLED"]
            }
        )
    matched = pd.DataFrame(matched)
    matched["warmfront_identified"] = True

    # Combine asset list and surveys
    data = asset_list.merge(
        matched, how="left", left_on="Address", right_on="matched_address",
    )
    # Assets without a matching survey row were not identified by warmfront
    data["warmfront_identified"] = data["warmfront_identified"].fillna(False)
    return data, survey_list
def _classify_post_install_sap(current_sap, post_install_sap):
    """
    Classify how confident we are that a property reaches its required SAP score after the install
    :param current_sap: SAP score of the property before the install
    :param post_install_sap: Predicted SAP score of the property after the install
    :return: One of "highest confidence", "high confidence", "medium confidence" or "unlikely"
    """
    # The upgrade requirements are dependent on the current SAP: a property with a SAP of 38 or below has
    # to reach 55, every other property has to reach 69
    # NOTE(review): 38 / 55 / 69 look like the F, D and C band boundaries - confirm
    required_sap = 55 if current_sap <= 38 else 69
    if post_install_sap >= required_sap + 2:
        return "highest confidence"
    if post_install_sap >= required_sap:
        return "high confidence"
    if post_install_sap >= required_sap - 2:
        return "medium confidence"
    return "unlikely"


def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
    """
    Find the EPC of every property, check its eligibility and predict the SAP uplift of the eligible properties
    :param data: Dataframe with one row per property. The columns "HouseNo", "Postcode", "Address", "Type" and
        "row_id" are read. "row_id" is expected to look like "<prefix>_<number>"
    :param cleaned: Passed to Eligibility, calculate_cavity_age and prepare_model_data_row
    :param cleaning_data: Passed to prepare_model_data_row and the DataProcessor cleaning steps
    :param created_at: Passed to prepare_model_data_row and used as the timestamp of the ModelApi
    :param photo_supply_lookup: Passed to prepare_model_data_row
    :param floor_area_decile_thresholds: Passed to prepare_model_data_row
    :return: Tuple of (results_df, scoring_data, nodata). results_df has one row per property with an EPC,
        including "sap_uplift", "post_install_sap" and "eligibility_classification". scoring_data is the list
        of model input rows of the ECO4 eligible properties. nodata is the list of rows without any EPC
    """
    scoring_data = []
    results = []
    nodata = []
    # Maps the property type of the asset list onto the EPC property type and built form
    property_type_lookup = {
        'Semi Detached Bungalow': {"property-type": "Bungalow", "built-form": "Semi-Detached"},
        'Mid Terraced House': {"property-type": "House", "built-form": "Mid-Terrace"},
        'End Terraced House': {"property-type": "House", "built-form": "End-Terrace"},
        'Low Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"},
        'Semi-Detached House': {"property-type": "House", "built-form": "Semi-Detached"},
        'Detached Bungalow': {"property-type": "Bungalow", "built-form": "Detached"},
        'End Terraced Bungalow': {"property-type": "Bungalow", "built-form": "End-Terrace"},
        'Mid Terraced Bungalow': {"property-type": "Bungalow", "built-form": "Mid-Terrace"},
        'Medium Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"},
        'Detached House': {"property-type": "House", "built-form": "Detached"},
        'Cottage Flat': {"property-type": "Flat", "built-form": "Semi-Detached"},
        'Maisonette Medium Rise': {"property-type": "Flat", "built-form": "Mid-Terrace"},
        'Maisonette Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
        'End Terraced Town House': {"property-type": "House", "built-form": "End-Terrace"},
        'Flat Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
        'Mid Terraced Town House': {"property-type": "House", "built-form": "Mid-Terrace"},
    }
    for _, property_meta in tqdm(data.iterrows(), total=len(data)):
        searcher = SearchEpc(
            address1=property_meta["HouseNo"],
            postcode=property_meta["Postcode"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            full_address=property_meta["Address"]
        )
        type_details = property_type_lookup[property_meta["Type"]]
        searcher.ordnance_survey_client.property_type = type_details["property-type"]
        searcher.ordnance_survey_client.built_form = type_details["built-form"]
        searcher.find_property(skip_os=True)

        if searcher.newest_epc is None:
            nodata.append(property_meta)
            continue

        if searcher.newest_epc.get("estimated"):
            # We insert the row ID as our proxy for UPRN
            proxy_uprn = int(property_meta["row_id"].split("_")[1])
            searcher.newest_epc["uprn"] = proxy_uprn

        newest_epc = searcher.newest_epc
        older_epcs = searcher.older_epcs
        full_sap_epc = searcher.full_sap_epc
        # We also want to get the penultimate epc
        penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
        if not penultimate_epc:
            penultimate_epc = newest_epc

        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
        eligibility.check_gbis_warmfront()
        eligibility.check_eco4_warmfront()
        if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
            # The newest EPC is not eligible for either scheme, so we fall back to the penultimate EPC
            eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
            eligibility.check_gbis_warmfront()
            eligibility.check_eco4_warmfront()
            # If this is the case, we need to update the older epcs
            # We don't update just to make data cleaning easier
            if penultimate_epc.get("estimated") is None:
                older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]

        # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
        # Loft MUST be suitable
        cavity_age = None
        if (
                eligibility.walls["is_cavity_wall"] and
                eligibility.walls["is_filled_cavity"] and
                eligibility.loft["suitability"] and
                eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
        ):
            # We check the age of the cavity and if it's particularly old, we flag it
            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)

        # Full checks
        eligibility.check_gbis()
        eligibility.check_eco4()

        if eligibility.eco4_warmfront["eligible"]:
            # The model needs a UPRN, so the row ID is used as a proxy when the EPC does not have one
            if eligibility.epc["uprn"] == "":
                eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[1])
            scoring_dictionary = prepare_model_data_row(
                property_id=property_meta["row_id"],
                modelling_epc=eligibility.epc,
                cleaned=cleaned,
                cleaning_data=cleaning_data,
                created_at=created_at,
                old_data=older_epcs,
                full_sap_epc=full_sap_epc,
                photo_supply_lookup=photo_supply_lookup,
                floor_area_decile_thresholds=floor_area_decile_thresholds
            )
            scoring_data.extend(scoring_dictionary)

        results.append(
            {
                "row_id": property_meta["row_id"],
                "uprn": eligibility.epc["uprn"],
                "Address": property_meta["Address"],
                "Postcode": property_meta["Postcode"],
                "property_type": eligibility.epc["property-type"],
                "gbis_eligible": eligibility.gbis_warmfront,
                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                "eco4_message": eligibility.eco4_warmfront["message"],
                "sap": float(eligibility.epc["current-energy-efficiency"]),
                "gbis_eligible_future": eligibility.gbis["eligible"],
                "gbis_eligible_future_message": eligibility.gbis["message"],
                "eco4_eligible_future": eligibility.eco4["eligible"],
                "eco4_eligible_future_message": eligibility.eco4["message"],
                # Property components
                "roof": eligibility.roof["clean_description"],
                "walls": eligibility.walls["clean_description"],
                "cavity_type": eligibility.cavity["type"],
                "heating": eligibility.epc["mainheat-description"],
                "tenure": eligibility.tenure,
                "date_epc": eligibility.epc["lodgement-date"],
                "loft_thickness": eligibility.roof["insulation_thickness"],
                "cavity_age": cavity_age,
                **eligibility.walls,
                **eligibility.roof,
            }
        )

    scoring_df = pd.DataFrame(scoring_data)
    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])
    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )
    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)

    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )
    predictions = all_predictions["sap_change_predictions"].copy()

    results_df = pd.DataFrame(results)
    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
        results_df[["row_id", "sap"]], how="left", on="row_id"
    )
    # The uplift of a property is the sum of the uplifts of all of its predictions
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
    results_df = results_df.merge(
        predictions[["sap_uplift", "row_id"]],
        how="left",
        on="row_id"
    )
    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]

    eligibility_assessment = [
        {
            "row_id": row["row_id"],
            "eligibility_classification": _classify_post_install_sap(row["sap"], row["post_install_sap"])
        }
        for _, row in results_df[results_df["eco4_eligible"] == True].iterrows()
    ]
    # The columns are given explicitly so that the merge still works when no property is ECO4 eligible
    eligibility_assessment = pd.DataFrame(
        eligibility_assessment, columns=["row_id", "eligibility_classification"]
    )
    results_df = results_df.merge(
        eligibility_assessment, how="left", on="row_id"
    )
    return results_df, scoring_data, nodata
def analyse_results(results_df, data, survey_list):
    """
    Ad-hoc comparison of the modelled eligibility against the properties Warmfront has already identified.

    The asset list is joined to the per-property results on row_id and to the survey list on survey_key; the
    first column of the survey list is taken as the funding scheme. Every subset, count and rate below is only
    bound to a local name so that it can be inspected while stepping through - nothing is returned.

    :param results_df: Per-property results keyed on row_id
    :param data: Asset list holding row_id, survey_key, warmfront_identified and row_colour_name
    :param survey_list: Survey list keyed on survey_key
    """
    scheme_column = survey_list.columns[0]
    funding_schemes = survey_list[["survey_key", scheme_column]].rename(columns={scheme_column: "funding_scheme"})
    asset_columns = ["row_id", "survey_key", "warmfront_identified", "row_colour_name"]
    analysis_data = data[asset_columns].merge(results_df, how="left", on="row_id")
    analysis_data = analysis_data.merge(funding_schemes, how="left", on="survey_key")

    # Normalise missing thicknesses to None before converting the thickness to a number
    raw_thickness = analysis_data["roof_insulation_thickness"]
    analysis_data["roof_insulation_thickness"] = np.where(pd.isnull(raw_thickness), None, raw_thickness)
    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
        lambda thickness: convert_thickness_to_numeric(thickness, is_flat=False, is_pitched=True)
    )

    # Masks shared by the subsets below
    eco4_schemes = ["ECO4 A/W", "AFFORDABLE WARMTH"]
    gbis_schemes = ["ECO4 GBIS (ECO+)"]
    identified = analysis_data["warmfront_identified"] == True
    not_identified = analysis_data["warmfront_identified"] == False
    eco4_eligible = analysis_data["eco4_eligible"] == True
    eco4_ineligible = analysis_data["eco4_eligible"] == False
    gbis_eligible = analysis_data["gbis_eligible"] == True
    loft_thickness = analysis_data["roof_insulation_thickness_numeric"]

    warmfront_sold_eco4 = analysis_data[identified & analysis_data["funding_scheme"].isin(eco4_schemes)]  # 1407
    warmfront_sold_gbis = analysis_data[identified & analysis_data["funding_scheme"].isin(gbis_schemes)]

    ideal_eco4_warmfront_not_sold = analysis_data[eco4_eligible & not_identified & (loft_thickness <= 100)]
    secondary_eco4_warmfront_not_sold = analysis_data[eco4_eligible & not_identified & (loft_thickness > 100)]

    # underperforming cavities
    failed_on_full_cavity = analysis_data["eco4_message"] == "Failed due to full cavity - check cavity age"
    older_than_ten_years = analysis_data["cavity_age"] > 10 * 365
    underperforming_cavities = analysis_data[failed_on_full_cavity & older_than_ten_years & (loft_thickness <= 100)]

    identified_gbis_not_sold = analysis_data[gbis_eligible & not_identified & eco4_ineligible]

    eco_eligible = analysis_data[eco4_eligible]
    eco_ineligible = analysis_data[eco4_ineligible]
    eco_ineligible["eco4_message"].value_counts()

    # SAP too high:
    sap_too_high = eco_ineligible[eco_ineligible["eco4_message"] == "sap too high"].copy()
    cavity_wall_descriptions = [
        "Cavity wall, as built, insulated",
        "Cavity wall, as built, no insulation",
        "Cavity wall, as built, partial insulation",
        "Cavity wall, no insulation",
        "Cavity wall, partial insulation"
    ]
    further_possibilities = sap_too_high[sap_too_high["walls"].isin(cavity_wall_descriptions)]
    filled_cavities = eco_ineligible[eco_ineligible["eco4_message"] == "sap too high"]

    warmfront_identified = analysis_data[analysis_data["warmfront_identified"]]
    warmfront_identified["walls"].value_counts()

    sold_as_gbis = analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin(gbis_schemes)
    gbis_only = analysis_data["gbis_eligible"] & analysis_data["eco4_eligible"].isin([False, None])
    all_identified_gbis = analysis_data[sold_as_gbis | gbis_only]

    empty_cavity_desriptions = [
        "Cavity wall, as built, no insulation", "Cavity wall, as built, partial insulation",
        "Cavity wall, no insulation", "Cavity wall, partial insulation"
    ]
    empty_cavities = analysis_data[analysis_data["walls"].isin(empty_cavity_desriptions)]
    remaining_empty = empty_cavities[~empty_cavities["warmfront_identified"]]

    # Of the ECO jobs, what proportion do we get right
    warmfront_identified_eco = warmfront_identified[warmfront_identified["funding_scheme"].isin(eco4_schemes)]
    eco_success_rate = warmfront_identified_eco["eco4_eligible"].sum() / warmfront_identified_eco.shape[0]
    warmfront_identified_gbis = warmfront_identified[warmfront_identified["funding_scheme"].isin(gbis_schemes)]
    gbis_success_rate = warmfront_identified_gbis["gbis_eligible"].sum() / warmfront_identified_gbis.shape[0]

    # Additional identified
    additional_identified_eco = analysis_data[eco4_eligible & not_identified]
    additional_identified_eco["eligibility_classification"].value_counts()
    additional_identified_gbis = analysis_data[gbis_eligible & eco4_ineligible & not_identified].shape[0]

    # Future
    eco4_eligible_future = analysis_data["eco4_eligible_future"] == True
    eco4_ineligible_future = analysis_data["eco4_eligible_future"] == False
    gbis_eligible_future = analysis_data["gbis_eligible_future"] == True
    additional_identified_eco_future = analysis_data[eco4_eligible_future & not_identified].shape[0]
    additional_identified_gbis_future = analysis_data[
        gbis_eligible_future & eco4_ineligible_future & not_identified
    ].shape[0]
def app():
    """
    Entry point of the script: load the asset and survey lists, give every asset a "ha16_<n>" row id, pull the
    cleaning artefacts and solar lookups from S3 and build the per-property results with get_epc_data.
    """
    bucket = "retrofit-data-dev"

    data, survey_list = load_data()
    data["row_id"] = [f"ha16_{position}" for position in range(len(data))]

    raw_cleaned = read_from_s3(s3_file_name="cleaned_epc_data/cleaned.bson", bucket_name=bucket)
    cleaned = msgpack.unpackb(raw_cleaned, raw=False)
    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name=bucket, file_key="sap_change_model/cleaning_dataset.parquet",
    )

    created_at = datetime.now().isoformat()
    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=bucket)

    results_df, scoring_data, nodata = get_epc_data(
        data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds
    )

    # The results can be cached locally between runs. Old file was ha16.pickle.
    # Store:
    # import pickle
    # with open("ha16_10_jan.pickle", "wb") as f:
    #     pickle.dump(
    #         {
    #             "scoring_data": scoring_data,
    #             "results": results_df,
    #             "nodata": nodata
    #         }, f
    #     )
    # Read:
    # import pickle
    # with open("ha16_10_jan.pickle", "rb") as f:
    #     saved = pickle.load(f)
    # scoring_data = saved["scoring_data"]
    # results_df = saved["results"]
    # nodata = saved["nodata"]

View file

@ -0,0 +1,524 @@
import os
import msgpack
import openpyxl
from pathlib import Path
from datetime import datetime
import pandas as pd
import numpy as np
from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet
from utils.logger import setup_logger
from dotenv import load_dotenv
from tqdm import tqdm
from backend.SearchEpc import SearchEpc
from etl.eligibility.Eligibility import Eligibility
from etl.eligibility.ha_15_32.app import prepare_model_data_row
from etl.epc.DataProcessor import DataProcessor
from etl.epc.settings import COLUMNS_TO_MERGE_ON
from backend.ml_models.api import ModelApi
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from recommendations.recommendation_utils import calculate_cavity_age
from recommendation_utils import convert_thickness_to_numeric
ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
logger = setup_logger()
# The .env file has to be loaded BEFORE any setting is read from the environment, otherwise a token that is
# only defined in the .env file is never picked up. load_dotenv does not override variables that are already set.
load_dotenv(ENV_FILE)
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
def _read_rows_with_fill_colour(sheet, min_row=2):
    """
    Read the rows of an openpyxl worksheet starting at min_row.

    :param sheet: The worksheet to read
    :param min_row: First (1-indexed) row to read
    :return: (rows, colours) - the cell values of every row and, per row, the fill colour index of its first
        cell (None when that index is '00000000')
    """
    rows, colours = [], []
    for row in sheet.iter_rows(min_row=min_row, values_only=False):
        rows.append([cell.value for cell in row])
        colour = row[0].fill.start_color.index
        colours.append(colour if colour != '00000000' else None)
    return rows, colours


def load_data():
    """
    Load the HA 24 asset list and ECO4 survey list and flag the assets that appear in the survey list.

    Every survey row is narrowed down to a single asset by street name, then house number and finally postcode.

    :return: (data, survey_list) - the asset list left-joined to the matched surveys, with a boolean
        warmfront_identified column, and the cleaned survey list
    :raises ValueError: if a survey row cannot be narrowed down to exactly one asset
    """
    workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 24 ASSET LIST.xlsx')
    sheet = workbook.active
    sheet_colnames = [cell.value for cell in sheet[1]]
    # The first row holds the headers
    rows_data, rows_colors = _read_rows_with_fill_colour(sheet, min_row=2)

    asset_list = pd.DataFrame(rows_data, columns=sheet_colnames)
    # Remove None columns
    asset_list = asset_list.iloc[:, 0:10]
    asset_list['row_color'] = rows_colors
    asset_list["row_colour_name"] = np.where(
        asset_list["row_color"] == "FFFF0000", "red",
        np.where(asset_list["row_color"] == "FF92D050", "green", "yellow")
    )
    asset_list["row_colour_code"] = np.where(
        asset_list["row_colour_name"] == "red", "does not meet criteria",
        np.where(asset_list["row_colour_name"] == "green", "identified potential eco", "maybe in the future")
    )

    # The third column is listed as "Address" but it's actually the postcode. We have two Address columns so we
    # change just the third. The labels are replaced wholesale because an Index must not be mutated in place.
    column_names = list(asset_list.columns)
    column_names[2] = "Postcode"
    asset_list.columns = column_names

    # Split up the address on commas, which is useful for matching later. The first part is the house number
    # and street ("temp"), the remaining parts are numbered from address2 onwards.
    split_addresses = asset_list['Address'].str.split(',', expand=True)
    split_addresses.columns = ['temp'] + [f'address{i}' for i in range(2, split_addresses.shape[1] + 1)]
    asset_list = pd.concat([asset_list, split_addresses], axis=1)

    # There are no commas separating house number and address 1, so the house number is the first word.
    # We could re-concatenate the rest but we only care about HouseNo for the moment
    asset_list["HouseNo"] = asset_list['temp'].str.split(' ', expand=True)[0]

    # Read in surveys
    survey_workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 24 ECO4 SURVEY LIST.xlsx')
    survey_sheet = survey_workbook.active
    survey_rows, survey_colors = _read_rows_with_fill_colour(survey_sheet, min_row=2)
    survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]])
    survey_list["row_colour"] = survey_colors

    # Tidy up the street/block name a bit
    street_names = survey_list["Street / Block Name"].str.replace("/", ", ").str.lower().str.strip()
    # Corrections are applied in this order; the generic "council house, " removal must come after the
    # specific council house entries
    street_name_corrections = [
        ("council house, nidds lane", "nidds lane"),
        ("wirral avenue", "wirrall avenue"),
        ("st ives road", "st. ives crescent"),
        ("sundringham road", "sandringham road"),
        ("milton avenue", "milton road"),
        ("st ives crescent", "st. ives crescent"),
        ("council house, waterbelly lane", "waterbelly lane"),
        ("council house, ", ""),
        ("st. leodegars close", "st leodegars close"),
    ]
    for wrong, right in street_name_corrections:
        street_names = street_names.str.replace(wrong, right)
    survey_list["Street / Block Name"] = street_names

    # Drop all None rows. Copy so the key below is assigned on an independent frame rather than on a slice.
    survey_list = survey_list[~pd.isnull(survey_list["Street / Block Name"])].copy()
    survey_list["survey_key"] = ["survey_" + str(i) for i in range(0, len(survey_list))]

    lowered_addresses = asset_list["Address"].str.lower()
    matched = []
    for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):
        house_number = row["NO."]
        if isinstance(house_number, str):
            house_number = house_number.lower()
        postcode = row["Post Code"]

        # Filter on the first line of the address
        df = asset_list[lowered_addresses.str.contains(row["Street / Block Name"].lower())].copy()
        df = df[df["Address"].str.lower().str.contains(str(house_number))]
        if df.shape[0] != 1:
            df = df[df["HouseNo"] == str(house_number)]
        # A survey row without a postcode cannot be narrowed down any further
        if df.shape[0] != 1 and isinstance(postcode, str):
            df = df[df["Postcode"].str.lower().str.contains(postcode.lower())]
        if df.shape[0] != 1:
            print(row["Street / Block Name"])
            print(house_number)
            print(postcode.lower() if isinstance(postcode, str) else postcode)
            raise ValueError("Investigate")

        matched.append(
            {
                "survey_key": row["survey_key"],
                "matched_address": df["Address"].values[0],
                "survey_house_no": row["NO."],
                "survey_street_name": row["Street / Block Name"],
                "survey_postcode": row["Post Code"],
                "survey_status": row["INSTALLED OR CANCELLED"]
            }
        )

    matched = pd.DataFrame(matched)
    matched["warmfront_identified"] = True

    # Combine asset list and surveys
    data = asset_list.merge(
        matched, how="left", left_on="Address", right_on="matched_address",
    )
    # Assets without a matched survey come out of the left join as missing; make the flag a plain boolean
    data["warmfront_identified"] = data["warmfront_identified"].eq(True)
    return data, survey_list
def _classify_eco4_confidence(sap, post_install_sap):
    """
    Grade the predicted post-install SAP of an ECO4-eligible property.

    The upgrade requirements depend on the current SAP: properties with a current SAP of 38 or below are graded
    against 57 / 55 / 53, all other properties against 71 / 69 / 67. Anything below the lowest threshold is
    graded "unlikely".

    :param sap: Current SAP score
    :param post_install_sap: Predicted SAP score after the install
    :return: "highest confidence", "high confidence", "medium confidence" or "unlikely"
    """
    highest, high, medium = (57, 55, 53) if sap <= 38 else (71, 69, 67)
    if post_install_sap >= highest:
        return "highest confidence"
    if post_install_sap >= high:
        return "high confidence"
    if post_install_sap >= medium:
        return "medium confidence"
    return "unlikely"


def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
    """
    Look up the EPC of every asset, run the eligibility checks and score the ECO4-eligible properties.

    :param data: Asset list with HouseNo, Postcode, Address, Property Type and row_id columns
    :param cleaned: Cleaned EPC lookup data handed to Eligibility, calculate_cavity_age and
        prepare_model_data_row
    :param cleaning_data: Cleaning dataset used for the averages cleaning of the scoring data
    :param created_at: Timestamp handed to the model rows and the model api
    :param photo_supply_lookup: Solar lookup handed to prepare_model_data_row
    :param floor_area_decile_thresholds: Floor area thresholds handed to prepare_model_data_row
    :return: (results_df, scoring_data, nodata) - one results row per asset with an EPC (including predicted
        SAP uplift and eligibility classification), the raw scoring rows, and the assets without any EPC
    """
    scoring_data = []
    results = []
    nodata = []
    property_type_lookup = {
        "01 HOUSE": "House",
        "02 FLAT": "Flat",
        "03 BUNGALOW": "Bungalow",
        "05 BEDSIT": "Flat",
        "04 MAISONETTE": "Maisonette",
        "01 HOUSE MID": "House",
        "10 PBUNGALOW": "Bungalow",
        "14 SFLAT": "Flat",
        "12 SBEDSIT": "Flat",
        "11 PFLAT": "Flat",
        "13 SBUNGALOW": "Bungalow",
        " 01 HOUSE MID": "House",
        "09 PBEDSIT": "Flat"
    }

    for _, property_meta in tqdm(data.iterrows(), total=len(data)):
        searcher = SearchEpc(
            address1=property_meta["HouseNo"],
            postcode=property_meta["Postcode"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            full_address=property_meta["Address"]
        )
        searcher.ordnance_survey_client.property_type = property_type_lookup[property_meta["Property Type"]]
        searcher.find_property(skip_os=True)

        if searcher.newest_epc is None:
            nodata.append(property_meta)
            continue

        newest_epc = searcher.newest_epc
        older_epcs = searcher.older_epcs
        full_sap_epc = searcher.full_sap_epc

        # We also want to get the penultimate epc
        penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
        if not penultimate_epc:
            penultimate_epc = newest_epc

        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
        eligibility.check_gbis_warmfront()
        eligibility.check_eco4_warmfront()

        if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
            # Neither scheme passes on the newest EPC, so fall back to the penultimate one
            eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
            eligibility.check_gbis_warmfront()
            eligibility.check_eco4_warmfront()
            # If this is the case, we need to update the older epcs.
            # We don't also drop the newest epc, just to make data cleaning easier
            if penultimate_epc.get("estimated") is None:
                older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]

        # Loft MUST be suitable
        cavity_age = None
        if (
            eligibility.walls["is_cavity_wall"] and
            eligibility.walls["is_filled_cavity"] and
            eligibility.loft["suitability"] and
            eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
        ):
            # We check the age of the cavity and if it's particularly old, we flag it
            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)

        # Full checks
        eligibility.check_gbis()
        eligibility.check_eco4()

        if eligibility.eco4_warmfront["eligible"]:
            # The scoring data needs an integer UPRN, so fall back to the numeric part of the row id
            if eligibility.epc["uprn"] in ["", None]:
                eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[1])

            scoring_dictionary = prepare_model_data_row(
                property_id=property_meta["row_id"],
                modelling_epc=eligibility.epc,
                cleaned=cleaned,
                cleaning_data=cleaning_data,
                created_at=created_at,
                old_data=older_epcs,
                full_sap_epc=full_sap_epc,
                photo_supply_lookup=photo_supply_lookup,
                floor_area_decile_thresholds=floor_area_decile_thresholds
            )
            scoring_data.extend(scoring_dictionary)

        results.append(
            {
                "row_id": property_meta["row_id"],
                "uprn": eligibility.epc["uprn"],
                "Address": property_meta["Address"],
                "Postcode": property_meta["Postcode"],
                "property_type": eligibility.epc["property-type"],
                "gbis_eligible": eligibility.gbis_warmfront,
                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                "eco4_message": eligibility.eco4_warmfront["message"],
                "sap": float(eligibility.epc["current-energy-efficiency"]),
                "gbis_eligible_future": eligibility.gbis["eligible"],
                "gbis_eligible_future_message": eligibility.gbis["message"],
                "eco4_eligible_future": eligibility.eco4["eligible"],
                "eco4_eligible_future_message": eligibility.eco4["message"],
                # Property components
                "roof": eligibility.roof["clean_description"],
                "walls": eligibility.walls["clean_description"],
                "cavity_type": eligibility.cavity["type"],
                "heating": eligibility.epc["mainheat-description"],
                "tenure": eligibility.tenure,
                "date_epc": eligibility.epc["lodgement-date"],
                "cavity_age": cavity_age,
                **eligibility.walls,
                **eligibility.roof,
            }
        )

    scoring_df = pd.DataFrame(scoring_data)

    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])
    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )
    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)

    model_api = ModelApi(portfolio_id="ha24-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )

    # The uplift of a property is the sum, over its prediction rows, of predicted SAP minus current SAP
    predictions = all_predictions["sap_change_predictions"].copy()
    results_df = pd.DataFrame(results)
    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
        results_df[["row_id", "sap"]], how="left", on="row_id"
    )
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()

    results_df = results_df.merge(
        predictions[["sap_uplift", "row_id"]],
        how="left",
        on="row_id"
    )
    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]

    # Only ECO4-eligible properties get a classification. The frame is built with explicit columns so that
    # the merge below still works when no property is eligible.
    eligible = results_df[results_df["eco4_eligible"] == True]
    eligibility_assessment = pd.DataFrame(
        {
            "row_id": eligible["row_id"].values,
            "eligibility_classification": [
                _classify_eco4_confidence(sap, post_install_sap)
                for sap, post_install_sap in zip(eligible["sap"], eligible["post_install_sap"])
            ],
        }
    )
    results_df = results_df.merge(
        eligibility_assessment, how="left", on="row_id"
    )
    return results_df, scoring_data, nodata
def analyse_results(results_df, data, survey_list):
    """
    Ad-hoc comparison of the modelled eligibility against the properties Warmfront has already identified.

    The asset list is joined to the per-property results on row_id and to the survey list on survey_key; the
    first column of the survey list is taken as the funding scheme. Every subset, count and rate below is only
    bound to a local name so that it can be inspected while stepping through - nothing is returned.

    :param results_df: Per-property results keyed on row_id
    :param data: Asset list holding row_id, survey_key and warmfront_identified
    :param survey_list: Survey list keyed on survey_key
    """
    scheme_column = survey_list.columns[0]
    funding_schemes = survey_list[["survey_key", scheme_column]].rename(columns={scheme_column: "funding_scheme"})
    analysis_data = data[["row_id", "survey_key", "warmfront_identified"]].merge(
        results_df, how="left", on="row_id"
    )
    analysis_data = analysis_data.merge(funding_schemes, how="left", on="survey_key")

    # NEW
    # Normalise missing thicknesses to None before converting the thickness to a number
    raw_thickness = analysis_data["roof_insulation_thickness"]
    analysis_data["roof_insulation_thickness"] = np.where(pd.isnull(raw_thickness), None, raw_thickness)
    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
        lambda thickness: convert_thickness_to_numeric(thickness, is_flat=False, is_pitched=True)
    )

    # Masks shared by the subsets below
    eco4_schemes = ["ECO4 A/W", "AFFORDABLE WARMTH"]
    gbis_schemes = ["ECO4 GBIS (ECO+)"]
    identified = analysis_data["warmfront_identified"] == True
    not_identified = analysis_data["warmfront_identified"] == False
    eco4_eligible = analysis_data["eco4_eligible"] == True
    eco4_ineligible = analysis_data["eco4_eligible"] == False
    gbis_eligible = analysis_data["gbis_eligible"] == True

    warmfront_sold_eco4 = analysis_data[identified & analysis_data["funding_scheme"].isin(eco4_schemes)]
    warmfront_sold_gbis = analysis_data[identified & analysis_data["funding_scheme"].isin(gbis_schemes)]

    # 1407
    thin_loft_insulation = analysis_data["roof_insulation_thickness_numeric"] <= 100
    additional_eco4_warmfront_not_sold = analysis_data[eco4_eligible & not_identified & thin_loft_insulation]
    already_counted_as_eco4 = analysis_data["row_id"].isin(additional_eco4_warmfront_not_sold["row_id"].values)
    additional_gbis_warmfront_not_sold = analysis_data[gbis_eligible & not_identified & (~already_counted_as_eco4)]
    additional_gbis_warmfront_not_sold["walls"].value_counts()
    analysis_data["walls"].value_counts()
    # END NEW

    sold_as_eco = analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin(["ECO4 A/W"])
    all_identified_eco = analysis_data[sold_as_eco | (analysis_data["eco4_eligible"])]

    sold_as_gbis = analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin(gbis_schemes)
    gbis_only = analysis_data["gbis_eligible"] & analysis_data["eco4_eligible"].isin([False, None])
    all_identified_gbis = analysis_data[sold_as_gbis | gbis_only]

    warmfront_identified = analysis_data[analysis_data["warmfront_identified"]]

    # Of the ECO jobs, what proportion do we get right
    warmfront_identified_eco = warmfront_identified[warmfront_identified["funding_scheme"].isin(eco4_schemes)]
    eco_success_rate = warmfront_identified_eco["eco4_eligible"].sum() / warmfront_identified_eco.shape[0]
    warmfront_identified_gbis = warmfront_identified[warmfront_identified["funding_scheme"].isin(gbis_schemes)]
    # No gbis for this
    # gbis_success_rate = warmfront_identified_gbis["gbis_eligible"].sum() / warmfront_identified_gbis.shape[0]

    # Additional identified
    additional_identified_eco = analysis_data[eco4_eligible & not_identified]
    additional_identified_eco["eligibility_classification"].value_counts()
    additional_identified_gbis = analysis_data[gbis_eligible & eco4_ineligible & not_identified].shape[0]

    # Future
    eco4_eligible_future = analysis_data["eco4_eligible_future"] == True
    eco4_ineligible_future = analysis_data["eco4_eligible_future"] == False
    gbis_eligible_future = analysis_data["gbis_eligible_future"] == True
    additional_identified_eco_future = analysis_data[eco4_eligible_future & not_identified].shape[0]
    additional_identified_gbis_future = analysis_data[
        gbis_eligible_future & eco4_ineligible_future & not_identified
    ].shape[0]
def app():
    """
    Entry point of the script: load the asset and survey lists, give every asset a "ha24_<n>" row id, pull the
    cleaning artefacts and solar lookups from S3 and build the per-property results with get_epc_data.
    """
    bucket = "retrofit-data-dev"

    data, survey_list = load_data()
    data["row_id"] = [f"ha24_{position}" for position in range(len(data))]

    raw_cleaned = read_from_s3(s3_file_name="cleaned_epc_data/cleaned.bson", bucket_name=bucket)
    cleaned = msgpack.unpackb(raw_cleaned, raw=False)
    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name=bucket, file_key="sap_change_model/cleaning_dataset.parquet",
    )

    created_at = datetime.now().isoformat()
    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=bucket)

    results_df, scoring_data, nodata = get_epc_data(
        data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds
    )

    # Pickle results just in case
    # import pickle
    # with open("ha24_10_jan.pickle", "wb") as f:
    #     pickle.dump(
    #         {
    #             "scoring_data": scoring_data,
    #             "results": results_df,
    #             "nodata": nodata
    #         }, f
    #     )
    # Read in pickle
    # import pickle
    # with open("ha24_10_jan.pickle", "rb") as f:
    #     saved = pickle.load(f)
    # scoring_data = saved["scoring_data"]
    # results_df = saved["results"]
    # nodata = saved["nodata"]

View file

@ -0,0 +1,883 @@
import os
import msgpack
import openpyxl
from pathlib import Path
from datetime import datetime
import pandas as pd
import numpy as np
from utils.s3 import read_from_s3
from utils.logger import setup_logger
from dotenv import load_dotenv
from utils.s3 import read_dataframe_from_s3_parquet
from tqdm import tqdm
from backend.SearchEpc import SearchEpc
from etl.eligibility.Eligibility import Eligibility
from etl.eligibility.ha_15_32.app import prepare_model_data_row
from etl.epc.DataProcessor import DataProcessor
from etl.epc.settings import COLUMNS_TO_MERGE_ON
from backend.ml_models.api import ModelApi
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from recommendations.recommendation_utils import calculate_cavity_age
from recommendation_utils import convert_thickness_to_numeric
import re
ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
logger = setup_logger()
# The .env file has to be loaded BEFORE any setting is read from the environment, otherwise a token that is
# only defined in the .env file is never picked up. load_dotenv does not override variables that are already set.
load_dotenv(ENV_FILE)
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
def _read_rows_with_fill_colour(sheet, min_row=2):
    """
    Read the rows of an openpyxl worksheet starting at min_row.

    :param sheet: The worksheet to read
    :param min_row: First (1-indexed) row to read
    :return: (rows, colours) - the cell values of every row and, per row, the fill colour index of its first
        cell (None when that index is '00000000')
    """
    rows, colours = [], []
    for row in sheet.iter_rows(min_row=min_row, values_only=False):
        rows.append([cell.value for cell in row])
        colour = row[0].fill.start_color.index
        colours.append(colour if colour != '00000000' else None)
    return rows, colours


def load_data():
    """
    Load the HA 25 asset list and the list of ECO4 prospects and flag the assets that appear as a prospect.

    Every prospect is narrowed down to a single asset by street name, then house number and finally postcode.
    Prospects that cannot be narrowed down to exactly one asset are left unmatched. The historical ECO3 survey
    list is also read and colour coded, but only for ad-hoc inspection - it is not part of the return value.

    :return: (data, eco4_prospects_survey_list, lost_identified_properties) - the asset list left-joined to the
        matched prospects, with a boolean warmfront_identified column; the cleaned prospects list; and the
        prospects that could not be matched to an asset
    """
    workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 25 ASSET LIST.xlsx', data_only=True)
    sheet = workbook.active
    # Values and colours are read in one pass; the first cell colour is assumed indicative of the entire row
    rows_data, rows_colors = _read_rows_with_fill_colour(sheet, min_row=1)
    # Headers are on the final row. Pop them off, together with the colour of that row
    headers = rows_data.pop()
    rows_colors.pop()
    # The postcode header is None, so we replace it with "postcode"
    headers[-1] = "postcode"

    asset_list = pd.DataFrame(rows_data, columns=headers)
    asset_list['row_color'] = rows_colors
    asset_list["row_colour_name"] = np.where(
        asset_list["row_color"] == "FFFF0000", "red",
        np.where(asset_list["row_color"] == "FF00B050", "green", "yellow")
    )
    asset_list["row_colour_code"] = np.where(
        asset_list["row_colour_name"] == "red", "does not meet criteria",
        np.where(asset_list["row_colour_name"] == "green", "identified potential eco", "maybe in the future")
    )

    asset_list["address"] = asset_list["T1_Address"].str.lower().str.replace("flat", "").str.strip()
    # We only care about the house number for the moment, which is the first word of the address. Taking just
    # the first split column means the number of words in the longest address does not matter.
    asset_list["HouseNo"] = asset_list['address'].str.split(' ', expand=True)[0].str.replace(";", "")
    asset_list["postcode"] = asset_list["postcode"].str.strip()

    # We analyse the historical ECO3 survey list. This is some adhoc analysis, just to get completion and
    # cancellation rates historically
    eco3_survey_workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 25 ECO3 SURVEY LIST.xlsx')
    eco3_survey_sheet = eco3_survey_workbook["CAVITY"]
    eco3_survey_rows, eco3_survey_colors = _read_rows_with_fill_colour(eco3_survey_sheet, min_row=2)
    eco3_survey_list = pd.DataFrame(eco3_survey_rows, columns=[cell.value for cell in eco3_survey_sheet[1]])
    eco3_survey_list["row_colour"] = eco3_survey_colors
    # Remove rows where street name is missing. Copy so the columns below are added to an independent frame.
    eco3_survey_list = eco3_survey_list[~pd.isnull(eco3_survey_list["Street / Block Name"])].copy()

    # We need to parse the row colours; anything not listed here is "unknown"
    eco3_colour_names = {
        "FF7030A0": "purple",
        "FF92D050": "green",
        "FFFF0000": "red",
        "FFFFFF00": "yellow",
        "FF38FD23": "green",
    }
    eco3_survey_list["row_colour_name"] = eco3_survey_list["row_colour"].map(eco3_colour_names).fillna("unknown")
    # We map the meaning of each colour
    eco3_colour_codes = {
        "red": "cancelled",
        "green": "installed advised install complete",
        "purple": "installer advised install complete + post works EPC",
        "yellow": "filler row - drop",
    }
    eco3_survey_list["row_colour_code"] = eco3_survey_list["row_colour_name"].map(eco3_colour_codes).fillna(
        "unknown"
    )
    # This is good enough for the indicative cancellation rates

    # We now read in the indicative survey list which identified prospects for ECO4 works
    eco4_survey_workbook = openpyxl.load_workbook(
        'etl/eligibility/ha_15_32/HESTIA - HA 25 ADHOC ISOLATED IDENTIFIED PROPERTIES FOR CWI.xlsx'
    )
    eco4_prospect_survey_sheet = eco4_survey_workbook["LiveWest"]
    eco4_prospects_survey_rows, eco4_prospects_survey_colors = _read_rows_with_fill_colour(
        eco4_prospect_survey_sheet, min_row=2
    )
    eco4_prospects_survey_list = pd.DataFrame(
        eco4_prospects_survey_rows, columns=[cell.value for cell in eco4_prospect_survey_sheet[1]]
    )
    eco4_prospects_survey_list["row_colour"] = eco4_prospects_survey_colors
    eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.lower().str.strip()
    # Drop rows without an address. Copy so the corrections below are made on an independent frame; the
    # original row labels are kept, which the label based corrections below rely on.
    eco4_prospects_survey_list = eco4_prospects_survey_list[
        ~pd.isnull(eco4_prospects_survey_list["ADDRESS 1"])
    ].copy()
    eco4_prospects_survey_list["survey_key"] = ["survey_" + str(i) for i in range(0, len(eco4_prospects_survey_list))]

    # Correct some errors in the survey list
    wrong_berry_park_postcode = (
        (eco4_prospects_survey_list["ADDRESS 1"] == "berry park") &
        (eco4_prospects_survey_list["POSTCODE"] == "PL12 6HP")
    )
    eco4_prospects_survey_list.loc[wrong_berry_park_postcode, "POSTCODE"] = "PL12 6EN"

    # Remove semi colons from address in asset and survey list
    asset_list["T1_Address"] = asset_list["T1_Address"].str.replace(";", "")
    eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace(";", "")

    # In the prospects survey list, we have 6 WALKHAM MEADOWS listed twice, which should be 6a and 6b.
    # Only existing rows are corrected: assigning to a missing label would silently append a new row.
    for row_label, house_no in ((838, "6a"), (839, "6b")):
        if row_label in eco4_prospects_survey_list.index:
            eco4_prospects_survey_list.loc[row_label, "NO"] = house_no

    # 3, 7, 9 BOLDVENTURE ROAD should be BOLDVENTURE CLOSE
    wrong_boldventure = (
        (eco4_prospects_survey_list["ADDRESS 1"] == "boldventure road") &
        (eco4_prospects_survey_list["NO"].isin([3, 7, 9]))
    )
    eco4_prospects_survey_list.loc[wrong_boldventure, "ADDRESS 1"] = "boldventure close"

    wrong_old_farm_road = (
        (eco4_prospects_survey_list["ADDRESS 1"] == "old farm road") &
        (eco4_prospects_survey_list["POSTCODE"] == "PL5 1EP")
    )
    eco4_prospects_survey_list.loc[wrong_old_farm_road, "ADDRESS 1"] = "old school road"

    wrong_croft_orchard = (
        (eco4_prospects_survey_list["ADDRESS 1"] == "croft orchard") &
        (eco4_prospects_survey_list["POSTCODE"] == "TQ12 6RP") &
        (eco4_prospects_survey_list["NO"] == 52)
    )
    eco4_prospects_survey_list.loc[wrong_croft_orchard, "ADDRESS 1"] = "drum way"

    # String replace
    eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace(
        "the gulls, collaton road", "the gulls collaton road"
    )
    eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace(
        "crows-an-eglose", "crows-an-eglos"
    )

    # Streets that are known not to be in the asset list at all
    streets_not_in_asset_list = [
        "kaynton mead", "broadmoor lane", "hoopers barton", "ecos court", "selwood road",
        "castle street"
    ]

    # We have a high volume of rows that do not match
    lowered_addresses = asset_list["T1_Address"].str.lower()
    matched = []
    nomatch = []
    for _, row in tqdm(eco4_prospects_survey_list.iterrows(), total=len(eco4_prospects_survey_list)):
        # Not in the asset list
        if (row["ADDRESS 1"] == "berry park") and row["NO"] in [40, 42] and row["POSTCODE"] == "PL12 6EN":
            nomatch.append(row.to_dict())
            continue
        # Not in the asset list
        if (row["ADDRESS 1"] == "roberts road") and row["NO"] == 23 and row["POSTCODE"] == "PL5 1DP":
            nomatch.append(row.to_dict())
            continue
        # Not in the asset list
        if row["ADDRESS 1"] in streets_not_in_asset_list:
            nomatch.append(row.to_dict())
            continue

        house_number = row["NO"]
        if isinstance(house_number, str):
            house_number = house_number.lower()
            if "flat" in house_number:
                house_number = house_number.split("flat")[1].strip()
        postcode = row["POSTCODE"]

        # Filter on the first line of the address, then narrow down step by step until one asset is left
        df = asset_list[lowered_addresses.str.contains(row["ADDRESS 1"].lower())].copy()
        if df.shape[0] != 1 and house_number is not None:
            df = df[df["T1_Address"].str.lower().str.contains(str(house_number))]
        if df.shape[0] != 1 and house_number is not None:
            df = df[df["HouseNo"] == str(house_number)]
        # A prospect without a usable postcode cannot be narrowed down any further
        if df.shape[0] != 1 and isinstance(postcode, str):
            df = df[df["postcode"].str.lower().str.contains(postcode.lower())]
        if df.shape[0] != 1:
            nomatch.append(row.to_dict())
            continue

        matched.append(
            {
                "survey_key": row["survey_key"],
                "matched_address": df["T1_Address"].values[0],
                "survey_house_no": row["NO"],
                "survey_street_name": row["ADDRESS 1"],
                "survey_postcode": row["POSTCODE"],
            }
        )

    # Kept as a frame for inspection only
    nomatch = pd.DataFrame(nomatch)
    matched = pd.DataFrame(matched)
    matched["warmfront_identified"] = True

    # Combine asset list and surveys
    data = asset_list.merge(
        matched, how="left", left_on="T1_Address", right_on="matched_address",
    )
    # Assets without a matched prospect come out of the left join as missing; make the flag a plain boolean
    data["warmfront_identified"] = data["warmfront_identified"].eq(True)

    lost_identified_properties = eco4_prospects_survey_list[
        ~eco4_prospects_survey_list["survey_key"].isin(matched["survey_key"])
    ]
    return data, eco4_prospects_survey_list, lost_identified_properties
def map_year_to_age_band(year):
    """
    Map a build year onto the matching "England and Wales" construction age band label.

    :param year: The build year; anything that int() accepts (e.g. 1975, "1975" or 1975.0)
    :return: The age band label for the year, or "Invalid Year" if the year cannot be converted to an integer
    """
    try:
        year = int(year)
    except (TypeError, ValueError, OverflowError):
        # TypeError covers None, ValueError covers non-numeric strings and NaN, OverflowError covers infinity
        return "Invalid Year"  # Or any other way you want to handle invalid inputs

    # Inclusive upper bound of every band, in ascending order. The first bound that the year fits under wins.
    band_upper_bounds = (
        (1899, "England and Wales: before 1900"),
        (1929, "England and Wales: 1900-1929"),
        (1949, "England and Wales: 1930-1949"),
        (1966, "England and Wales: 1950-1966"),
        (1975, "England and Wales: 1967-1975"),
        (1982, "England and Wales: 1976-1982"),
        (1990, "England and Wales: 1983-1990"),
        (1995, "England and Wales: 1991-1995"),
        (2002, "England and Wales: 1996-2002"),
        (2006, "England and Wales: 2003-2006"),
        (2011, "England and Wales: 2007-2011"),
    )
    for upper_bound, age_band in band_upper_bounds:
        if year <= upper_bound:
            return age_band

    # Assuming all remaining years are 2012 onwards
    return "England and Wales: 2012 onwards"
def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
    """
    Search for an EPC for every property in the asset list, run the eligibility checks against it and score the
    properties that pass the ECO4 warmfront check with the SAP change model.

    :param data: Asset list dataframe. The columns read here are HouseNo, postcode, address, T1_Address,
        T1_AssetType, Build Yr and row_id
    :param cleaned: Passed through to Eligibility, calculate_cavity_age and prepare_model_data_row
    :param cleaning_data: Passed through to prepare_model_data_row and DataProcessor.apply_averages_cleaning
    :param created_at: Timestamp passed to prepare_model_data_row and ModelApi
    :param photo_supply_lookup: Passed through to prepare_model_data_row
    :param floor_area_decile_thresholds: Passed through to prepare_model_data_row
    :return: Tuple of (results dataframe with one row per property that has an EPC, the list of scoring rows sent
        to the model, the list of asset rows for which no EPC was found)
    """
    scoring_data = []
    results = []
    nodata = []
    property_type_lookup = {
        "Flat": {"property-type": "Flat", "built-form": None},
        "Mid Terrace House": {"property-type": "House", "built-form": "Mid-Terrace"},
        "End Terrace House": {"property-type": "House", "built-form": "End-Terrace"},
        "Maisonnette": {"property-type": "Flat", "built-form": None},
        "Semi Detached House": {"property-type": "House", "built-form": "Semi-Detached"},
        "Detached House": {"property-type": "House", "built-form": "Detached"},
        "Coach House": {"property-type": "House", "built-form": "Detached"},
        "Bungalow": {"property-type": "Bungalow", "built-form": None},
        "Detached Bungalow": {"property-type": "Bungalow", "built-form": "Detached"},
        "House": {"property-type": "House", "built-form": None},
        "Semi Detached Bung": {"property-type": "Bungalow", "built-form": "Semi-Detached"},
        "Bedspace": {"property-type": None, "built-form": None},
        "Office Buildings": {"property-type": None, "built-form": None},
        "End Terrace Bungalow": {"property-type": "Bungalow", "built-form": "End-Terrace"},
        "Mid Terrace Bungalow": {"property-type": "Bungalow", "built-form": "Mid-Terrace"},
        "Bedsit": {"property-type": "Flat", "built-form": None},
        "Mid Terrace Housekeeping": {"property-type": "House", "built-form": "Mid-Terrace"},
        "Mid Terrace Housekeeping ": {"property-type": "House", "built-form": "Mid-Terrace"},
        "End Terrace Housex": {"property-type": "House", "built-form": "End-Terrace"},
        "Guest Room": {"property-type": None, "built-form": None}
    }

    # Iterating a dataframe directly yields its column names, so we must go through iterrows() to get the rows
    for _, property_meta in tqdm(data.iterrows(), total=len(data)):
        searcher = SearchEpc(
            address1=property_meta["HouseNo"],
            postcode=property_meta["postcode"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            full_address=property_meta["address"]
        )
        asset_type = property_type_lookup[property_meta["T1_AssetType"]]
        searcher.ordnance_survey_client.property_type = asset_type["property-type"]
        searcher.ordnance_survey_client.built_form = asset_type["built-form"]
        searcher.find_property(skip_os=True)

        if searcher.newest_epc is None:
            nodata.append(property_meta)
            continue

        if searcher.newest_epc.get("estimated"):
            # We insert the row ID as our proxy for UPRN
            proxy_uprn = int(property_meta["row_id"].split("_")[1])
            searcher.newest_epc["uprn"] = proxy_uprn

        newest_epc = searcher.newest_epc
        older_epcs = searcher.older_epcs
        full_sap_epc = searcher.full_sap_epc

        # NOTE: unlike get_epc_data_for_lost_surveys, we do not fall back to the penultimate EPC when the newest
        # EPC fails both warmfront checks
        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
        eligibility.check_gbis_warmfront()
        eligibility.check_eco4_warmfront()

        # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
        # Loft MUST be suitable
        cavity_age = None
        if (
            eligibility.walls["is_cavity_wall"] and
            eligibility.walls["is_filled_cavity"] and
            eligibility.loft["suitability"] and
            eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
        ):
            # We check the age of the cavity and if it's particularly old, we flag it
            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)

        # Full checks
        eligibility.check_gbis()
        eligibility.check_eco4()

        if eligibility.eco4_warmfront["eligible"]:
            # Fill the gaps in the EPC that the model cannot cope with before building the scoring rows
            if eligibility.epc["uprn"] in ["", None]:
                eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[1])

            if eligibility.epc["construction-age-band"] in ["", None]:
                eligibility.epc["construction-age-band"] = map_year_to_age_band(property_meta["Build Yr"])

            # This is not the right place to do this but this is temp
            if eligibility.epc["extension-count"] in ["", None]:
                eligibility.epc["extension-count"] = 0

            # Not in the right place but temp
            if eligibility.epc["built-form"] in ["", None]:
                if not older_epcs:
                    eligibility.epc["built-form"] = "Mid-Terrace"

            scoring_dictionary = prepare_model_data_row(
                property_id=property_meta["row_id"],
                modelling_epc=eligibility.epc,
                cleaned=cleaned,
                cleaning_data=cleaning_data,
                created_at=created_at,
                old_data=older_epcs,
                full_sap_epc=full_sap_epc,
                photo_supply_lookup=photo_supply_lookup,
                floor_area_decile_thresholds=floor_area_decile_thresholds,
            )
            scoring_data.extend(scoring_dictionary)

        # Every property with an EPC gets a result row, whether or not it was eligible
        results.append(
            {
                "row_id": property_meta["row_id"],
                "uprn": eligibility.epc["uprn"],
                "Address": property_meta["T1_Address"],
                "Postcode": property_meta["postcode"],
                "property_type": eligibility.epc["property-type"],
                "gbis_eligible": eligibility.gbis_warmfront,
                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                "eco4_message": eligibility.eco4_warmfront["message"],
                "sap": float(eligibility.epc["current-energy-efficiency"]),
                "gbis_eligible_future": eligibility.gbis["eligible"],
                "gbis_eligible_future_message": eligibility.gbis["message"],
                "eco4_eligible_future": eligibility.eco4["eligible"],
                "eco4_eligible_future_message": eligibility.eco4["message"],
                # Property components
                "roof": eligibility.roof["clean_description"],
                "walls": eligibility.walls["clean_description"],
                "cavity_type": eligibility.cavity["type"],
                "heating": eligibility.epc["mainheat-description"],
                "tenure": eligibility.tenure,
                "date_epc": eligibility.epc["lodgement-date"],
                "cavity_age": cavity_age,
                **eligibility.walls,
                **eligibility.roof,
            }
        )

    scoring_df = pd.DataFrame(scoring_data)

    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])
    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )
    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)

    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )

    # Turn the predictions into a total SAP uplift per property and add it on to the current SAP
    predictions = all_predictions["sap_change_predictions"].copy()
    results_df = pd.DataFrame(results)
    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
        results_df[["row_id", "sap"]], how="left", on="row_id"
    )
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
    results_df = results_df.merge(
        predictions[["sap_uplift", "row_id"]],
        how="left",
        on="row_id"
    )
    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]

    eligibility_assessment = []
    for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
        # The upgrade requirements are dependent on the current SAP: a property with a SAP of 38 or below is
        # assessed against a post install SAP of 57, everything else against 71. The confidence drops one level
        # for every 2 SAP points that the prediction falls short of that target.
        target_sap = 57 if row["sap"] <= 38 else 71
        if row["post_install_sap"] >= target_sap:
            eligibility_classification = "highest confidence"
        elif row["post_install_sap"] >= target_sap - 2:
            eligibility_classification = "high confidence"
        elif row["post_install_sap"] >= target_sap - 4:
            eligibility_classification = "medium confidence"
        else:
            eligibility_classification = "unlikely"

        eligibility_assessment.append(
            {
                "row_id": row["row_id"],
                "eligibility_classification": eligibility_classification
            }
        )

    eligibility_assessment = pd.DataFrame(eligibility_assessment)
    results_df = results_df.merge(
        eligibility_assessment, how="left", on="row_id"
    )
    return results_df, scoring_data, nodata
def get_epc_data_for_lost_surveys(
    lost_identified_properties, cleaned, cleaning_data, created_at, photo_supply_lookup,
    floor_area_decile_thresholds
):
    """
    Run the EPC search, eligibility checks and SAP change scoring for the surveyed properties that could not be
    matched to the asset list.

    A "row_id" column of the form lost_surveys_ha25_{n} is added to lost_identified_properties in place.

    :param lost_identified_properties: Survey dataframe. The columns read here are NO, ADDRESS 1, ADDRESS 2,
        ADDRESS 3, POSTCODE and PROPERTY TYPE
    :param cleaned: Passed through to Eligibility and prepare_model_data_row
    :param cleaning_data: Passed through to prepare_model_data_row and DataProcessor.apply_averages_cleaning
    :param created_at: Timestamp passed to prepare_model_data_row and ModelApi
    :param photo_supply_lookup: Passed through to prepare_model_data_row
    :param floor_area_decile_thresholds: Passed through to prepare_model_data_row
    :return: Tuple of (results dataframe with one row per property that has an EPC, the list of scoring rows sent
        to the model, the list of survey rows for which no EPC was found)
    """
    lost_identified_properties["row_id"] = [
        "lost_surveys_ha25_" + str(i) for i in range(0, len(lost_identified_properties))
    ]
    scoring_data = []
    results = []
    nodata = []
    # NOTE(review): every flat-like key below maps to "House" - confirm that this is intentional
    property_type_lookup = {
        "MID-TERRACE": {"property-type": "House", "built-form": "Mid-Terrace"},
        "N/A": {"property-type": "House", "built-form": None},
        "END-TERRACE": {"property-type": "House", "built-form": "End-Terrace"},
        "GROUND-FLOOR": {"property-type": "House", "built-form": None},
        "TOP-FLOOR": {"property-type": "House", "built-form": None},
        "SEMI-DETACHED": {"property-type": "House", "built-form": "Semi-Detached"},
        "MID-FLOOR": {"property-type": "House", "built-form": None},
        "TOP-FLOOR FLAT": {"property-type": "House", "built-form": None},
        "DETACHED": {"property-type": "House", "built-form": "Detached"},
        "MID-FLOOR FLAT": {"property-type": "House", "built-form": None},
        "SEMI- DETACHED": {"property-type": "House", "built-form": "Semi-Detached"},
        "NO EPC ON GOV": {"property-type": "House", "built-form": None},
        "Top-floor flat": {"property-type": "House", "built-form": None},
        "GROUND-FLOOR FLAT": {"property-type": "House", "built-form": None},
        "NOT ON GOV SITE": {"property-type": "House", "built-form": None}
    }

    for _, property_meta in tqdm(lost_identified_properties.iterrows(), total=len(lost_identified_properties)):
        # pd.isnull catches NaN as well as None - pandas uses either for a missing cell
        if pd.isnull(property_meta["POSTCODE"]):
            continue

        full_address = ", ".join(
            [str(x) for x in [
                property_meta["NO"], property_meta["ADDRESS 1"], property_meta["ADDRESS 2"], property_meta["ADDRESS 3"]
            ] if not pd.isnull(x)]
        )
        searcher = SearchEpc(
            address1=str(property_meta["NO"]),
            postcode=property_meta["POSTCODE"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            full_address=full_address
        )

        property_type_key = property_meta["PROPERTY TYPE"]
        if not pd.isnull(property_type_key):
            survey_type = property_type_lookup[property_type_key.strip()]
            searcher.ordnance_survey_client.property_type = survey_type["property-type"]
            searcher.ordnance_survey_client.built_form = survey_type["built-form"]

        searcher.find_property(skip_os=True)
        if searcher.newest_epc is None:
            nodata.append(property_meta)
            continue

        if searcher.newest_epc.get("estimated"):
            # We insert the row ID as our proxy for UPRN
            proxy_uprn = int(property_meta["row_id"].split("_")[-1])
            searcher.newest_epc["uprn"] = proxy_uprn

        newest_epc = searcher.newest_epc
        older_epcs = searcher.older_epcs
        full_sap_epc = searcher.full_sap_epc

        # We also want to get the penultimate epc
        penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
        if not penultimate_epc:
            penultimate_epc = newest_epc

        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
        eligibility.check_gbis_warmfront()
        eligibility.check_eco4_warmfront()

        if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
            # The newest EPC fails both warmfront checks, so we retry against the penultimate EPC
            eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
            eligibility.check_gbis_warmfront()
            eligibility.check_eco4_warmfront()
            # If this is the case, we need to update the older epcs
            # We don't update just to make data cleaning easier
            if penultimate_epc.get("estimated") is None:
                older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]

        # Full checks
        eligibility.check_gbis()
        eligibility.check_eco4()

        if eligibility.eco4_warmfront["eligible"] and (eligibility.epc["construction-age-band"] not in ["", None]):
            if eligibility.epc["uprn"] in ["", None]:
                # The row ID is lost_surveys_ha25_{n}, so the numeric part is the LAST element of the split
                eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[-1])

            scoring_dictionary = prepare_model_data_row(
                property_id=property_meta["row_id"],
                modelling_epc=eligibility.epc,
                cleaned=cleaned,
                cleaning_data=cleaning_data,
                created_at=created_at,
                old_data=older_epcs,
                full_sap_epc=full_sap_epc,
                photo_supply_lookup=photo_supply_lookup,
                floor_area_decile_thresholds=floor_area_decile_thresholds,
            )
            scoring_data.extend(scoring_dictionary)

        # Every property with an EPC gets a result row, whether or not it was eligible
        results.append(
            {
                "row_id": property_meta["row_id"],
                "uprn": eligibility.epc["uprn"],
                "Address": property_meta["ADDRESS 1"],
                "Postcode": property_meta["POSTCODE"],
                "property_type": eligibility.epc["property-type"],
                "gbis_eligible": eligibility.gbis_warmfront,
                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                "eco4_message": eligibility.eco4_warmfront["message"],
                "sap": float(eligibility.epc["current-energy-efficiency"]),
                "gbis_eligible_future": eligibility.gbis["eligible"],
                "gbis_eligible_future_message": eligibility.gbis["message"],
                "eco4_eligible_future": eligibility.eco4["eligible"],
                "eco4_eligible_future_message": eligibility.eco4["message"],
                # Property components
                "roof": eligibility.roof["clean_description"],
                "walls": eligibility.walls["clean_description"],
                "cavity_type": eligibility.cavity["type"],
                "heating": eligibility.epc["mainheat-description"],
                "tenure": eligibility.tenure,
                "date_epc": eligibility.epc["lodgement-date"],
                **eligibility.walls,
                **eligibility.roof,
            }
        )

    scoring_df = pd.DataFrame(scoring_data)

    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])
    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )
    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)

    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )

    # Turn the predictions into a total SAP uplift per property and add it on to the current SAP
    predictions = all_predictions["sap_change_predictions"].copy()
    results_df = pd.DataFrame(results)
    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
        results_df[["row_id", "sap"]], how="left", on="row_id"
    )
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
    results_df = results_df.merge(
        predictions[["sap_uplift", "row_id"]],
        how="left",
        on="row_id"
    )
    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]

    eligibility_assessment = []
    for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
        # The upgrade requirements are dependent on the current SAP: a property with a SAP of 38 or below is
        # assessed against a post install SAP of 57, everything else against 71. The confidence drops one level
        # for every 2 SAP points that the prediction falls short of that target.
        target_sap = 57 if row["sap"] <= 38 else 71
        if row["post_install_sap"] >= target_sap:
            eligibility_classification = "highest confidence"
        elif row["post_install_sap"] >= target_sap - 2:
            eligibility_classification = "high confidence"
        elif row["post_install_sap"] >= target_sap - 4:
            eligibility_classification = "medium confidence"
        else:
            eligibility_classification = "unlikely"

        eligibility_assessment.append(
            {
                "row_id": row["row_id"],
                "eligibility_classification": eligibility_classification
            }
        )

    eligibility_assessment = pd.DataFrame(eligibility_assessment)
    results_df = results_df.merge(
        eligibility_assessment, how="left", on="row_id"
    )
    return results_df, scoring_data, nodata
def analyse_results(results_df, data, eco4_prospects_survey_list):
    """
    Join the eligibility results back on to the asset list and the survey list and slice out the groups of
    interest. Nothing is returned - the slices only exist to be inspected while debugging.

    :param results_df: Results dataframe, joined on row_id
    :param data: Asset list with row_id, survey_key and warmfront_identified columns
    :param eco4_prospects_survey_list: Survey list, joined on survey_key
    :return: None
    """
    analysis_data = (
        data[["row_id", "survey_key", "warmfront_identified"]]
        .merge(results_df, how="left", on="row_id")
        .merge(
            eco4_prospects_survey_list[["survey_key", "ADDRESS 1", "NO", "POSTCODE"]],
            how="left", on="survey_key"
        )
    )

    # NEW
    # Swap any null thickness for None before converting the thickness to a number
    raw_thickness = analysis_data["roof_insulation_thickness"]
    analysis_data["roof_insulation_thickness"] = np.where(pd.isnull(raw_thickness), None, raw_thickness)

    def _pitched_roof_thickness(thickness):
        return convert_thickness_to_numeric(thickness, is_flat=False, is_pitched=True)

    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
        _pitched_roof_thickness
    )

    # 2204 rows according to the original note
    is_warmfront = analysis_data["warmfront_identified"] == True
    warmfront_identified = analysis_data[is_warmfront]

    # Because we don't know which property is for which scheme, we'll just look at what we found
    # 335 rows according to the original note
    is_eco4 = analysis_data["eco4_eligible"] == True
    has_thin_roof_insulation = analysis_data["roof_insulation_thickness_numeric"] <= 100
    has_low_sap = analysis_data["sap"] <= 54
    ideal_eco4 = analysis_data[is_eco4 & has_thin_roof_insulation & has_low_sap]

    # GBIS covers whatever is GBIS eligible and was not already picked up as an ideal ECO4 property
    is_gbis = analysis_data["gbis_eligible"] == True
    already_eco4 = analysis_data["row_id"].isin(ideal_eco4["row_id"].values)
    gbis = analysis_data[is_gbis & ~already_eco4]

    ideal_eco4 = ideal_eco4[ideal_eco4["sap"] <= 54]
def analyse_lost_surveys(results_df):
    """
    Slice the lost survey results into the ideal ECO4 and the GBIS groups. Nothing is returned - the slices only
    exist to be inspected while debugging.

    The roof_insulation_thickness column of results_df is overwritten and a roof_insulation_thickness_numeric
    column is added, both in place.

    :param results_df: Results dataframe for the lost surveys
    :return: None
    """
    # Swap any null thickness for None before converting the thickness to a number
    raw_thickness = results_df["roof_insulation_thickness"]
    results_df["roof_insulation_thickness"] = np.where(pd.isnull(raw_thickness), None, raw_thickness)

    def _pitched_roof_thickness(thickness):
        return convert_thickness_to_numeric(thickness, is_flat=False, is_pitched=True)

    results_df["roof_insulation_thickness_numeric"] = results_df["roof_insulation_thickness"].apply(
        _pitched_roof_thickness
    )

    # 25 rows according to the original note
    is_eco4 = results_df["eco4_eligible"] == True
    has_thin_roof_insulation = results_df["roof_insulation_thickness_numeric"] <= 100
    has_low_sap = results_df["sap"] <= 54
    ideal_eco4 = results_df[is_eco4 & has_thin_roof_insulation & has_low_sap]

    # 82 rows according to the original note
    is_gbis = results_df["gbis_eligible"] == True
    already_eco4 = results_df["row_id"].isin(ideal_eco4["row_id"].values)
    gbis = results_df[is_gbis & ~already_eco4]
def app():
    """
    Entry point for the HA25 run: load the asset and survey lists, pull the cleaning inputs from S3, run
    get_epc_data and then load the previously pickled outputs over the top of the fresh ones.

    :return: None
    """
    data, eco4_prospects_survey_list, lost_identified_properties = load_data()
    data["row_id"] = ["ha25_" + str(position) for position in range(len(data))]

    raw_cleaned = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name="retrofit-data-dev"
    )
    cleaned = msgpack.unpackb(raw_cleaned, raw=False)

    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
    )

    created_at = datetime.now().isoformat()

    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")

    results_df, scoring_data, nodata = get_epc_data(
        data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds
    )

    # The outputs of an earlier run were pickled to ha25_10_jan.pickle (the older data was ha25.pickle) as a
    # dictionary with the keys results_df, scoring_data and nodata. Loading it here replaces the values that
    # were just computed above.
    import pickle
    with open("ha25_10_jan.pickle", "rb") as pickle_file:
        saved = pickle.load(pickle_file)

    results_df, scoring_data, nodata = saved["results_df"], saved["scoring_data"], saved["nodata"]

View file

@ -0,0 +1,326 @@
import msgpack
from pathlib import Path
from datetime import datetime
import pandas as pd
from utils.s3 import read_from_s3
from utils.logger import setup_logger
from dotenv import load_dotenv
from backend.app.utils import read_parquet_from_s3
from tqdm import tqdm
from backend.SearchEpc import SearchEpc
from etl.eligibility.Eligibility import Eligibility
from etl.eligibility.ha_15_32.app import prepare_model_data_row
from etl.epc.DataProcessor import DataProcessor
from etl.epc.settings import COLUMNS_TO_MERGE_ON
from backend.ml_models.api import ModelApi
import re
ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
logger = setup_logger()
load_dotenv(ENV_FILE)
def load_ha_33():
    """
    Read the four HA33 asset list CSVs, drop their "Unnamed:" columns and stack them into one dataframe.

    Also widens the pandas display options so that the frame can be printed in full.
    :return: The concatenated asset list
    """
    display_options = (
        ('display.max_rows', 500),
        ('display.max_columns', 500),
        ('display.width', 1000),
    )
    for option_name, option_value in display_options:
        pd.set_option(option_name, option_value)

    parts = []
    for file in (
        "HA 33 Assets 1 of 4.csv",
        "HA 33 Assets 2 of 4.csv",
        "HA 33 Assets 3 of 4.csv",
        "HA 33 Assets 4 of 4.csv",
    ):
        part = pd.read_csv(f"etl/eligibility/ha_15_32/{file}", low_memory=False)
        # Columns without a header are read in as "Unnamed: n" - we don't want them
        unnamed_columns = [column for column in part.columns if "Unnamed:" in column]
        parts.append(part.drop(columns=unnamed_columns))

    return pd.concat(parts)
def standardise_ha33(data):
    """
    Standardise the HA33 asset list: drop rows without an address, split the comma separated ADDRESS into
    address1, address2, ... columns, strip the "FT" flat prefix from address1 and trim the column names.

    :param data: Asset list with an ADDRESS column
    :return: A new dataframe with the address columns appended. address1 to address5 are always present; an
        address with fewer parts has None in the trailing columns and extra parts get address6, address7, ...
    """
    data = data[~pd.isnull(data["ADDRESS"])]

    split_addresses = data["ADDRESS"].str.split(",", expand=True)
    # Name the columns by how many parts we actually got - assuming exactly five raises a length mismatch as
    # soon as the longest address has more or fewer parts than that
    split_addresses.columns = [f"address{i + 1}" for i in range(split_addresses.shape[1])]
    # Downstream code expects address1 to address5 to exist
    for column in ("address1", "address2", "address3", "address4", "address5"):
        if column not in split_addresses.columns:
            split_addresses[column] = None

    data = pd.concat([data, split_addresses], axis=1)

    # Using regex to replace 'FT {number}' or 'FT{number}', with '{number}'
    data["address1"] = data["address1"].str.replace(r"FT\s*(\d+)", r"\1", regex=True)
    data.columns = [col.strip() for col in data.columns]
    # TODO: we have 23 THIRTY SEVENTH AVENUE, can we replace THIRTY SEVENTH with 37TH
    return data
def get_ha_33data(data, cleaned, cleaning_data, created_at):
    """
    Search for an EPC for every HA33 property, run the eligibility checks against it and score the properties
    that pass the ECO4 warmfront check with the SAP change model.

    :param data: Standardised HA33 asset list. The columns read here are address1, address2, ADDRESS,
        POST CODE, PROPERTY TYPE and row_id
    :param cleaned: Passed through to Eligibility and prepare_model_data_row
    :param cleaning_data: Passed through to prepare_model_data_row and DataProcessor.apply_averages_cleaning
    :param created_at: Timestamp passed to prepare_model_data_row and ModelApi
    :return: Tuple of (results dataframe with one row per property that has an EPC, the list of scoring rows sent
        to the model, the list of row IDs for which the EPC search returned nothing)
    """
    house_type_lookup = {
        "Bungalow": "Bungalow",
        "Flat": "Flat",
        'House': "House",
        'Maisonette': "Maisonette",
        'Flalolflfp mujjjjunjimj': "Flat",
        'STUDIO': "Flat",
    }
    # Compiled once as it is used for every row
    flat_pattern = re.compile(r'flat\s+(\d+)', re.IGNORECASE)

    scoring_data = []
    results = []
    nodata = []

    for _, house in tqdm(data.iterrows(), total=len(data)):
        # address2 is missing (None/NaN) when the address only has a single part
        address2 = house["address2"]
        address2 = address2.strip() if isinstance(address2, str) else ""

        # Check if we gave a flat in address 2
        if flat_pattern.search(address2):
            address1 = address2
        else:
            address1 = house["address1"].strip()

        # I.e. just a number
        if len(address1) <= 3:
            address1 = address1 + " " + address2

        searcher = SearchEpc(
            address1=address1,
            postcode=house["POST CODE"]
        )
        response = searcher.search()
        if response["status"] == 204:
            nodata.append(house["row_id"])
            continue

        newest_epc, older_epcs, _ = searcher.retrieve(
            property_type=house_type_lookup.get(house["PROPERTY TYPE"], None),
            address=house["ADDRESS"],
        )

        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
        eligibility.check_gbis_warmfront()
        eligibility.check_eco4_warmfront()
        # If the house is not identified, we do a full gbis and eco4 check
        eligibility.check_gbis()
        eligibility.check_eco4()

        if eligibility.eco4_warmfront["eligible"]:
            scoring_dictionary = prepare_model_data_row(
                property_id=house["row_id"],
                modelling_epc=eligibility.epc,
                cleaned=cleaned,
                cleaning_data=cleaning_data,
                created_at=created_at
            )
            scoring_data.extend(scoring_dictionary)

        # If nothing is eligible or gbis is eligible, then we make a record this
        results.append(
            {
                "row_id": house["row_id"],
                "gbis_eligible": eligibility.gbis_warmfront,
                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                "eco4_message": eligibility.eco4_warmfront["message"],
                "sap": float(eligibility.epc["current-energy-efficiency"]),
                "gbis_eligible_future": eligibility.gbis["eligible"],
                "gbis_eligible_future_message": eligibility.gbis["message"],
                "eco4_eligible_future": eligibility.eco4["eligible"],
                "eco4_eligible_future_message": eligibility.eco4["message"],
                # Property components
                "roof": eligibility.roof["clean_description"],
                "walls": eligibility.walls["clean_description"],
                "heating": eligibility.epc["mainheat-description"],
                "tenure": eligibility.tenure,
                "date_epc": eligibility.epc["lodgement-date"],
            }
        )

    # NOTE: results, scoring_data and nodata were previously pickled to ha33_results.pickle at this point so
    # that the search above did not have to be repeated

    scoring_df = pd.DataFrame(scoring_data)

    # Implement the same process that is being used in the recommendation engine to cleaning scoring_df
    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])
    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )
    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)

    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )

    # merge the predictions onto the scoring_df
    predictions = all_predictions["sap_change_predictions"].copy()
    results_df = pd.DataFrame(results)
    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
        results_df[["row_id", "sap"]], how="left", on="row_id"
    )
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
    results_df = results_df.merge(
        predictions[["sap_uplift", "row_id"]],
        how="left",
        on="row_id"
    )
    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]

    eligibility_assessment = []
    for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
        # The upgrade requirements are dependent on the current SAP: a property with a SAP of 38 or below is
        # assessed against a post install SAP of 57, everything else against 71. The confidence drops one level
        # for every 2 SAP points that the prediction falls short of that target.
        target_sap = 57 if row["sap"] <= 38 else 71
        if row["post_install_sap"] >= target_sap:
            eligibility_classification = "highest confidence"
        elif row["post_install_sap"] >= target_sap - 2:
            eligibility_classification = "high confidence"
        elif row["post_install_sap"] >= target_sap - 4:
            eligibility_classification = "medium confidence"
        else:
            eligibility_classification = "unlikely"

        eligibility_assessment.append(
            {
                "row_id": row["row_id"],
                "eligibility_classification": eligibility_classification
            }
        )

    eligibility_assessment = pd.DataFrame(eligibility_assessment)
    results_df = results_df.merge(
        eligibility_assessment, how="left", on="row_id"
    )
    return results_df, scoring_data, nodata
def analyse_ha_33(results_df, data):
    """
    Compute a set of summary counts and slices over the HA33 results. Nothing is returned - the values only
    exist to be inspected while debugging.

    :param results_df: Results dataframe with the eligibility flags, walls, roof, tenure and
        eligibility_classification columns
    :param data: The HA33 asset list, with row_id and PROPERTY TYPE columns
    :return: None
    """
    # This slice is used further down, so it has to be defined (it used to be commented out, which made the
    # function fail with a NameError)
    results_df_social = results_df[results_df["tenure"] == "Rented (social)"]

    # Property types of everything that we found an EPC for
    property_type_counts = data[data["row_id"].isin(results_df["row_id"].values)]["PROPERTY TYPE"].value_counts()

    n_identified = (results_df["gbis_eligible"] | results_df["eco4_eligible"]).sum()
    n_eco4 = results_df["eco4_eligible"].sum()
    # GBIS only counts the properties that are not already ECO4 eligible
    n_gbis = results_df[~results_df["eco4_eligible"]]["gbis_eligible"].sum()

    eco_eligibile = results_df[results_df["eco4_eligible"]]
    eco_wall_counts = eco_eligibile["walls"].value_counts()
    eco_roof_counts = eco_eligibile["roof"].value_counts()

    identified_tenure_counts = results_df[
        results_df["gbis_eligible"] | results_df["eco4_eligible"]
    ]["tenure"].value_counts()

    social_classification_counts = results_df_social["eligibility_classification"].value_counts()

    # Properties which are not eligible under either warmfront check but pass the full checks
    not_identified = ~(results_df["gbis_eligible"] | results_df["eco4_eligible"])
    future_possibilities_eco = results_df[
        (results_df["eco4_eligible_future"] == True) & not_identified
    ].copy()
    future_possibilities_gbis = results_df[
        (results_df["gbis_eligible_future"] == True) & (results_df["eco4_eligible_future"] == False) &
        not_identified
    ].copy()
def app():
    """
    Entry point for the HA33 run, which is large enough to be dealt with on its own: load and standardise the
    asset list, pull the cleaning inputs from S3, run get_ha_33data and then load the previously pickled
    results over the top of the fresh ones.

    :return: None
    """
    data = standardise_ha33(load_ha_33())
    data["row_id"] = ["h33" + str(position) for position in range(len(data))]

    raw_cleaned = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name="retrofit-data-dev"
    )
    cleaned = msgpack.unpackb(raw_cleaned, raw=False)

    cleaning_data = read_parquet_from_s3(
        bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
    )

    created_at = datetime.now().isoformat()

    results_df, _, _ = get_ha_33data(data, cleaned, cleaning_data, created_at)

    # Read in the outputs of an earlier run - this replaces the asset list and the results computed above
    import pickle
    with open("ha33_results.pickle", "rb") as pickle_file:
        data = pickle.load(pickle_file)

    results_df = pd.DataFrame(data["results"])
    scoring_data, nodata = data["scoring_data"], data["nodata"]

View file

@ -0,0 +1,328 @@
import os
import msgpack
from pathlib import Path
from datetime import datetime
import numpy as np
import pandas as pd
from utils.s3 import read_from_s3
from utils.logger import setup_logger
from dotenv import load_dotenv
from utils.s3 import read_dataframe_from_s3_parquet
from tqdm import tqdm
from backend.SearchEpc import SearchEpc
from etl.eligibility.Eligibility import Eligibility
from etl.eligibility.ha_15_32.app import prepare_model_data_row
from etl.epc.DataProcessor import DataProcessor
from etl.epc.settings import COLUMNS_TO_MERGE_ON
from backend.ml_models.api import ModelApi
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from recommendations.recommendation_utils import calculate_cavity_age
from recommendation_utils import convert_thickness_to_numeric
import re
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
logger = setup_logger()
load_dotenv(ENV_FILE)
def load_ha_4():
    """
    Read the HA4 asset list CSV.

    Also widens the pandas display options so that the frame can be printed in full.
    :return: The asset list as a dataframe
    """
    display_options = (
        ('display.max_rows', 500),
        ('display.max_columns', 500),
        ('display.width', 1000),
    )
    for option_name, option_value in display_options:
        pd.set_option(option_name, option_value)

    asset_list = pd.read_csv("etl/eligibility/ha_15_32/HA 4 Asset List.csv", low_memory=False)
    return asset_list
def standardise_ha_4(data):
    """
    Standardise the HA4 asset list: drop the rows with an unusable postcode and clean up the location names.

    :param data: Asset list with "Location Name" and "Post Code" columns
    :return: A cleaned copy of the asset list - the dataframe passed in is left untouched
    """
    # Remove any unusable postcodes. We filter and copy first so that the clean up below never writes to the
    # caller's dataframe.
    data = data[data["Post Code"] != '\\\\'].copy()

    # Location name contains some strings like {0664} which we remove
    # (raw string, so that the backslashes reach the regex engine without an invalid escape sequence warning)
    data["Location Name"] = data["Location Name"].str.replace(r'\{.*?\}', '', regex=True)

    # Trim whitespace from either end of location name
    data["Location Name"] = data["Location Name"].str.strip()

    # Some specific replacements
    data["Location Name"] = np.where(
        data["Location Name"] == "Calderbrook Pl & Cog La",
        "Calderbrook Place",
        data["Location Name"]
    )
    return data
def _classify_ha_4_eco4_confidence(sap, post_install_sap):
    """
    Grade how comfortably a property's predicted post-install SAP clears its upgrade target.

    Properties with a current SAP of 38 or below are graded against thresholds of 57/55/53, every other property
    against 71/69/67. A NaN post-install SAP fails every comparison and is therefore graded "unlikely".

    :param sap: Current SAP score of the property
    :param post_install_sap: Predicted SAP score once the measures are installed
    :return: One of "highest confidence", "high confidence", "medium confidence" or "unlikely"
    """
    # The upgrade requirements are dependent on the current SAP
    thresholds = (57, 55, 53) if sap <= 38 else (71, 69, 67)
    labels = ("highest confidence", "high confidence", "medium confidence")
    for threshold, label in zip(thresholds, labels):
        if post_install_sap >= threshold:
            return label
    return "unlikely"


def get_ha_4_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
    """
    Search for the EPCs of every entry in the HA 4 asset list, check eligibility and score the eligible properties.

    :param data: Standardised asset list; must carry "Address Line 1", "Location Name", "Post Code" and "row_id"
    :param cleaned: Cleaned EPC description data, passed on to Eligibility and prepare_model_data_row
    :param cleaning_data: Averages used by DataProcessor.apply_averages_cleaning
    :param created_at: Timestamp for this run
    :param photo_supply_lookup: Solar lookup, passed on to prepare_model_data_row
    :param floor_area_decile_thresholds: Floor area thresholds, passed on to prepare_model_data_row
    :return: Tuple of (results DataFrame with one row per UPRN, list of scoring rows, list of entries with no data)
    """
    # Maps an asset list archetype onto the property type used by the EPC search. Anything not listed here
    # (including a missing "Archetype" column) results in no property type filter being applied.
    property_type_lookup = {
        "House": "House",
        "Flat": "Flat",
        "Bungalow": "Bungalow",
        "Maisonette": "Maisonette",
    }

    scoring_data = []
    results = []
    nodata = []

    for _, property_meta in tqdm(data.iterrows(), total=len(data)):
        # For many of the entries in this dataset, we're actually given an entire building, so we fetch the EPCs
        # for every property in the building
        property_type = property_type_lookup.get(property_meta.get("Archetype"))

        searcher = SearchEpc(
            address1=property_meta["Address Line 1"],
            postcode=property_meta["Post Code"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            property_type=property_type,
        )
        searcher.find_property(skip_os=True)

        if searcher.newest_epc is None:
            # Nothing found on the first address line, so fall back to the location name
            searcher = SearchEpc(
                address1=property_meta["Location Name"],
                postcode=property_meta["Post Code"],
                auth_token=EPC_AUTH_TOKEN,
                os_api_key=None,
                property_type=property_type,
            )
            searcher.search()

            if searcher.newest_epc is None:
                nodata.append(property_meta["row_id"])
                continue

        # NOTE(review): search() is repeated here even when the fallback above has already called it, and this
        # branch records the whole row in nodata whereas the branch above records only the row_id - confirm both
        searcher.search()
        if searcher.data is None:
            nodata.append(property_meta.to_dict())
            continue

        epcs = pd.DataFrame(searcher.data["rows"])

        # Take the newest EPC by UPRN
        epcs = epcs.sort_values(by=["lodgement-date"], ascending=False)
        newest_epcs = epcs.drop_duplicates(subset=["uprn"], keep="first")

        # For each EPC, we now check eligibility
        for _, epc in newest_epcs.iterrows():
            eligibility = Eligibility(epc=epc.to_dict(), cleaned=cleaned)
            eligibility.check_gbis_warmfront()
            eligibility.check_eco4_warmfront()

            # If the house is not identified, we do a full gbis and eco4 check
            eligibility.check_gbis()
            eligibility.check_eco4()

            if eligibility.eco4_warmfront["eligible"]:
                # Older EPCs for the same property, excluding the one being modelled
                old_data = epcs[
                    (epcs["uprn"] == epc["uprn"]) &
                    (epcs["lmk-key"] != epc["lmk-key"])
                ].to_dict("records")

                full_sap_epc = epcs[
                    (epcs["uprn"] == epc["uprn"]) &
                    (epcs["transaction-type"] == "new dwelling")
                ].to_dict("records")

                scoring_dictionary = prepare_model_data_row(
                    property_id=eligibility.epc["uprn"],
                    modelling_epc=eligibility.epc,
                    cleaned=cleaned,
                    cleaning_data=cleaning_data,
                    created_at=created_at,
                    old_data=old_data,
                    full_sap_epc=full_sap_epc,
                    photo_supply_lookup=photo_supply_lookup,
                    floor_area_decile_thresholds=floor_area_decile_thresholds
                )
                scoring_data.extend(scoring_dictionary)

            results.append(
                {
                    "uprn": epc["uprn"],
                    "Location Name": property_meta["Location Name"],
                    "Post Code": property_meta["Post Code"],
                    "property_type": eligibility.epc["property-type"],
                    "gbis_eligible": eligibility.gbis_warmfront,
                    "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                    "eco4_message": eligibility.eco4_warmfront["message"],
                    "sap": float(eligibility.epc["current-energy-efficiency"]),
                    "gbis_eligible_future": eligibility.gbis["eligible"],
                    "gbis_eligible_future_message": eligibility.gbis["message"],
                    "eco4_eligible_future": eligibility.eco4["eligible"],
                    "eco4_eligible_future_message": eligibility.eco4["message"],
                    # Property components
                    "roof": eligibility.roof["clean_description"],
                    "walls": eligibility.walls["clean_description"],
                    "cavity_type": eligibility.cavity["type"],
                    "heating": eligibility.epc["mainheat-description"],
                    "tenure": eligibility.tenure,
                    "date_epc": eligibility.epc["lodgement-date"],
                }
            )

    scoring_df = pd.DataFrame(scoring_data)

    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )

    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])

    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )

    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)

    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)

    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )

    predictions = all_predictions["sap_change_predictions"].copy()

    results_df = pd.DataFrame(results)

    # Attach the current SAP to every prediction so the uplift can be computed, then total the uplift per UPRN
    predictions = predictions.rename(columns={"property_id": "uprn"}).merge(
        results_df[["uprn", "sap"]], how="left", on="uprn"
    )
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("uprn")["sap_uplift"].sum().reset_index()

    results_df = results_df.merge(
        predictions[["sap_uplift", "uprn"]],
        how="left",
        on="uprn"
    )

    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]

    results_df = results_df[~pd.isnull(results_df["uprn"])]

    eligibility_assessment = [
        {
            "uprn": row["uprn"],
            "eligibility_classification": _classify_ha_4_eco4_confidence(row["sap"], row["post_install_sap"])
        }
        for _, row in results_df[results_df["eco4_eligible"] == True].iterrows()
    ]

    # Naming the columns keeps the merge below valid even when no property was ECO4 eligible
    eligibility_assessment = pd.DataFrame(
        eligibility_assessment, columns=["uprn", "eligibility_classification"]
    )

    results_df = results_df.merge(
        eligibility_assessment, how="left", on="uprn"
    )

    # We have some properties that are duplicated so we take just one instance
    results_df = results_df.drop_duplicates(subset=["uprn"])

    return results_df, scoring_data, nodata
def analyse_ha_4(results_df, data):
    """
    Ad hoc breakdown of the HA 4 results, intended to be stepped through interactively.

    The counts and frames below are computed for inspection only; nothing is returned.

    :param results_df: Results frame as produced by get_ha_4_data
    :param data: The asset list (not used by the calculations below)
    """
    gbis_now = results_df["gbis_eligible"]
    eco4_now = results_df["eco4_eligible"]
    identified_now = gbis_now | eco4_now

    n_identified = identified_now.sum()
    n_eco4 = eco4_now.sum()
    # GBIS is only counted where the property is not already ECO4 eligible
    n_gbis = results_df[~eco4_now]["gbis_eligible"].sum()

    eco_eligibile = results_df[eco4_now]
    eco_eligibile["eligibility_classification"].value_counts()

    # Properties that are not identified today but pass one of the full scheme checks
    eco4_future = results_df["eco4_eligible_future"] == True
    future_possibilities_eco = results_df[eco4_future & (~identified_now)].copy()

    gbis_future_only = (results_df["gbis_eligible_future"] == True) & (results_df["eco4_eligible_future"] == False)
    future_possibilities_gbis = results_df[gbis_future_only & (~identified_now)].copy()

    total_future_possibilities = len(future_possibilities_eco.index) + len(future_possibilities_gbis.index)
def app():
    """
    Run the HA 4 eligibility pipeline end to end: load and standardise the asset list, fetch the supporting
    datasets from S3 and produce the results via get_ha_4_data.
    """
    bucket = "retrofit-data-dev"

    data = standardise_ha_4(load_ha_4())
    data["row_id"] = [f"h4{position}" for position in range(len(data))]

    packed_cleaned = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name=bucket
    )
    cleaned = msgpack.unpackb(packed_cleaned, raw=False)

    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name=bucket, file_key="sap_change_model/cleaning_dataset.parquet",
    )

    created_at = datetime.now().isoformat()

    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=bucket)

    results_df, scoring_data, nodata = get_ha_4_data(
        data=data,
        cleaned=cleaned,
        cleaning_data=cleaning_data,
        created_at=created_at,
        photo_supply_lookup=photo_supply_lookup,
        floor_area_decile_thresholds=floor_area_decile_thresholds
    )

    # To keep a local snapshot of a run:
    # import pickle
    # with open("ha_4.pickle", "wb") as f:
    #     pickle.dump(
    #         {
    #             "results_df": results_df,
    #             "scoring_data": scoring_data,
    #             "nodata": nodata
    #         }, f)

    # To restore a snapshot:
    # import pickle
    # with open("ha_4.pickle", "rb") as f:
    #     data = pickle.load(f)
    # results_df = data["results_df"]
    # scoring_data = data["scoring_data"]
    # nodata = data["nodata"]

View file

@ -0,0 +1,383 @@
import os
import msgpack
import openpyxl
from openpyxl.styles.colors import COLOR_INDEX
from pathlib import Path
from datetime import datetime
import pandas as pd
import numpy as np
from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet
from utils.logger import setup_logger
from dotenv import load_dotenv
from tqdm import tqdm
from backend.SearchEpc import SearchEpc
from etl.eligibility.Eligibility import Eligibility
from etl.eligibility.ha_15_32.app import prepare_model_data_row
from etl.epc.DataProcessor import DataProcessor
from etl.epc.settings import COLUMNS_TO_MERGE_ON
from backend.ml_models.api import ModelApi
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from recommendations.recommendation_utils import calculate_cavity_age
from recommendation_utils import convert_thickness_to_numeric
# Path handed to load_dotenv below
ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
logger = setup_logger()
# The .env file is loaded before any setting is read, so values defined only in that file are visible to os.getenv
load_dotenv(ENV_FILE)
# Token passed to SearchEpc as auth_token
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
# NOTE(review): not referenced by the functions in this script, which pass os_api_key=None - confirm it is needed
OS_API_KEY = os.getenv("ORDNANCE_SURVEY_API_KEY")
def _resolve_row_colour(cell):
    """
    Return the fill colour of a cell as a colour string, or None when the cell carries no fill.

    :param cell: An openpyxl cell
    :return: The colour string, or None for the '00000000' (no fill) value
    """
    colour_index = cell.fill.start_color.index
    if colour_index == '00000000':
        return None
    if isinstance(colour_index, int):
        # An integer is a position in openpyxl's COLOR_INDEX palette
        return COLOR_INDEX[colour_index]
    # Anything else is already a colour string and must not be used as a palette position
    return colour_index


def load_data():
    """
    Load the data from the excel

    Each row's fill colour (taken from its first cell) is read alongside the values and translated into a
    "row_colour_name" and a "row_code" column.

    :return: DataFrame of the asset list with the row_color, row_colour_name and row_code columns added
    """
    workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 7 ASSET LIST.xlsx')
    sheet = workbook.active

    # Prepare lists to collect rows data and their colors
    rows_data = []
    rows_colors = []

    for row in sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
        rows_data.append([cell.value for cell in row])
        rows_colors.append(_resolve_row_colour(row[0]))

    df = pd.DataFrame(rows_data, columns=[cell.value for cell in sheet[1]])

    # Add the row colors as a new column
    df['row_color'] = rows_colors

    # Name the ninth column by assigning a fresh list of labels rather than writing into the Index's array
    column_names = list(df.columns)
    column_names[8] = "is_active"
    df.columns = column_names

    # Remove None columns
    df = df.dropna(axis=1, how='all')

    # We now parse the colours
    # NOTE(review): any colour other than the two matched below, including a row with no fill, is labelled
    # "yellow" and therefore coded "needs criteria change" - confirm that is the intended default
    df["row_colour_name"] = np.where(
        df["row_color"] == "0000FFFF", "red",
        np.where(df["row_color"] == "00FF00FF", "green", "yellow")
    )

    df["row_code"] = np.where(
        df["row_colour_name"] == "red", "invalid",
        np.where(df["row_colour_name"] == "green", "potential ECO4", "needs criteria change")
    )

    return df
def _ha7_confidence_label(sap, post_install_sap):
    """
    Grade a predicted post-install SAP against the thresholds that apply to the property's current SAP.

    :param sap: Current SAP score of the property
    :param post_install_sap: Predicted SAP score once the measures are installed
    :return: One of "highest confidence", "high confidence", "medium confidence" or "unlikely"
    """
    # The upgrade requirements are dependent on the current SAP
    cut_offs = (57, 55, 53) if sap <= 38 else (71, 69, 67)
    for cut_off, label in zip(cut_offs, ("highest confidence", "high confidence", "medium confidence")):
        if post_install_sap >= cut_off:
            return label
    return "unlikely"


def get_ha7_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
    """
    Find the newest EPC for every property in the HA 7 asset list, check eligibility and score eligible properties.

    :param data: Asset list carrying "Address", "Address2", "Postcode", "Archetype" and "row_id"
    :param cleaned: Cleaned EPC description data, passed on to Eligibility and the scoring helpers
    :param cleaning_data: Averages used by DataProcessor.apply_averages_cleaning
    :param created_at: Timestamp for this run
    :param photo_supply_lookup: Solar lookup, passed on to prepare_model_data_row
    :param floor_area_decile_thresholds: Floor area thresholds, passed on to prepare_model_data_row
    :return: Tuple of (results DataFrame, list of scoring rows, list of row ids for which no EPC was found)
    """
    # Only the broad archetypes are mapped; the built form variants (Mid Terrace, End Terrace, Semi Detached,
    # Detached) are deliberately left out, so they result in no property type being passed to the search
    property_type_lookup = {
        "House": "House",
        "Flat": "Flat",
        "Bungalow": "Bungalow",
        "Maisonette": "Maisonette",
    }

    scoring_data, results, nodata = [], [], []

    for _, house in tqdm(data.iterrows(), total=len(data)):
        # Fall back to the second address field when the first is empty
        address = house["Address"] if house["Address"] else house["Address2"]

        searcher = SearchEpc(
            address1=address,
            postcode=house["Postcode"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            property_type=property_type_lookup.get(house["Archetype"]),
        )
        searcher.find_property(skip_os=True)

        newest_epc = searcher.newest_epc
        if newest_epc is None:
            nodata.append(house["row_id"])
            continue

        older_epcs = searcher.older_epcs
        full_sap_epc = searcher.full_sap_epc

        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
        eligibility.check_gbis_warmfront()
        eligibility.check_eco4_warmfront()

        # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
        # Loft MUST be suitable
        needs_cavity_age = (
            eligibility.walls["is_cavity_wall"] and
            eligibility.walls["is_filled_cavity"] and
            eligibility.loft["suitability"] and
            eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
        )
        # We check the age of the cavity and if it's particularly old, we flag it
        cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned) if needs_cavity_age else None

        # If the house is not identified, we do a full gbis and eco4 check
        eligibility.check_gbis()
        eligibility.check_eco4()

        if eligibility.eco4_warmfront["eligible"]:
            scoring_data.extend(
                prepare_model_data_row(
                    property_id=house["row_id"],
                    modelling_epc=eligibility.epc,
                    cleaned=cleaned,
                    cleaning_data=cleaning_data,
                    created_at=created_at,
                    old_data=older_epcs,
                    full_sap_epc=full_sap_epc,
                    photo_supply_lookup=photo_supply_lookup,
                    floor_area_decile_thresholds=floor_area_decile_thresholds
                )
            )

        # Every property with an EPC is recorded, whether or not it turned out to be eligible
        record = {
            "row_id": house["row_id"],
            "address": house["Address"],
            "postcode": house["Postcode"],
            "gbis_eligible": eligibility.gbis_warmfront,
            "eco4_eligible": eligibility.eco4_warmfront["eligible"],
            "eco4_message": eligibility.eco4_warmfront["message"],
            "sap": float(eligibility.epc["current-energy-efficiency"]),
            "gbis_eligible_future": eligibility.gbis["eligible"],
            "gbis_eligible_future_message": eligibility.gbis["message"],
            "eco4_eligible_future": eligibility.eco4["eligible"],
            "eco4_eligible_future_message": eligibility.eco4["message"],
            # Property components
            "roof": eligibility.roof["clean_description"],
            "walls": eligibility.walls["clean_description"],
            "heating": eligibility.epc["mainheat-description"],
            "tenure": eligibility.tenure,
            "date_epc": eligibility.epc["lodgement-date"],
            **newest_epc,
            "cavity_age": cavity_age,
            **eligibility.walls,
            **eligibility.roof,
        }
        results.append(record)

    scoring_df = pd.DataFrame(scoring_data)

    # Implement the same process that is being used in the recommendation engine to clean scoring_df
    # Perform the same cleaning as in the model - first clean number of room variables though
    room_merge_columns = ['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY']
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=room_merge_columns,
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )

    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    )
    scoring_df = scoring_df.drop(columns=["LOCAL_AUTHORITY"])

    columns_left_uncleaned = [
        column for column in scoring_df.columns
        if ("thermal_transmittance" in column) or ("insulation_thickness" in column) or ("ENERGY_EFF" in column)
    ]
    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=columns_left_uncleaned
    )

    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)

    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)

    prediction_buckets = {
        "sap_change_predictions": "retrofit-sap-predictions-dev",
        "heat_demand_predictions": "retrofit-heat-predictions-dev",
        "carbon_change_predictions": "retrofit-carbon-predictions-dev"
    }
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets=prediction_buckets
    )

    predictions = all_predictions["sap_change_predictions"].copy()

    results_df = pd.DataFrame(results)

    # Attach the current SAP to every prediction so the uplift can be computed, then total the uplift per row
    predictions = predictions.rename(columns={"property_id": "row_id"})
    predictions = predictions.merge(results_df[["row_id", "sap"]], how="left", on="row_id")
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()

    results_df = results_df.merge(predictions[["sap_uplift", "row_id"]], how="left", on="row_id")
    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]

    eco4_rows = results_df[results_df["eco4_eligible"] == True]
    eligibility_assessment = pd.DataFrame([
        {
            "row_id": row["row_id"],
            "eligibility_classification": _ha7_confidence_label(row["sap"], row["post_install_sap"])
        }
        for _, row in eco4_rows.iterrows()
    ])

    results_df = results_df.merge(eligibility_assessment, how="left", on="row_id")

    return results_df, scoring_data, nodata
def analyse_ha_7(results_df, data):
    """
    Ad hoc breakdown of the HA 7 results against the colour coding supplied with the asset list.

    The frames and counts below are built for interactive inspection only; nothing is returned.

    :param results_df: Results frame as produced by get_ha7_data
    :param data: Asset list carrying "row_id", "row_code", "Property Type" and "Construction Year Band"
    """
    asset_columns = ["row_id", "row_code", "Property Type", "Construction Year Band"]
    analysis_data = results_df.merge(data[asset_columns], how="left", on="row_id")

    analysis_data["row_code"].value_counts()

    # NEW
    # Normalise missing thickness values to None before converting them to numbers
    raw_thickness = analysis_data["roof_insulation_thickness"]
    analysis_data["roof_insulation_thickness"] = np.where(pd.isnull(raw_thickness), None, raw_thickness)

    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
        lambda thickness: convert_thickness_to_numeric(thickness, is_flat=False, is_pitched=True)
    )

    eco4_now = analysis_data["eco4_eligible"] == True
    thin_roof = analysis_data["roof_insulation_thickness_numeric"] <= 100
    ideal_eco4 = analysis_data[eco4_now & thin_roof]

    thick_roof = analysis_data["roof_insulation_thickness_numeric"] > 100
    secondary_eco4_warmfront_not_sold = analysis_data[
        (analysis_data["eco4_eligible"] == True) & thick_roof
    ]

    # underperforming cavities
    failed_on_cavity = analysis_data["eco4_message"] == "Failed due to full cavity - check cavity age"
    old_cavity = analysis_data["cavity_age"] > 9 * 365
    underperforming_cavities = analysis_data[
        failed_on_cavity & old_cavity & (analysis_data["roof_insulation_thickness_numeric"] <= 100)
    ]

    gbis_only = (analysis_data["gbis_eligible"] == True) & (analysis_data["eco4_eligible"] == False)
    identified_gbis_not_sold = analysis_data[gbis_only]

    wf_identified = analysis_data[analysis_data["row_code"] == "potential ECO4"]
    # END NEW

    warmfront_identification = analysis_data["row_code"].value_counts()

    warmfront_identified = analysis_data[analysis_data["row_code"] == "potential ECO4"]
    warmfront_identified["walls"].value_counts(normalize=True)
    analysis_data["Construction Year Band"].value_counts(normalize=True)

    # Number of days from today
    epc_dates = pd.to_datetime(warmfront_identified["date_epc"])
    days_to_today = (datetime.now() - epc_dates).dt.days
    days_to_today.mean()

    property_types = analysis_data["Property Type"].value_counts()

    identified_now = results_df["gbis_eligible"] | results_df["eco4_eligible"]
    n_identified = identified_now.sum()

    eco_identified = results_df[results_df["eco4_eligible"]]
    n_eco4 = eco_identified["eco4_eligible"].sum()

    gbis_identified = results_df[~results_df["eco4_eligible"] & results_df["gbis_eligible"]]
    n_gbis = results_df[~results_df["eco4_eligible"]]["gbis_eligible"].sum()

    eco_eligibile = results_df[results_df["eco4_eligible"]]
    eco_eligibile["eligibility_classification"].value_counts()

    # Properties that are not identified today but pass one of the full scheme checks
    not_identified_now = ~(results_df["gbis_eligible"] | results_df["eco4_eligible"])
    future_possibilities_eco = results_df[
        (results_df["eco4_eligible_future"] == True) & not_identified_now
    ].copy()

    future_possibilities_gbis = results_df[
        (results_df["gbis_eligible_future"] == True)
        & (results_df["eco4_eligible_future"] == False)
        & (~(results_df["gbis_eligible"] | results_df["eco4_eligible"]))
    ].copy()

    total_future_possibilities = len(future_possibilities_eco.index) + len(future_possibilities_gbis.index)
def app():
    """
    Run the HA 7 eligibility pipeline end to end: load the asset list, fetch the supporting datasets from S3 and
    produce the results via get_ha7_data.
    """
    bucket = "retrofit-data-dev"

    data = load_data()
    data["row_id"] = [f"ha7{position}" for position in range(len(data))]

    packed_cleaned = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name=bucket
    )
    cleaned = msgpack.unpackb(packed_cleaned, raw=False)

    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name=bucket, file_key="sap_change_model/cleaning_dataset.parquet",
    )

    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=bucket)

    created_at = datetime.now().isoformat()

    results_df, scoring_data, nodata = get_ha7_data(
        data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds
    )

    # To keep a local snapshot of a run:
    # import pickle
    # with open("ha7_results_jan_10.pkl", "wb") as f:
    #     pickle.dump({"results_df": results_df, "scoring_data": scoring_data, "nodata": nodata}, f)

    # To restore a snapshot:
    # import pickle
    # with open("ha7_results_jan_10.pkl", "rb") as f:
    #     old_data = pickle.load(f)
    # results_df = old_data["results_df"]
    # scoring_data = old_data["scoring_data"]
    # nodata = old_data["nodata"]

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,11 @@
pandas
pydantic==1.10.11
epc-api-python==1.0.2
msgpack
tqdm
python-dotenv
boto3
textblob
pyarrow==12.0.1
fuzzywuzzy
python-Levenshtein

View file

@ -5,6 +5,10 @@ from BaseUtility import Definitions
from etl.epc.settings import (
DATA_PROCESSOR_SETTINGS,
EARLIEST_EPC_DATE,
IGNORED_TRANSACTION_TYPES,
IGNORED_FLOOR_LEVELS,
IGNORED_PROPERTY_TYPES,
IGNORED_TENURES,
FULLY_GLAZED_DESCRIPTIONS,
AVERAGE_FIXED_FEATURES,
BUILT_FORM_REMAP,
@ -24,8 +28,14 @@ from recommendations.rdsap_tables import FLOOR_LEVEL_MAP
from typing import List
# TODO: change the setting columns to lower
STARTING_SUFFIX_COMPONENT_COLS = [x.lower() for x in STARTING_SUFFIX_COMPONENT_COLS]
NO_SUFFIX_COMPONENT_COLS = [x.lower() for x in NO_SUFFIX_COMPONENT_COLS]
ENDING_SUFFIX_COMPONENT_COLS = [x.lower() for x in ENDING_SUFFIX_COMPONENT_COLS]
POTENTIAL_COLUMNS = [x.lower() for x in POTENTIAL_COLUMNS]
# These lookups are used to clean the construction age band
bounds_map = {
construction_age_bounds_map = {
"England and Wales: before 1900": {"l": 0, "u": 1899},
"England and Wales: 1930-1949": {"l": 1930, "u": 1949},
"England and Wales: 1900-1929": {"l": 1900, "u": 1929},
@ -40,13 +50,13 @@ bounds_map = {
"England and Wales: 2012 onwards": {"l": 2012, "u": 3000},
}
remap = {
construction_age_remap = {
"England and Wales: 2007 onwards": "England and Wales: 2007-2011"
}
expanded_map = {
i: [
label for label, bounds in bounds_map.items() if (i <= bounds["u"]) and (i >= bounds['l'])
label for label, bounds in construction_age_bounds_map.items() if (i <= bounds["u"]) and (i >= bounds['l'])
][0] for i in range(0, 3001)
}
@ -59,26 +69,205 @@ def is_int(x):
return False
class DataProcessor:
class EPCDataProcessor:
"""
Handle data loading and data preprocessing
"""
def __init__(self, filepath: Path | None, newdata: bool = False) -> None:
def __init__(self, data: pd.DataFrame | None = None, cleaning_averages: pd.DataFrame | None = None,
run_mode: str = "training", violation_mode: bool = False) -> None:
"""
:param filepath: If specified, is the physical location of the data
:param newdata: Indicates if we are processing new, testing data.
:param is_newdata: Indicates if we are processing new, testing data.
In this instance, there are some operations we do not
want to perform, such as confine_data()
"""
self.filepath = filepath
self.data = None
self.newdata = newdata
is_data_a_dataframe = isinstance(data, pd.DataFrame)
self.data: pd.DataFrame = data if is_data_a_dataframe else pd.DataFrame()
def load_data(self, low_memory=False) -> None:
if not self.filepath:
is_cleaning_averages_a_dataframe = isinstance(cleaning_averages, pd.DataFrame)
self.cleaning_averages: pd.DataFrame = cleaning_averages if is_cleaning_averages_a_dataframe else pd.DataFrame()
# FOR NOW IF VIOLATION MODE IS ON, WE USE RUN MODE AS NEWDATA
self.violation_mode = violation_mode
if run_mode not in ["training", "newdata"]:
raise ValueError("Run mode must be either training or newdata")
self.run_mode = run_mode if not violation_mode else "newdata"
def prepare_data(self, filepath: Path | str | None = None) -> None:
"""
Given the run mode, we apply the relevant pipeline steps
Ignore step is used to highlight which steps are not needed in newdata
"""
ignore_step = True if self.run_mode == "newdata" else False
if filepath is not None:
self.load_data(filepath=filepath, low_memory=DATA_PROCESSOR_SETTINGS["low_memory"])
if len(self.data) == 0:
raise Exception("No data to process - check filepath/ data being passed in")
self.confine_data(ignore_step=ignore_step)
self.remap_anomalies()
self.remap_floor_level(ignore_step=ignore_step)
self.remap_build_form()
self.cast_data_column_values_to_lower()
self.standardise_construction_age_band(ignore_step=ignore_step)
self.clean_missing_rooms(ignore_step=ignore_step)
self.recast_df_columns(
column_mappings=DATA_PROCESSOR_SETTINGS["column_mappings"]
)
self.clean_multi_glaze_proportion(ignore_step=ignore_step)
self.clean_photo_supply()
self.retain_multiple_epc_properties(
epc_minimum_count=DATA_PROCESSOR_SETTINGS["epc_minimum_count"], ignore_step=ignore_step
)
self.fill_na_fields()
self.sort_data_by_uprn_lodgement_date(ignore_step=ignore_step)
# Final re-casting after data transformed and prepared
self.recast_df_columns(column_mappings=COLUMNTYPES, auto_subset_columns=True)
self.recast_all_data(column_mappings=COLUMNTYPES, auto_subset_columns=True)
self.na_remapping(auto_subset_columns=True)
self.fill_invalid_constituency_fields(ignore_step=ignore_step)
self.make_cleaning_averages(ignore_step=ignore_step)
self.add_local_authority_to_cleaning_average(ignore_step=ignore_step)
# TODO: check if this has impact on training dataset
# cleaned_data = self.apply_averages_cleaning(
# data_to_clean=self.data,
# cleaning_data=self.cleaning_averages,
# cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
# colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
# )
# When running in newdata mode, cleaning_averages has lower cases so we co-erce back to upper
cleaning_averages = self.cleaning_averages.copy()
if self.run_mode == "newdata":
cleaning_averages.columns = cleaning_averages.columns.str.upper()
cleaned_data = self.apply_averages_cleaning(
data_to_clean=self.data,
cleaning_data=cleaning_averages,
cols_to_merge_on=COLUMNS_TO_MERGE_ON,
)
self.data = self.data if cleaned_data is None else cleaned_data
self.cast_cleaning_averages_columns_to_lower(ignore_step=ignore_step)
self.cast_data_columns_to_lower()
def cast_data_columns_to_lower(self):
    """
    Lower-case every column name of self.data
    """
    lowered_names = self.data.columns.str.lower()
    self.data.columns = lowered_names
def cast_cleaning_averages_columns_to_lower(self, ignore_step: bool = False):
    """
    Lower-case every column name of self.cleaning_averages
    Skipped in newdata mode (ignore_step=True)
    """
    if not ignore_step:
        lowered_names = self.cleaning_averages.columns.str.lower()
        self.cleaning_averages.columns = lowered_names
def add_local_authority_to_cleaning_average(self, ignore_step: bool = False):
    """
    Stamp the cleaning averages with the local authority found on the first row of the data
    Skipped in newdata mode (ignore_step=True)
    """
    if not ignore_step:
        first_local_authority = self.data["LOCAL_AUTHORITY"].values[0]
        self.cleaning_averages["LOCAL_AUTHORITY"] = first_local_authority
def fill_invalid_constituency_fields(self, ignore_step: bool = False):
    """
    For some weird cases, where data has missing constituency, we fill the gaps with the most common constituency
    found in the data.

    Nothing is done in violation mode or when ignore_step is set. If every constituency is missing there is no
    value to fill with, so the data is left as it is.
    """
    if self.violation_mode:
        # TODO: to fill in
        return

    if ignore_step:
        return

    most_common = self.data["CONSTITUENCY"].mode()
    if most_common.empty:
        # mode() drops missing values, so an all-missing column yields nothing to index into
        return

    self.data = self.data.fillna({"CONSTITUENCY": most_common.values[0]})
def sort_data_by_uprn_lodgement_date(self, ignore_step: bool = False):
    """
    Sort the data by UPRN, then by lodgement date, both ascending
    No Violation mode needed; skipped when ignore_step is set
    """
    if not ignore_step:
        sort_keys = ["UPRN", "LODGEMENT_DATE"]
        self.data = self.data.sort_values(by=sort_keys, ascending=True)
def cast_data_column_values_to_lower(self):
    """
    Lower-case the values held in a fixed set of text columns
    No Violation mode or newdata modes required
    """
    for column_name in ("TRANSACTION_TYPE",):
        lowered_values = self.data[column_name].str.lower()
        self.data[column_name] = lowered_values
def remap_build_form(self):
    """
    Replace built form values according to BUILT_FORM_REMAP
    No Violation mode or newdata modes required
    """
    remapped_built_form = self.data["BUILT_FORM"].replace(BUILT_FORM_REMAP)
    self.data["BUILT_FORM"] = remapped_built_form
def remap_anomalies(self):
    """
    Replace every known anomaly value, and every NaN, in the data with None
    No Violation mode or newdata modes required
    """
    # Map all anomaly values to None
    data_anomaly_map = dict.fromkeys(Definitions.DATA_ANOMALY_MATCHES)

    # Use replace function to map data (if exists in key), to corresponding value - i.e. Remove invalid values
    data = self.data.replace(data_anomaly_map)

    # np.nan is used rather than the np.NAN alias, which no longer exists from NumPy 2.0 onwards
    self.data = data.replace(np.nan, None)
def remap_floor_level(self, ignore_step: bool = False):
    """
    Replace floor level values according to FLOOR_LEVEL_MAP
    Nothing is done in violation mode or when ignore_step is set
    """
    # TODO: We need to handle the violation mode case
    if self.violation_mode or ignore_step:
        return

    remapped_floor_level = self.data["FLOOR_LEVEL"].replace(FLOOR_LEVEL_MAP)
    self.data["FLOOR_LEVEL"] = remapped_floor_level
def load_data(self, filepath, low_memory=False) -> None:
    """
    Read the CSV at filepath into self.data.

    :param filepath: Location of the CSV file to read
    :param low_memory: Passed straight through to pandas.read_csv
    :raises ValueError: If no filepath is given
    """
    if not filepath:
        raise ValueError("No filepath specified")

    # The file is read once, from the filepath argument; the location is no longer held on the instance
    self.data = pd.read_csv(filepath, low_memory=low_memory)
def insert_data(self, data: pd.DataFrame) -> None:
    """
    Replace the data held by the processor with the frame passed in (no copy is taken)
    """
    self.data = data
@ -90,11 +279,11 @@ class DataProcessor:
return x
# Next, we check if it's a value in our map
if bounds_map.get(x):
if construction_age_bounds_map.get(x):
return x
# We check if it's a standard remap value
remap_value = remap.get(x, None)
remap_value = construction_age_remap.get(x, None)
if remap_value:
return remap_value
@ -105,12 +294,19 @@ class DataProcessor:
raise NotImplementedError("Not handled the case for value %s" % x)
def standardise_construction_age_band(self):
def standardise_construction_age_band(self, ignore_step: bool = False):
"""
This function will tidy up some of the non-standard values that are populated in the construction age
band, which is useful for cleaning
"""
if self.violation_mode:
# TODO: to fill in
return
if ignore_step:
return
self.data["CONSTRUCTION_AGE_BAND"] = self.data["CONSTRUCTION_AGE_BAND"].apply(
lambda x: self.clean_construction_age_band(x)
)
@ -119,7 +315,7 @@ class DataProcessor:
~pd.isnull(self.data["CONSTRUCTION_AGE_BAND"])
]
def clean_missing_rooms(self):
def clean_missing_rooms(self, ignore_step: bool = False):
"""
For the number of heated rooms and number of habitable rooms, we clean these values up front,
based on property archetype and age
@ -127,6 +323,14 @@ class DataProcessor:
TODO: We could use a model based impution approach for possibly more accurate cleaning
"""
if self.violation_mode:
# TODO: to fill in
return
if ignore_step:
return
# TODO: DO we want to move this out of this function? (i.e. alter the data before we do any cleaning)
self.data["POSTAL_AREA"] = self.data["POSTCODE"].apply(lambda x: x.split(" ")[0])
def apply_clean(data, matching_columns):
@ -164,59 +368,78 @@ class DataProcessor:
break
to_index -= 1
def pre_process(self) -> pd.DataFrame:
"""
Load data and begin initial cleaning
"""
if self.data is None:
self.load_data(low_memory=DATA_PROCESSOR_SETTINGS["low_memory"])
# def pre_process(self, filepath: Path | None = None) -> tuple[pd.DataFrame, pd.DataFrame]:
# """
# Load data and begin initial cleaning
# """
# if self.data is None:
# self.load_data(filepath=filepath, low_memory=DATA_PROCESSOR_SETTINGS["low_memory"])
if not self.newdata:
self.confine_data()
# if not self.is_newdata:
# self.confine_data()
self.remap_columns()
# self.remap_columns()
# We have some non-standard construction age bands which we'll clean for matching
if not self.newdata:
self.standardise_construction_age_band()
self.clean_missing_rooms()
# # We have some non-standard construction age bands which we'll clean for matching
# if not self.is_newdata:
# self.standardise_construction_age_band()
# self.clean_missing_rooms()
self.recast_df_columns(
column_mappings=DATA_PROCESSOR_SETTINGS["column_mappings"]
)
# self.recast_df_columns(
# column_mappings=DATA_PROCESSOR_SETTINGS["column_mappings"]
# )
if not self.newdata:
self.clean_multi_glaze_proportion()
# if not self.is_newdata:
# self.clean_multi_glaze_proportion()
self.clean_photo_supply()
# self.clean_photo_supply()
if not self.newdata:
self.retain_multiple_epc_properties(
epc_minimum_count=DATA_PROCESSOR_SETTINGS["epc_minimum_count"]
)
# if not self.is_newdata:
# self.retain_multiple_epc_properties(
# epc_minimum_count=DATA_PROCESSOR_SETTINGS["epc_minimum_count"]
# )
if DATA_PROCESSOR_SETTINGS["epc_minimum_count"] >= 1:
# If we have multiple EPC records, we can try and do filling
self.fill_na_fields()
# if DATA_PROCESSOR_SETTINGS["epc_minimum_count"] >= 1:
# # If we have multiple EPC records, we can try and do filling
# self.fill_na_fields()
if not self.newdata:
self.data = self.data.sort_values(["UPRN", "LODGEMENT_DATE"], ascending=True)
# if not self.is_newdata:
# self.data = self.data.sort_values(["UPRN", "LODGEMENT_DATE"], ascending=True)
# Final re-casting after data transformed and prepared
coltypes = {k: v for k, v in COLUMNTYPES.items() if k in self.data.columns} if self.newdata else COLUMNTYPES
for k, v in coltypes.items():
self.data[k] = self.data[k].astype(v)
self.data = self.data.astype(coltypes)
# # Final re-casting after data transformed and prepared
# coltypes = {k: v for k, v in COLUMNTYPES.items() if k in self.data.columns} if self.is_newdata else
# COLUMNTYPES
# for k, v in coltypes.items():
# self.data[k] = self.data[k].astype(v)
# self.data = self.data.astype(coltypes)
self.na_remapping()
# self.na_remapping()
return self.data
# self.cleaning_averages = None
# if not self.is_newdata:
# # We have some odd cases with missing constituency so we fill
# self.data = self.data.fillna({"CONSTITUENCY": self.data["CONSTITUENCY"].mode().values[0]})
def na_remapping(self):
# self.cleaning_averages = self.make_cleaning_averages()
# # We apply averages cleaning to the data
# self.data = self.apply_averages_cleaning(
# data_to_clean=self.data,
# cleaning_data=self.cleaning_averages,
# cols_to_merge_on=COLUMNS_TO_MERGE_ON
# )
# self.cleaning_averages["LOCAL_AUTHORITY"] = self.data["LOCAL_AUTHORITY"].values[0]
# self.cleaning_averages.columns = self.cleaning_averages.columns.str.lower()
# self.data.columns = self.data.columns.str.lower()
# return self.data, self.cleaning_averages
def na_remapping(self, auto_subset_columns: bool = False):
fill_na_map_apply = {
k: v for k, v in fill_na_map.items() if k in self.data.columns
} if self.newdata else fill_na_map
} if auto_subset_columns else fill_na_map
for column, fill_value in fill_na_map_apply.items():
self.data[column] = self.data[column].fillna(fill_value)
@ -243,35 +466,15 @@ class DataProcessor:
["FLOOR_HEIGHT", "TOTAL_FLOOR_AREA"]
].replace("", None)
def remap_columns(self):
def make_cleaning_averages(self, ignore_step: bool = False) -> pd.DataFrame:
"""
Remap all columns, for any non values
Create a dataset to hold averages based on property type, built form, construction age, and rooms.
Not require in newdata mode
"""
# Map all anomaly values to None
data_anomaly_map = dict(
zip(
Definitions.DATA_ANOMALY_MATCHES,
[None] * len(Definitions.DATA_ANOMALY_MATCHES),
)
)
if ignore_step:
return pd.DataFrame()
# Use replace function to map data (if exists in key), to corresponding value - i.e. Remove invalid values
data = self.data.replace(data_anomaly_map)
data = data.replace(np.NAN, None)
# Remap certain columns
if not self.newdata:
data["FLOOR_LEVEL"] = data["FLOOR_LEVEL"].replace(FLOOR_LEVEL_MAP)
data["BUILT_FORM"] = data["BUILT_FORM"].replace(BUILT_FORM_REMAP)
convert_to_lower = ["TRANSACTION_TYPE"]
for col in convert_to_lower:
data[col] = data[col].str.lower()
self.data = data
def make_cleaning_averages(self) -> pd.DataFrame:
# Define a custom function to calculate the median, excluding missing values
def median_without_missing(group):
return group[AVERAGE_FIXED_FEATURES].median(skipna=True)
@ -368,13 +571,20 @@ class DataProcessor:
# "FLOOR_HEIGHT"
# ].fillna(FLOOR_HEIGHT_NATIONAL_AVERAGE)
return cleaning_averages_filled
self.cleaning_averages = cleaning_averages_filled
def retain_multiple_epc_properties(self, epc_minimum_count: int = 1) -> None:
def retain_multiple_epc_properties(self, epc_minimum_count: int = 1, ignore_step: bool = False) -> None:
"""
Reduce the data futher by keeping only datasets with multiple epcs
"""
if self.violation_mode:
# TODO: to fill in
return
if ignore_step:
return
counts = self.data.groupby("UPRN").size().reset_index()
counts.columns = ["UPRN", "count"]
@ -382,22 +592,81 @@ class DataProcessor:
counts = counts[counts["count"] > epc_minimum_count]
self.data = pd.merge(self.data, counts, on="UPRN")
def recast_df_columns(self, column_mappings: dict) -> None:
def recast_df_columns(self, column_mappings: dict, auto_subset_columns: bool = False) -> None:
"""
Recast columns from the dataframe to ensure the behaviour we want
"""
if auto_subset_columns:
column_mappings = {k: v for k, v in column_mappings.items() if k in self.data.columns}
for key, values in column_mappings.items():
if key not in self.data.columns:
raise ValueError("Column mapping incorrectly specified")
for value in values:
self.data[key] = self.data[key].astype(value)
if isinstance(values, list):
for value in values:
self.data[key] = self.data[key].astype(value)
else:
self.data[key] = self.data[key].astype(values)
def confine_data(self) -> None:
def recast_all_data(self, column_mappings: dict, auto_subset_columns: bool = False) -> None:
    """
    Recast every mapped column of ``self.data`` in a single ``astype`` call.

    :param column_mappings: mapping of column name to the dtype it should be cast to
    :param auto_subset_columns: when True, mapping entries whose column is not present in
        ``self.data`` are ignored; when False the full mapping is handed to ``astype`` as-is
    """
    target_types = column_mappings
    if auto_subset_columns:
        available = self.data.columns
        target_types = {
            name: dtype for name, dtype in column_mappings.items() if name in available
        }

    self.data = self.data.astype(target_types)
def confine_data(self, ignore_step: bool = False):
"""
Include all step to reduce down the data based on assumptions
"""
if self.violation_mode:
violation_uprn_missing = pd.isnull(self.data["UPRN"])
violation_old_lodgment_date = self.data["LODGEMENT_DATE"] < EARLIEST_EPC_DATE
violation_invalid_transaction_type = self.data["TRANSACTION_TYPE"] == IGNORED_TRANSACTION_TYPES
violation_ignored_floor_level = self.data["FLOOR_LEVEL"].isin(IGNORED_FLOOR_LEVELS)
violation_rdsap_score_above_max = self.data[RDSAP_RESPONSE] > MAX_SAP_SCORE
violation_missing_windows_description = pd.isnull(self.data["WINDOWS_DESCRIPTION"])
violation_missing_hotwater_description = pd.isnull(self.data["HOTWATER_DESCRIPTION"])
violation_missing_roof_description = pd.isnull(self.data["ROOF_DESCRIPTION"])
violation_invalid_property_type = self.data["PROPERTY_TYPE"] == IGNORED_PROPERTY_TYPES
violation_invalid_tenure = self.data["TENURE"].isin(IGNORED_TENURES)
violation_df = pd.concat(
[
violation_uprn_missing,
violation_old_lodgment_date,
violation_invalid_transaction_type,
violation_ignored_floor_level,
violation_rdsap_score_above_max,
violation_missing_windows_description,
violation_missing_hotwater_description,
violation_missing_roof_description,
violation_invalid_property_type,
violation_invalid_tenure,
], axis=1,
keys=[
"violation_uprn_missing",
"violation_old_lodgment_date",
"violation_invalid_transaction_type",
"violation_ignored_floor_level",
"violation_rdsap_score_above_max",
"violation_missing_windows_description",
"violation_missing_hotwater_description",
"violation_missing_roof_description",
"violation_invalid_property_type",
"violation_invalid_tenure"
]
)
self.data = pd.concat([self.data, violation_df], axis=1)
if ignore_step:
return
# Filter 1: UPRN is a unique identifier for a property, so we remove any EPCs that don't have one
# Filter 2: Lodgement date is the date the EPC was lodged, so we remove any EPCs that were lodged
@ -416,9 +685,9 @@ class DataProcessor:
self.data = self.data[~pd.isnull(self.data["UPRN"])]
self.data = self.data[self.data["LODGEMENT_DATE"] >= EARLIEST_EPC_DATE]
self.data = self.data[self.data["TRANSACTION_TYPE"] != "new dwelling"]
self.data = self.data[self.data["TRANSACTION_TYPE"] != IGNORED_TRANSACTION_TYPES]
self.data = self.data[
~self.data["FLOOR_LEVEL"].isin(["top floor", "mid floor"])
~self.data["FLOOR_LEVEL"].isin(IGNORED_FLOOR_LEVELS)
]
self.data = self.data[self.data[RDSAP_RESPONSE] <= MAX_SAP_SCORE]
@ -430,16 +699,30 @@ class DataProcessor:
# Because park homes are surveyed unusually (for example, we don't have u-values to
# look up for their different components, they need to be collected in survey and aren't reflected in
# EPCs) we'll ignore them from the model
self.data = self.data[self.data["PROPERTY_TYPE"] != "Park home"]
self.data = self.data[self.data["PROPERTY_TYPE"] != IGNORED_PROPERTY_TYPES]
def clean_multi_glaze_proportion(self) -> None:
# We remove EPCs where the tenure is unknown, but is usually an indicator of a new build
self.data = self.data[~self.data["TENURE"].isin(IGNORED_TENURES)]
# We remap zero values to None
self.data.loc[self.data['FLOOR_HEIGHT'] == 0, 'FLOOR_HEIGHT'] = None
def clean_multi_glaze_proportion(self, ignore_step: bool = False) -> None:
    """
    Default the multi-glaze proportion to 100 where it is missing but the windows
    description is one of the fully glazed descriptions.

    Nothing is changed when the processor is in violation mode or when ``ignore_step`` is set.
    """
    # TODO: violation mode currently has no handling for this step, so it is skipped
    if self.violation_mode or ignore_step:
        return

    proportion_missing = self.data["MULTI_GLAZE_PROPORTION"].isnull()
    fully_glazed = self.data["WINDOWS_DESCRIPTION"].isin(FULLY_GLAZED_DESCRIPTIONS)

    self.data.loc[proportion_missing & fully_glazed, "MULTI_GLAZE_PROPORTION"] = 100
def clean_photo_supply(self) -> None:
@ -450,7 +733,9 @@ class DataProcessor:
self.data["PHOTO_SUPPLY"] = self.data["PHOTO_SUPPLY"].fillna(0)
@staticmethod
def apply_averages_cleaning(data_to_clean, cleaning_data, cols_to_merge_on, colnames=None):
def apply_averages_cleaning(
data_to_clean, cleaning_data, cols_to_merge_on, colnames=None, ignore_step: bool = False
):
"""
Clean the input DataFrame using averages from a cleaning DataFrame.
@ -462,6 +747,9 @@ class DataProcessor:
:return: Cleaned DataFrame.
"""
if ignore_step:
return None
# The desired colnames to clean - which may not be present
if colnames is None:
colnames = ["TOTAL_FLOOR_AREA", "FLOOR_HEIGHT", "FIXED_LIGHTING_OUTLETS_COUNT"]
@ -492,12 +780,16 @@ class DataProcessor:
how='left'
)
global_averages = cleaning_data[cols_to_clean].mean()
# Fill NaN values with averages
for col in cols_to_clean:
data_to_clean[col].fillna(data_to_clean[f"{col}_AVERAGE"], inplace=True)
data_to_clean.drop(columns=[f"{col}_AVERAGE"], inplace=True)
# If we still have missings
data_to_clean[col].fillna(data_to_clean[col].mean(), inplace=True)
# Final step if we still have missings - use global mean
data_to_clean[col].fillna(global_averages[col], inplace=True)
return data_to_clean
@ -510,8 +802,8 @@ class DataProcessor:
:return: Pandas dataframe containing the subset of columns defined in COMPONENT_FEATURES
"""
if suffix not in ["_STARTING", "_ENDING"]:
raise Exception("Suffix should be one of _STARTING or _ENDING")
if suffix not in ["_starting", "_ending"]:
raise Exception("Suffix should be one of _starting or _ending")
if suffix == "_STARTING":
starting_cols = self.data[STARTING_SUFFIX_COMPONENT_COLS + EFFICIENCY_FEATURES].copy().add_suffix(suffix)
@ -573,6 +865,7 @@ class DataProcessor:
for col in missings.index:
unique_values = df[col].unique()
# TODO: confirm this behaviour
if True in unique_values or False in unique_values:
df[col] = df[col].fillna(False)
if "none" in unique_values:

836
etl/epc/Dataset.py Normal file
View file

@ -0,0 +1,836 @@
import numpy as np
import pandas as pd
from typing import List
from etl.epc.Record import EPCDifferenceRecord
from etl.epc.ValidationConfiguration import DatasetValidationConfiguration
from etl.epc.settings import EARLIEST_EPC_DATE
from recommendations.rdsap_tables import england_wales_age_band_lookup
from recommendations.recommendation_utils import (
estimate_number_of_floors,
get_wall_u_value,
get_roof_u_value,
get_floor_u_value,
estimate_perimeter,
get_wall_type,
)
# TODO: Can probably produce this in the property change app and store in S3
BOOLEAN_VARIABLES = [
"is_cavity_wall",
"is_filled_cavity",
"is_solid_brick",
"is_system_built",
"is_timber_frame",
"is_granite_or_whinstone",
"is_as_built",
"is_cob",
"is_sandstone_or_limestone",
"is_park_home",
"external_insulation",
"internal_insulation",
"is_park_home_ending",
"external_insulation_ending",
"internal_insulation_ending",
"is_to_unheated_space",
"is_to_external_air",
"is_suspended",
"is_solid",
"another_property_below",
"is_pitched",
"is_roof_room",
"is_loft",
"is_flat",
"is_thatched",
"is_at_rafters",
"has_dwelling_above",
"has_radiators",
"has_fan_coil_units",
"has_pipes_in_screed_above_insulation",
"has_pipes_in_insulated_timber_floor",
"has_pipes_in_concrete_slab",
"has_boiler",
"has_air_source_heat_pump",
"has_room_heaters",
"has_electric_storage_heaters",
"has_warm_air",
"has_electric_underfloor_heating",
"has_electric_ceiling_heating",
"has_community_scheme",
"has_ground_source_heat_pump",
"has_no_system_present",
"has_portable_electric_heaters",
"has_water_source_heat_pump",
"has_electric_heat_pump",
"has_micro-cogeneration",
"has_solar_assisted_heat_pump",
"has_exhaust_source_heat_pump",
"has_community_heat_pump",
"has_electric",
"has_mains_gas",
"has_wood_logs",
"has_coal",
"has_oil",
"has_wood_pellets",
"has_anthracite",
"has_dual_fuel_mineral_and_wood",
"has_smokeless_fuel",
"has_lpg",
"has_b30k",
"has_electricaire",
"has_assumed_for_most_rooms",
"has_underfloor_heating",
"has_radiators_ending",
"has_fan_coil_units_ending",
"has_pipes_in_screed_above_insulation_ending",
"has_pipes_in_insulated_timber_floor_ending",
"has_pipes_in_concrete_slab_ending",
"has_boiler_ending",
"has_air_source_heat_pump_ending",
"has_room_heaters_ending",
"has_electric_storage_heaters_ending",
"has_warm_air_ending",
"has_electric_underfloor_heating_ending",
"has_electric_ceiling_heating_ending",
"has_community_scheme_ending",
"has_ground_source_heat_pump_ending",
"has_no_system_present_ending",
"has_portable_electric_heaters_ending",
"has_water_source_heat_pump_ending",
"has_electric_heat_pump_ending",
"has_micro-cogeneration_ending",
"has_solar_assisted_heat_pump_ending",
"has_exhaust_source_heat_pump_ending",
"has_community_heat_pump_ending",
"has_electric_ending",
"has_mains_gas_ending",
"has_wood_logs_ending",
"has_coal_ending",
"has_oil_ending",
"has_wood_pellets_ending",
"has_anthracite_ending",
"has_dual_fuel_mineral_and_wood_ending",
"has_smokeless_fuel_ending",
"has_lpg_ending",
"has_b30k_ending",
"has_electricaire_ending",
"has_assumed_for_most_rooms_ending",
"has_underfloor_heating_ending",
"multiple_room_thermostats",
"multiple_room_thermostats_ending",
"is_community",
"no_individual_heating_or_community_network",
"is_community_ending",
"no_individual_heating_or_community_network_ending",
]
class BaseDataset:
    """
    Common parent for the dataset classes in this module.
    """

    def __init__(self) -> None:
        # No pipeline steps are registered on construction
        self.pipeline_steps = dict()

    def validate_dataset(self):
        """
        Attach the dataset validation configuration to this instance.

        NOTE(review): this only stores a reference to DatasetValidationConfiguration on
        ``self.dataset_validation``; no checks are run against the data here.
        """
        self.dataset_validation: dict = DatasetValidationConfiguration

    # def pipeline_factory(self, pipeline_type: str) -> dict:
    #     """
    #     Factory method for creating a pipeline
    #     """
    #     if pipeline_type not in self.pipeline_steps:
    #         raise ValueError(f"Pipeline type {pipeline_type} not found")
    #     return self.pipeline_steps[pipeline_type]
class TrainingDataset(BaseDataset):
"""
A collection of EPCDifferenceRecords can be combined into a TrainingDataset.
"""
def __init__(
    self, datasets: List[EPCDifferenceRecord], cleaned_lookup: dict
) -> None:
    """
    Build the training dataframe from a list of difference records and run every
    preparation step on it, in order.

    :param datasets: the difference records; each contributes its ``difference_record`` as a row
    :param cleaned_lookup: cleaned description lookup, keyed by component, used to expand
        the free-text descriptions into features
    """
    # Run the base initialiser so attributes it defines (pipeline_steps) exist on the instance
    super().__init__()
    # self.pipeline_steps = self.pipeline_factory("training")
    self.datasets = datasets
    self.df = pd.DataFrame([dataset.difference_record for dataset in datasets])

    self._feature_generation()
    self._drop_features()
    self._clean_efficiency_variables()
    self._null_validation(information="Clean Efficiency Variables")
    self._expand_description_to_features(cleaned_lookup)
    self._adjust_assumed_values_in_wall_descriptions()
    self._generate_u_values_from_features()
    # TODO: For some of the features that we clean, we have either a true, false or possibly null value
    #   Those nulls should be False. clean_missings_after_description_process handles this but shouldn't
    #   need to
    self._clean_missing_values()
    self._null_validation(information="Clean Missing Values")
    self._remove_abnormal_change_in_floor_area()
    self._ensure_numeric()
    self._organise_starting_ending_columns()
def _organise_starting_ending_columns(self):
"""
Organise the starting and ending columns so that they are next to each other
"""
no_suffix_cols = [
col
for col in self.df.columns
if "_ending" not in col and "_starting" not in col
]
starting_cols = [col for col in self.df.columns if "_starting" in col]
ending_cols = [col for col in self.df.columns if "_ending" in col]
common_cols = [
col.rsplit("_", 1)[0]
for col in starting_cols
if col.replace("_starting", "_ending") in ending_cols
]
only_ending_cols = [
col
for col in ending_cols
if col.replace("_ending", "_starting") not in starting_cols
]
common_cols = [[col + "_starting", col + "_ending"] for col in common_cols]
self.df = self.df.loc[
:,
no_suffix_cols
+ only_ending_cols
+ [col for cols in common_cols for col in cols],
]
def _remove_abnormal_change_in_floor_area(self):
"""
Remove properties where the change in floor area is greater than 100%
"""
self.df["tfa_diff_abs"] = abs(
self.df["total_floor_area_ending"] - self.df["total_floor_area_starting"]
)
self.df["tfa_diff_prop"] = (
self.df["tfa_diff_abs"] / self.df["total_floor_area_starting"]
)
self.df = self.df[self.df["tfa_diff_prop"] < 0.5]
self.df = self.df.drop(columns=["tfa_diff_abs", "tfa_diff_prop"])
def _ensure_numeric(self):
"""
Ensure that all columns are numeric
"""
# TODO: move into EPCRecord record
uvalue_columns = [
col for col in self.df.columns if "thermal_transmittance" in col
]
for uvalue_col in uvalue_columns:
self.df[uvalue_col] = pd.to_numeric(self.df[uvalue_col])
@staticmethod
def _lambda_function_to_generate_roof_uvalue(row, is_end=False):
    """
    Row-wise helper (for ``DataFrame.apply``) that derives a roof u-value via get_roof_u_value.

    :param row: a single dataframe row
    :param is_end: use the ending insulation thickness instead of the starting one
    :raises ValueError: if there is a dwelling above and either recorded roof u-value is not 0
    """
    if row["has_dwelling_above"]:
        # Both recorded values are checked, regardless of which end is being generated
        for recorded_column in (
            "roof_thermal_transmittance",
            "roof_thermal_transmittance_ending",
        ):
            if row[recorded_column] != 0:
                raise ValueError("Should have 0 u-value for roof")

    thickness_column = (
        "roof_insulation_thickness_ending" if is_end else "roof_insulation_thickness"
    )

    return get_roof_u_value(
        insulation_thickness=row[thickness_column],
        has_dwelling_above=row["has_dwelling_above"],
        is_loft=row["is_loft"],
        is_roof_room=row["is_roof_room"],
        is_thatched=row["is_thatched"],
        is_flat=row["is_flat"],
        is_pitched=row["is_pitched"],
        is_at_rafters=row["is_at_rafters"],
        age_band=england_wales_age_band_lookup[row["construction_age_band"]],
    )
@staticmethod
def _lambda_function_to_generate_wall_uvalue(row, is_end=False):
    """
    Row-wise helper (for ``DataFrame.apply``) returning the wall u-value for one row.

    The recorded thermal transmittance is returned when present; otherwise the value is
    derived from the cleaned wall description via get_wall_u_value.

    :param row: a single dataframe row
    :param is_end: read the ending columns instead of the starting ones
    """
    if is_end:
        description_column = "walls_clean_description_ending"
        recorded_column = "walls_thermal_transmittance_ending"
    else:
        description_column = "walls_clean_description"
        recorded_column = "walls_thermal_transmittance"

    recorded = row[recorded_column]
    if not pd.isnull(recorded):
        return recorded

    return get_wall_u_value(
        clean_description=row[description_column],
        age_band=england_wales_age_band_lookup[row["construction_age_band"]],
        is_granite_or_whinstone=row["is_granite_or_whinstone"],
        is_sandstone_or_limestone=row["is_sandstone_or_limestone"],
    )
@staticmethod
def _lambda_function_to_generate_floor_uvalue(row, is_end=False):
    """
    Row-wise helper (for ``DataFrame.apply``) returning the floor u-value for one row.

    With another property below the result is 0. Otherwise the recorded thermal
    transmittance is returned when present, and get_floor_u_value is used when it is missing.

    :param row: a single dataframe row
    :param is_end: read the ending columns instead of the starting ones
    :raises ValueError: if there is a property below and either recorded floor u-value is not 0
    """
    if row["another_property_below"]:
        # Both recorded values are checked, regardless of which end is being generated
        for recorded_column in (
            "floor_thermal_transmittance",
            "floor_thermal_transmittance_ending",
        ):
            if row[recorded_column] != 0:
                raise ValueError("Should have 0 u-value for floor")
        return 0

    recorded = row[
        "floor_thermal_transmittance_ending" if is_end else "floor_thermal_transmittance"
    ]
    if not pd.isnull(recorded):
        return recorded

    if is_end:
        insulation_column = "floor_insulation_thickness_ending"
        perimeter_column = "estimated_perimeter_ending"
        area_column = "ground_floor_area_ending"
    else:
        insulation_column = "floor_insulation_thickness"
        perimeter_column = "estimated_perimeter_starting"
        area_column = "ground_floor_area_starting"

    return get_floor_u_value(
        floor_type=row["floor_type"],
        perimeter=row[perimeter_column],
        area=row[area_column],
        insulation_thickness=row[insulation_column],
        wall_type=row["wall_type"],
        age_band=england_wales_age_band_lookup[row["construction_age_band"]],
    )
def _generate_u_values_from_features(self):
    """
    Fill missing wall, roof and floor thermal transmittance (u-values), for both the
    starting and ending EPC, using estimates derived from the expanded features.

    Recorded u-values always win; estimates are only used where the recorded value is null.
    Helper columns created along the way (floor/wall type, clean wall descriptions,
    estimated number of floors, ground floor areas) are dropped before returning; the
    estimated perimeter columns are kept.
    """

    def estimate(generator, is_end):
        # Apply one of the row-wise u-value helpers across the whole frame
        return self.df.apply(lambda row: generator(row, is_end=is_end), axis=1)

    # ~~~~~~~~~~~~~~~~~~
    # Walls
    # ~~~~~~~~~~~~~~~~~~
    walls_starting_uvalue = self.df["walls_thermal_transmittance"].fillna(
        estimate(self._lambda_function_to_generate_wall_uvalue, False)
    )
    walls_ending_uvalue = estimate(self._lambda_function_to_generate_wall_uvalue, True)

    # Where the wall description did not change, the ending estimate mirrors the starting one
    walls_unchanged = (
        self.df["walls_clean_description"] == self.df["walls_clean_description_ending"]
    )
    walls_ending_uvalue[walls_unchanged] = walls_starting_uvalue[walls_unchanged]

    # ~~~~~~~~~~~~~~~~~~
    # Roof
    # ~~~~~~~~~~~~~~~~~~
    roof_starting_uvalue = self.df["roof_thermal_transmittance"].fillna(
        estimate(self._lambda_function_to_generate_roof_uvalue, False)
    )
    roof_ending_uvalue = self.df["roof_thermal_transmittance_ending"].fillna(
        estimate(self._lambda_function_to_generate_roof_uvalue, True)
    )

    # ~~~~~~~~~~~~~~~~~~
    # Floor
    # ~~~~~~~~~~~~~~~~~~
    self.df["estimated_number_of_floors"] = self.df.apply(
        lambda row: estimate_number_of_floors(row["property_type"]), axis=1
    )

    for stage in ("starting", "ending"):
        self.df[f"ground_floor_area_{stage}"] = (
            self.df[f"total_floor_area_{stage}"] / self.df["estimated_number_of_floors"]
        )

    for stage in ("starting", "ending"):
        # Each perimeter is derived from the floor area and room count of its own EPC
        self.df[f"estimated_perimeter_{stage}"] = self.df.apply(
            lambda row, stage=stage: estimate_perimeter(
                row[f"ground_floor_area_{stage}"],
                row[f"number_habitable_rooms_{stage}"]
                / row["estimated_number_of_floors"],
            ),
            axis=1,
        )

    self.df["floor_type"] = self.df["is_suspended"].replace(
        {True: "suspended", False: "solid"}
    )

    self.df["wall_type"] = self.df.apply(
        lambda row: get_wall_type(
            is_cavity_wall=row["is_cavity_wall"],
            is_solid_brick=row["is_solid_brick"],
            is_timber_frame=row["is_timber_frame"],
            is_granite_or_whinstone=row["is_granite_or_whinstone"],
            is_cob=row["is_cob"],
            is_sandstone_or_limestone=row["is_sandstone_or_limestone"],
            is_system_built=row["is_system_built"],
            is_park_home=row["is_park_home"],
        ),
        axis=1,
    )

    floor_starting_uvalue = self.df["floor_thermal_transmittance"].fillna(
        estimate(self._lambda_function_to_generate_floor_uvalue, False)
    )
    floor_ending_uvalue = self.df["floor_thermal_transmittance_ending"].fillna(
        estimate(self._lambda_function_to_generate_floor_uvalue, True)
    )

    # ~~~~~~~~~~~~~~~~~~
    # Write back
    # ~~~~~~~~~~~~~~~~~~
    estimates = {
        "walls": (walls_starting_uvalue, walls_ending_uvalue),
        "roof": (roof_starting_uvalue, roof_ending_uvalue),
        "floor": (floor_starting_uvalue, floor_ending_uvalue),
    }
    for component, (starting_uvalue, ending_uvalue) in estimates.items():
        starting_column = f"{component}_thermal_transmittance"
        ending_column = f"{component}_thermal_transmittance_ending"
        self.df[starting_column] = self.df[starting_column].fillna(starting_uvalue)
        self.df[ending_column] = self.df[ending_column].fillna(ending_uvalue)

    self.df = self.df.drop(
        columns=[
            "floor_type",
            "wall_type",
            "walls_clean_description",
            "walls_clean_description_ending",
            "estimated_number_of_floors",
            "ground_floor_area_starting",
            "ground_floor_area_ending",
        ]
    )
def _adjust_assumed_values_in_wall_descriptions(self):
"""
Strip out assumed values for all wall descriptions
"""
for col in ["walls_clean_description", "walls_clean_description_ending"]:
self.df[col] = (
self.df[col].str.replace("(assumed)", "", regex=False).str.rstrip()
)
def _drop_inconsistent_properties(self, expanded_df: pd.DataFrame, component: str):
"""
Drop properties that have inconsistent data, i.e. changing material types
"""
if component == "walls":
expanded_df = expanded_df[
(expanded_df["is_cavity_wall"] == expanded_df["is_cavity_wall_ending"])
& (
expanded_df["is_solid_brick"]
== expanded_df["is_solid_brick_ending"]
)
& (
expanded_df["is_timber_frame"]
== expanded_df["is_timber_frame_ending"]
)
& (
expanded_df["is_granite_or_whinstone"]
== expanded_df["is_granite_or_whinstone_ending"]
)
& (expanded_df["is_cob"] == expanded_df["is_cob_ending"])
& (
expanded_df["is_sandstone_or_limestone"]
== expanded_df["is_sandstone_or_limestone_ending"]
)
]
elif component == "floor":
expanded_df = expanded_df[
(expanded_df["is_suspended"] == expanded_df["is_suspended_ending"])
& (expanded_df["is_solid"] == expanded_df["is_solid_ending"])
& (
expanded_df["another_property_below"]
== expanded_df["another_property_below_ending"]
)
& (
expanded_df["is_to_unheated_space"]
== expanded_df["is_to_unheated_space_ending"]
)
& (
expanded_df["is_to_external_air"]
== expanded_df["is_to_external_air_ending"]
)
]
elif component == "roof":
expanded_df = expanded_df[
(expanded_df["is_pitched"] == expanded_df["is_pitched_ending"])
& (expanded_df["is_roof_room"] == expanded_df["is_roof_room_ending"])
& (expanded_df["is_loft"] == expanded_df["is_loft_ending"])
& (expanded_df["is_flat"] == expanded_df["is_flat_ending"])
& (expanded_df["is_thatched"] == expanded_df["is_thatched_ending"])
& (expanded_df["is_at_rafters"] == expanded_df["is_at_rafters_ending"])
& (
expanded_df["has_dwelling_above"]
== expanded_df["has_dwelling_above_ending"]
)
]
return expanded_df
def _expand_description_to_features(self, cleaned_lookup: dict):
    """
    Merge the cleaned lookup onto the starting and ending description of each component,
    expanding the free-text descriptions into features.

    For every component the lookup is joined twice (starting description, then ending
    description with an "_ending" suffix). Rows whose key building fabric flags differ
    between the two EPCs are removed: a home going from, say, cavity wall to solid wall
    indicates the first EPC was of low quality. The original description columns and the
    lookup columns not needed for modelling are dropped, and the generic lookup columns
    that remain are prefixed with the component name.

    :param cleaned_lookup: lookup tables keyed by "<component>-description" (and "main-fuel"),
        each convertible to a dataframe with an "original_description" column
    """
    cols_to_drop = {
        "walls": [
            # We need to cleaned descriptions for pulling out u-values
            "original_description",
            "thermal_transmittance_unit",
            "original_description_ending",
            "thermal_transmittance_unit_ending",
            "is_cavity_wall_ending",
            "is_solid_brick_ending",
            "is_system_built_ending",
            "is_timber_frame_ending",
            "is_granite_or_whinstone_ending",
            "is_as_built_ending",
            "is_cob_ending",
            "is_assumed_ending",
            "is_sandstone_or_limestone_ending",
            # Re remove the is_assumed columns
            "is_assumed",
            "is_assumed_ending",
        ],
        "floor": [
            "original_description",
            "clean_description",
            "thermal_transmittance_unit",
            "no_data",
            "no_data_ending",
            "original_description_ending",
            "clean_description_ending",
            "thermal_transmittance_unit_ending",
            "is_suspended_ending",
            "is_solid_ending",
            "another_property_below_ending",
            "is_to_unheated_space_ending",
            "is_to_external_air_ending",
            "is_assumed",
            "is_assumed_ending",
        ],
        "roof": [
            "original_description",
            "clean_description",
            "thermal_transmittance_unit",
            "is_assumed",
            "is_valid",
            "original_description_ending",
            "clean_description_ending",
            "thermal_transmittance_unit_ending",
            "is_pitched_ending",
            "is_roof_room_ending",
            "is_loft_ending",
            "is_flat_ending",
            "is_thatched_ending",
            "has_dwelling_above_ending",
            "is_assumed_ending",
            "is_valid_ending",
        ],
        "hotwater": [
            "original_description",
            "clean_description",
            "assumed",
            "original_description_ending",
            "clean_description_ending",
            "assumed_ending",
        ],
        "mainheat": [
            "original_description",
            "clean_description",
            "original_description_ending",
            "has_assumed",
            "original_description_ending",
            "clean_description_ending",
            "has_assumed_ending",
        ],
        "mainheatcont": [
            "original_description",
            "clean_description",
            "original_description_ending",
            "clean_description_ending",
        ],
        "windows": [
            "original_description",
            "clean_description",
            "original_description_ending",
            "clean_description_ending",
            # We don't need many of the glazing coverage features because we have the multi_glaze_proportion feature
            "has_glazing",
            "glazing_coverage",
            "no_data",
            "has_glazing_ending",
            "glazing_coverage_ending",
            "no_data_ending",
        ],
        "main-fuel": [
            "original_description",
            "clean_description",
            "original_description_ending",
            "clean_description_ending",
        ],
    }

    # Generic lookup column names that get a component prefix if they survive the drop
    generic_columns = (
        "insulation_thickness",
        "thermal_transmittance",
        "tariff_type",
        "clean_description",
    )

    for component, lookup_columns_to_drop in cols_to_drop.items():
        # TODO: change cleaned dataframe to have underscores instead of dashes
        if component == "main-fuel":
            lookup_key = "main-fuel"
            starting_column = "main_fuel_starting"
            ending_column = "main_fuel_ending"
        else:
            lookup_key = f"{component}-description"
            starting_column = f"{component}_description_starting"
            ending_column = f"{component}_description_ending"

        lookup_df = pd.DataFrame(cleaned_lookup[lookup_key])

        with_starting = self.df.merge(
            lookup_df,
            how="left",
            left_on=starting_column,
            right_on="original_description",
        )
        merged = with_starting.merge(
            lookup_df,
            how="left",
            left_on=ending_column,
            right_on="original_description",
            suffixes=("", "_ending"),
        )

        # Drop properties where key material types have changed
        merged = self._drop_inconsistent_properties(merged, component)

        # Drop the original description columns along with the unwanted lookup columns
        merged = merged.drop(
            columns=lookup_columns_to_drop + [starting_column, ending_column]
        )

        # Rename columns to component specific names, if they have not been dropped
        renames = {}
        for generic in generic_columns:
            renames[generic] = f"{component}_{generic}"
            renames[f"{generic}_ending"] = f"{component}_{generic}_ending"

        self.df = merged.rename(columns=renames)

    # We don't need any lighting specific cleaning, we just drop the original description as we use
    # LOW_ENERGY_LIGHTING_STARTING, LOW_ENERGY_LIGHTING_ENDING
    self.df = self.df.drop(
        columns=["lighting_description_starting", "lighting_description_ending"]
    )
def _clean_missing_values(self, ignore_cols=None):
missings = pd.isnull(self.df).sum()
missings = missings[missings > 0]
if ignore_cols:
missings = missings[~missings.index.isin(ignore_cols)]
for col in missings.index:
unique_values = self.df[col].unique()
if (
(True in unique_values)
or (False in unique_values)
or (col in BOOLEAN_VARIABLES)
):
self.df[col] = self.df[col].fillna(False)
if "none" in unique_values:
self.df[col] = self.df[col].fillna("none")
else:
self.df[col] = self.df[col].fillna("Unknown")
def _null_validation(self, information: str):
print(f"Null validation after {information}")
if pd.isnull(self.df).sum().sum():
raise ValueError(f"Null values found in dataset, after step {information}")
def _drop_features(self):
"""
Drop features that are not needed for modelling
"""
self.df = self.df.drop(
columns=["lodgement_date_starting", "lodgement_date_ending"]
)
def _feature_generation(self):
    """
    Add the derived modelling features: the day counts returned by ``_calculate_days_to``
    for the starting and the ending lodgement date.
    """
    derived_from = (
        ("days_to_starting", "lodgement_date_starting"),
        ("days_to_ending", "lodgement_date_ending"),
    )
    for feature, source in derived_from:
        self.df[feature] = self._calculate_days_to(self.df[source])
def _clean_efficiency_variables(self):
"""
These is scope to clean this by the model per corresponding description.
E.g. for WALLS_ENG_EFF we could look at the mode efficiency rating by description and
fill in the missing values with this.
When looking at this initially, there are a large volume of records with missing energy efficiency
values and therefore a simpler approach was taken just to test including these variables
:param df:
:return:
"""
missings = pd.isnull(self.df).sum()
missings = missings[missings >= 1]
if len(missings) == 0:
return
# Make sure they are all efficiency columns
if any(~missings.index.str.contains("energy_eff")):
raise ValueError("Non efficiency columns are missing")
for m in missings.index:
self.df[m] = self.df[m].fillna("NO_RATING")
@staticmethod
def _calculate_days_to(lodgement_date):
    """
    Return the number of days between EARLIEST_EPC_DATE and the lodgement date(s).

    :param lodgement_date: a single date string, or a Series of dates
    :return: an integer day count for a string input, otherwise the ``.dt.days`` of the difference
    """
    elapsed = pd.to_datetime(lodgement_date) - pd.to_datetime(EARLIEST_EPC_DATE)

    # A single string produces a scalar difference; anything else goes through the .dt accessor
    if isinstance(lodgement_date, str):
        return elapsed.days
    return elapsed.dt.days
# def __add__(self, other) -> "TrainingDataset":
# if not isinstance(other, TrainingDataset):
# raise TypeError("Addition can only be performed with another instance of TrainingDataset")
# return TrainingDataset(self.datasets + other.datasets)
# def __radd__(self, other):
# """
# Required for sum() to work
# """
# if isinstance(other, int):
# return self
# else:
# return self.__add__(other)
class NewDataset(BaseDataset):
    """
    A collection of EPCDifferenceRecords that can be combined with other NewDataset
    instances using ``+`` or ``sum()``.
    """

    def __init__(self, datasets: List[EPCDifferenceRecord]) -> None:
        """
        :param datasets: the records held by this dataset
        """
        # Run the base initialiser so attributes it defines (pipeline_steps) exist on the instance
        super().__init__()
        # self.pipeline_steps = self.pipeline_factory("newdata")
        self.datasets = datasets

    def __add__(self, other) -> "NewDataset":
        """
        Return a new NewDataset holding the records of both operands.

        :raises TypeError: if ``other`` is not a NewDataset
        """
        if not isinstance(other, NewDataset):
            raise TypeError(
                "Addition can only be performed with another instance of NewDataset"
            )
        return NewDataset(self.datasets + other.datasets)

    def __radd__(self, other):
        """
        Required for sum() to work: sum() starts from the integer 0, so an integer on the
        left-hand side leaves this dataset unchanged.
        """
        if isinstance(other, int):
            return self
        return self.__add__(other)

410
etl/epc/Pipeline.py Normal file
View file

@ -0,0 +1,410 @@
import msgpack
import pandas as pd
from datetime import datetime
from typing import List
from pathlib import Path
from tqdm import tqdm
import multiprocessing as mp
from etl.epc.DataProcessor import EPCDataProcessor
from etl.epc.Record import EPCRecord, EPCDifferenceRecord
from etl.epc.Dataset import TrainingDataset
from utils.s3 import save_dataframe_to_s3_parquet, read_from_s3
from etl.epc.settings import (
MANDATORY_FIXED_FEATURES,
LATEST_FIELD,
COMPONENT_FEATURES,
RDSAP_RESPONSE,
HEAT_DEMAND_RESPONSE,
CARBON_RESPONSE,
CORE_COMPONENT_FEATURES,
EFFICIENCY_FEATURES,
POTENTIAL_COLUMNS,
ROOM_FEATURES,
)
# TODO: change in setting file
def _lowercased(names):
    """Return a new list with the lower-cased form of every entry in ``names``."""
    return [name.lower() for name in names]


# The constants imported from the settings module are rebound to lower-case versions
MANDATORY_FIXED_FEATURES = _lowercased(MANDATORY_FIXED_FEATURES)
# LATEST_FIELD = [x.lower() for x in LATEST_FIELD if x.lower() not in ROOM_FEATURES]
LATEST_FIELD = _lowercased(LATEST_FIELD)
COMPONENT_FEATURES = _lowercased(COMPONENT_FEATURES)
CORE_COMPONENT_FEATURES = _lowercased(CORE_COMPONENT_FEATURES)
EFFICIENCY_FEATURES = _lowercased(EFFICIENCY_FEATURES)
POTENTIAL_COLUMNS = _lowercased(POTENTIAL_COLUMNS)

RDSAP_RESPONSE = RDSAP_RESPONSE.lower()
HEAT_DEMAND_RESPONSE = HEAT_DEMAND_RESPONSE.lower()
CARBON_RESPONSE = CARBON_RESPONSE.lower()

# Columns that can differ between two EPCs of the same property.
# ROOM_FEATURES is used exactly as imported (it is not lower-cased here).
VARIABLE_DATA_FEATURES = [
    *COMPONENT_FEATURES,
    *ROOM_FEATURES,
    *EFFICIENCY_FEATURES,
    *POTENTIAL_COLUMNS,
    "lodgement_date",
    RDSAP_RESPONSE,
    HEAT_DEMAND_RESPONSE,
    CARBON_RESPONSE,
]
def get_cleaned_description_mapping():
    """
    Retrieve the cleaned EPC description lookup from s3.

    The object is stored in MessagePack format (despite the ``.bson`` key suffix) and
    is therefore decoded with ``msgpack`` before being returned.

    :return: The decoded lookup object
    """
    packed_payload = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson", bucket_name="retrofit-data-dev"
    )
    return msgpack.unpackb(packed_payload, raw=False)


# NOTE: evaluated when this module is imported, so importing it triggers the s3 read
clean_lookup = get_cleaned_description_mapping()
class EPCPipeline:
"""
Build EPC datasets, either from local certificate directories or from API records.
In "training" mode the pipeline takes a list of directories and processes them to
create a dataset:
- Load the data
- Pre-process the data
- Create a dataset
- Clean the dataset
- Store the dataset
In "newdata" mode the EPC records supplied via ``api_epc_records`` are prepared instead.
"""
def __init__(
    self,
    epc_data_processor: EPCDataProcessor,
    api_epc_records: dict | None = None,
    directories: List[Path] | None = None,
    run_mode="training",
    epc_local_file="certificates.csv",
    epc_bucket_name="retrofit-data-dev",
    epc_cleaning_dataset_key="sap_change_model/{}/cleaning_dataset_rooms.parquet",
    epc_all_equal_rows_key="sap_change_model/{}/all_equal_rows_rooms.parquet",
    epc_compiled_dataset_key="sap_change_model/{}/dataset_rooms.parquet",
    use_parallel=False,
):
    """
    :param epc_data_processor: EPCDataProcessor object
    :param api_epc_records: EPC records handed to EPCRecord by the newdata pipeline;
        not used by the training pipeline
    :param directories: List of directories to process (required for training)
    :param run_mode: Either training or newdata
    :param epc_local_file: Local file name of the EPC data inside each directory
    :param epc_bucket_name: S3 bucket name
    :param epc_cleaning_dataset_key: S3 key template for the cleaning dataset
    :param epc_all_equal_rows_key: S3 key template for the all equal rows dataset
    :param epc_compiled_dataset_key: S3 key template for the compiled dataset
    :param use_parallel: Process the training directories with a multiprocessing pool

    The ``{}`` placeholder of each S3 key template is filled with the timestamp taken
    when the pipeline object is created.
    """
    # Accumulators filled while directories are processed
    self.compiled_dataset: pd.DataFrame = pd.DataFrame()
    self.compiled_all_equal_rows: list = []
    self.compiled_cleaning_averages: list = []

    self.directories = directories
    self.epc_data_processor = epc_data_processor
    self.api_epc_records = api_epc_records
    self.run_mode = run_mode
    self.epc_local_file = epc_local_file
    self.epc_bucket_name = epc_bucket_name
    self.use_parallel = use_parallel

    # One timestamp per pipeline object so that all three outputs share a prefix
    self.timeprefix = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    self.epc_cleaning_dataset_key = epc_cleaning_dataset_key.format(self.timeprefix)
    self.epc_all_equal_rows_key = epc_all_equal_rows_key.format(self.timeprefix)
    self.epc_compiled_dataset_key = epc_compiled_dataset_key.format(self.timeprefix)
def run(self):
"""
Entrypoint to run the pipeline
"""
if self.run_mode == "training":
self.run_training_dataset_pipeline()
elif self.run_mode == "newdata":
self.run_newdata_dataset_pipeline()
else:
raise ValueError("Run mode defined needs to be in 'training' or 'newdata'")
def run_newdata_dataset_pipeline(self):
    """
    Main function to run the newdata pipeline.

    The API records are wrapped in a single EPCRecord, inserted into the data
    processor and prepared; one EPCRecord is then built per prepared row.

    NOTE(review): the per-row records are neither stored nor returned, so callers
    currently get nothing back from this method - confirm whether a later step is
    still to be added.
    """
    # This uses all the epc records to clean the data
    prepared_epc = EPCRecord(self.api_epc_records, run_mode="newdata")

    self.epc_data_processor.insert_data(prepared_epc)
    self.epc_data_processor.prepare_data()

    prepared_rows = self.epc_data_processor.data.to_dict(orient="records")
    epc_records = [EPCRecord(**row, run_mode="newdata") for row in prepared_rows]
def run_training_dataset_pipeline(self):
    """
    Main function to run the training dataset generation pipeline.

    Processes every directory (sequentially or in parallel, depending on
    ``self.use_parallel``) and then writes the compiled dataset, the all-equal rows
    and the cleaning averages to s3 as parquet files.

    :raises ValueError: if no directories were supplied (``None`` or an empty list)
    """
    # An empty list used to pass the ``is None`` check, reach the uploads and then
    # fail in ``pd.concat`` with "No objects to concatenate". Fail fast instead,
    # with the same exception type.
    if not self.directories:
        raise ValueError(
            "Directories not specified - Unable to run Training pipeline"
        )

    if self.use_parallel:
        self.run_training_dataset_parallel_pipeline()
    else:
        for directory in tqdm(self.directories):
            self.process_directory(directory)

    save_dataframe_to_s3_parquet(
        df=self.compiled_dataset,
        bucket_name=self.epc_bucket_name,
        file_key=self.epc_compiled_dataset_key,
    )
    save_dataframe_to_s3_parquet(
        df=pd.DataFrame(self.compiled_all_equal_rows),
        bucket_name=self.epc_bucket_name,
        file_key=self.epc_all_equal_rows_key,
    )
    # One cleaning-averages frame is collected per processed directory
    save_dataframe_to_s3_parquet(
        df=pd.concat(self.compiled_cleaning_averages),
        bucket_name=self.epc_bucket_name,
        file_key=self.epc_cleaning_dataset_key,
    )
def run_training_dataset_parallel_pipeline(self):
    """
    Run the training pipeline in parallel.

    Each directory is handled by ``process_directory_task`` in a worker process; the
    per-directory outputs are then merged into this object's accumulators.
    """
    with mp.Pool() as pool:
        results = list(
            tqdm(
                pool.imap(self.process_directory_task, self.directories),
                total=len(self.directories),
            ),
        )

    worker_datasets = []
    for result in tqdm(results):
        worker_datasets.append(result["dataset"])
        self.compiled_cleaning_averages.append(result["cleaning_averages"])
        self.compiled_all_equal_rows.extend(result["all_equal_rows"])

    # Concatenate once: re-concatenating inside the loop copied the growing frame
    # for every result, which is quadratic in the number of directories.
    self.compiled_dataset = pd.concat([self.compiled_dataset, *worker_datasets])
def process_directory_task(self, directory: Path) -> dict:
    """
    Task to enable parallel processing.

    Processes one directory and returns the resulting state so that it can be sent
    back to the parent process.

    :param directory: Directory to process (handed to ``process_directory``)
    :return: dict with the keys "dataset" (the compiled dataset), "cleaning_averages"
        (the data processor's cleaning averages) and "all_equal_rows"
    """
    self.process_directory(directory=directory)
    return {
        "dataset": self.compiled_dataset,
        "cleaning_averages": self.epc_data_processor.cleaning_averages,
        "all_equal_rows": self.compiled_all_equal_rows,
    }
def process_directory(self, directory: Path):
    """
    Process a single directory.

    Prepares the EPC file found in ``directory``, builds the difference records for
    every UPRN in it and appends the resulting training dataset to
    ``self.compiled_dataset``. The cleaning averages produced while preparing the
    data are collected in ``self.compiled_cleaning_averages``.

    :param directory: Directory containing the local EPC file
    :return: None
    """
    self.epc_data_processor.prepare_data(filepath=directory / self.epc_local_file)
    constituency_data = self.epc_data_processor.data
    self.compiled_cleaning_averages.append(self.epc_data_processor.cleaning_averages)

    constituency_difference_records = []
    for uprn, property_data in constituency_data.groupby("uprn", observed=True):
        difference_records = self.process_uprn(
            uprn=str(uprn), property_data=property_data, directory=directory
        )
        # process_uprn returns None for properties that have to be ignored
        if difference_records is None:
            continue
        constituency_difference_records.extend(difference_records)

    constituency_dataset = TrainingDataset(
        datasets=constituency_difference_records, cleaned_lookup=clean_lookup
    )
    self.compiled_dataset = pd.concat(
        [self.compiled_dataset, constituency_dataset.df]
    )
def process_uprn(self, uprn: str, property_data: pd.DataFrame, directory: Path):
    """
    Process a single UPRN, which may have multiple different EPCs.

    :param uprn: UPRN
    :param property_data: pd.DataFrame, Data for a single UPRN
    :param directory: Path, Directory of the UPRN
    :return: The difference records generated for the property, or None when the
        mandatory fixed features are inconsistent or incomplete
    """
    mandatory_values = property_data[MANDATORY_FIXED_FEATURES]
    # If a property has changed building type, we can ignore the epc rating,
    # i.e. every mandatory fixed feature should have exactly one unique value
    if any(mandatory_values.nunique() > 1):
        return None
    # The mandatory fixed features must not contain any missing value either
    if pd.isnull(mandatory_values).sum().sum() > 0:
        return None

    # Fixed features - these are property attributes that shouldn't change over time.
    # Take the latest row for both the LATEST_FIELD and MANDATORY_FIXED_FEATURES
    # columns and combine all fields together
    latest_row = property_data[MANDATORY_FIXED_FEATURES + LATEST_FIELD].iloc[-1]
    fixed_data = latest_row.to_dict()

    # We include the lodgement date here as we probably need to factor time into the
    # model, since EPC standards and rigour have changed over time
    variable_rows = property_data[VARIABLE_DATA_FEATURES].to_dict(orient="records")

    uprn = str(uprn)
    epc_records = [
        EPCRecord(uprn, **row, run_mode="training") for row in variable_rows
    ]

    # TODO: We want to be able to provide value for the u values in the main pipeline
    # so this will need to be part of the EPCRecord
    # We can use multiple types of comparison datasets - i.e. compare consecutive
    # records, or compare all permutations of records
    return self._generate_property_difference_records(
        epc_records, uprn, directory, fixed_data
    )
def _generate_property_difference_records(
    self, epc_records: List[EPCRecord], uprn: str, directory: Path, fixed_data: dict
):
    """
    Build the difference records for one property.

    We can use multiple types of comparison datasets, for example:
    - First vs second
    - Second vs third
    - First vs third
    The all-pairs comparison is the one currently in use.

    :param epc_records: EPC records of the property
    :param uprn: UPRN of the property
    :param directory: Directory the property data came from
    :param fixed_data: Fixed attributes of the property
    :return: List of difference records
    """
    # Alternative strategy:
    # self._compare_consecutive_epcs(epc_records, uprn, directory, fixed_data, [])
    return self._compare_all_permutation_epcs(
        epc_records, uprn, directory, fixed_data, []
    )
def _compare_all_permutation_epcs(
self,
epc_records: List[EPCRecord],
uprn: str,
directory: Path,
fixed_data: dict,
property_difference_records: list,
):
"""
Compare every pair of EPCs for a given UPRN.
Each record is paired with every record that follows it in ``epc_records`` and an
EPCDifferenceRecord is created for the pair. Pairs for which
``compare_fields_in_records`` over CORE_COMPONENT_FEATURES reports all-equal are noted
in ``self.compiled_all_equal_rows`` instead of being kept.
:param epc_records: EPC records of one property (presumably oldest first - TODO confirm)
:param uprn: UPRN of the property, stored with any all-equal entry
:param directory: Source directory; its name is stored with any all-equal entry
:param fixed_data: Fixed property attributes handed to each difference record
:param property_difference_records: List the accepted difference records are appended to
:return: ``property_difference_records``
"""
# idx walks every record but the last; idx2 walks the records after idx
for idx in range(0, len(epc_records) - 1):
for idx2 in range(idx + 1, len(epc_records)):
earliest_record: EPCRecord = epc_records[idx]
latest_record: EPCRecord = epc_records[idx2]
# Auto sort the records so that the record with highest RDSAP score is always record1
difference_record: EPCDifferenceRecord = (
latest_record.create_EPCDifferenceRecord(
other=earliest_record, fixed_data=fixed_data
)
)
# difference_record: EPCDifferenceRecord = latest_record - earliest_record
# # TODO: Use method above instead of overloading operator
# difference_record.append_fixed_data(fixed_data)
# TODO: Pull out RDSAP_CHANGE to a variable
if difference_record.get("rdsap_change") == 0:
if not difference_record.ensure_adequate_data():
# Rdsap hasn't changed and ensure_adequate_data() was falsy, so the pair is skipped.
# NOTE(review): the original note read "we have enough data to use this record, i.e. all
# fields aside from mechanical ventilation are the same", which contradicts the
# `continue` below - confirm the intended meaning.
# self.check_records.append({"uprn": uprn, "directory_name": directory.name, "difference_record": difference_record, "earliest_record": earliest_record, "latest_record": latest_record})
continue
all_equal = difference_record.compare_fields_in_records(
fields=[x.lower() for x in CORE_COMPONENT_FEATURES]
)
if all_equal:
# Keep track of this for the moment so we can analyse
self.compiled_all_equal_rows.append(
{"uprn": uprn, "directory_name": directory.name}
)
continue
property_difference_records.append(difference_record)
return property_difference_records
def _compare_consecutive_epcs(
self,
epc_records: List[EPCRecord],
uprn: str,
directory: Path,
fixed_data: dict,
property_difference_records: list,
):
"""
Compare consecutive EPCs for a given UPRN.
Each record is compared with the record that directly follows it in ``epc_records``.
The difference record is built with the subtraction operator and the fixed data is
appended to it afterwards.
:param epc_records: EPC records of one property (presumably oldest first - TODO confirm)
:param uprn: UPRN of the property, stored with any all-equal entry
:param directory: Source directory; its name is stored with any all-equal entry
:param fixed_data: Fixed property attributes appended to each difference record
:param property_difference_records: List the accepted difference records are appended to
:return: ``property_difference_records``
"""
for idx in range(0, len(epc_records) - 1):
# The range above already stops at len(epc_records) - 2, so this guard never fires
if idx >= len(epc_records) - 1:
break
earliest_record: EPCRecord = epc_records[idx]
latest_record: EPCRecord = epc_records[idx + 1]
# Auto sort the records so that the record with highest RDSAP score is always record1
difference_record: EPCDifferenceRecord = latest_record - earliest_record
# TODO: Use method above instead of overloading operator
difference_record.append_fixed_data(fixed_data)
# TODO: Pull out RDSAP_CHANGE to a variable
if difference_record.get("rdsap_change") == 0:
if not difference_record.ensure_adequate_data():
# Rdsap hasn't changed and ensure_adequate_data() was falsy, so the pair is skipped.
# NOTE(review): the original note read "we have enough data to use this record, i.e. all
# fields aside from mechanical ventilation are the same", which contradicts the
# `continue` below - confirm the intended meaning.
# self.check_records.append({"uprn": uprn, "directory_name": directory.name, "difference_record": difference_record, "earliest_record": earliest_record, "latest_record": latest_record})
continue
all_equal = difference_record.compare_fields_in_records(
fields=[x.lower() for x in CORE_COMPONENT_FEATURES]
)
if all_equal:
# Keep track of this for the moment so we can analyse
self.compiled_all_equal_rows.append(
{"uprn": uprn, "directory_name": directory.name}
)
continue
# difference_record.append_fixed_data(fixed_data)
property_difference_records.append(difference_record)
return property_difference_records

1167
etl/epc/Record.py Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,61 @@
"""
Specify the validation rules for each field in the different record types.
"""
def validate_walls_description(value):
    """
    Check that ``value`` is one of the recognised wall descriptions.

    :param value: The walls description to validate
    :raises ValueError: if ``value`` is not a recognised description
    :return: None
    """
    recognised_descriptions = (
        "Cavity",
        "Solid",
        "System built",
        "Timber frame",
        "Suspended timber",
        "Other",
    )
    if value in recognised_descriptions:
        return
    raise ValueError("Walls description is not valid")
# Validation rules for a single EPC record, keyed by field name. Each rule gives the
# expected "type" plus either the "acceptable_values" or a "range" pair; a rule
# may also name a validator "function".
EPCRecordValidationConfiguration = {
"WALLS_DESCRIPTION": {
"type": "string",
# Same values as the list checked inside validate_walls_description
"acceptable_values": ["Cavity", "Solid", "System built", "Timber frame", "Suspended timber", "Other"],
"function": validate_walls_description
},
"FLOOR_DESCRIPTION": {
"type": "string",
"acceptable_values": ["Solid", "Suspended", "Other"]
},
"ENERGY_CONSUMPTION_CURRENT": {
"type": "float",
"range": [0, 100]
}
}
# No rules defined yet for difference records
EPCDifferenceRecordValidationConfiguration = {
}
# Validation rules for the fixed data attached to a difference record
EPCDifferenceRecordFixedDataValidationConfiguration = {
"PROPERTY_TYPE": {
"type": "string",
"acceptable_values": ["House", "Flat", "Bungalow", "Maisonette", "Park home", "Other"]
},
"BUILT_FORM": {
"type": "string",
"acceptable_values": ["Detached", "Semi-Detached", "End-Terrace", "Mid-Terrace", "Enclosed Mid-Terrace", "Enclosed End-Terrace", "Enclosed Detached", "Not applicable"]
},
# NOTE(review): this key is spelled "CONSITUENCY" (missing a "T") and its acceptable values
# are countries - confirm the intended field name and values
"CONSITUENCY": {
"type": "string",
"acceptable_values": ["England", "Wales", "Scotland", "Northern Ireland"]
},
"NUMBER_HABITABLE_ROOMS": {
"type": "integer",
"range": [0, 100]
},
"NUMBER_HEATED_ROOMS": {
"type": "integer",
"range": [0, 100]
},
"FIXED_LIGHTING_OUTLETS_COUNT": {
"type": "integer",
"range": [0, 100]
},
# NOTE(review): the list of acceptable values is empty - presumably still to be filled in
"CONSTRUCTION_AGE_BAND": {
"type": "string",
"acceptable_values": []
}
}
# No rules defined yet at dataset level
DatasetValidationConfiguration = {
}

View file

@ -0,0 +1,289 @@
from datetime import datetime
import itertools
import pandas as pd
from etl.epc.Record import EPCRecord
from backend.SearchEpc import SearchEpc
from sqlalchemy.orm import sessionmaker
from backend.app.config import get_settings
from backend.app.db.connection import db_engine
from backend.app.db.functions.materials_functions import get_materials
from backend.app.plan.utils import get_cleaned
from backend.Property import Property
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from recommendations.Recommendations import Recommendations
from utils.logger import setup_logger
from utils.s3 import read_dataframe_from_s3_parquet, save_dataframe_to_s3_parquet
from datetime import datetime
# Timestamp used in the s3 key of the scoring data saved at the end of this script
now = datetime.now().strftime("%d-%m-%Y-%H-%M-%S")
logger = setup_logger()
logger.info("Connecting to db")
session = sessionmaker(bind=db_engine)()
# ISO timestamp handed to ModelApi further down
created_at = datetime.now().isoformat()
# NOTE(review): no commit, rollback or close of this session is visible in this script
session.begin()
logger.info("Getting the inputs")
# Inputs shared by every scenario property: cleaning data, materials, the cleaned
# lookup, UPRN filename metadata and the solar lookups
cleaning_data = read_dataframe_from_s3_parquet(
bucket_name=get_settings().DATA_BUCKET,
file_key="sap_change_model/cleaning_dataset.parquet",
)
materials = get_materials(session)
cleaned = get_cleaned()
uprn_filenames = read_dataframe_from_s3_parquet(
bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
)
photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(
bucket=get_settings().DATA_BUCKET
)
# Properties to build scenario data for. Each property is looked up by address and
# postcode and matched to one EPC through its "lmk-key".
# Every entry of "measures" is a list of four items:
#   [0] the measure names to combine,
#   [1] the impact value stored with the record (converted to int later on),
#   [2] a dict of column overrides applied to the scoring record,
#   [3] a list of indices, which the processing loop in this script does not read.
scenario_properties = [
{
"address": "2 South Terrace",
"postcode": "NN1 5JY",
"lmk-key": "1459796789102016070507274146560098",
"measures": [
[
["internal_wall_insulation"],
"11",
{"walls_insulation_thickness_ending": "average"},
[0],
],
[
["external_wall_insulation"],
"10",
{"walls_insulation_thickness_ending": "average"},
[0],
],
[["solar", "windows"], "15", {"photo_supply_ending": 50}, [0, 1]],
],
},
{
"address": "8 Lindlings",
"postcode": "HP1 2HA",
"lmk-key": "c14029235739827d5f627dc8aa9bb567d026b267e851e0db0001db24638667b1",
"measures": [
[
["cavity_wall_insulation", "loft_insulation"],
"15",
{"walls_insulation_thickness_ending": "average"},
[0, 1],
],
],
},
{
"address": "44 Lindlings",
"postcode": "HP1 2HE",
"lmk-key": "99296a6dda21314fef3a61cda59e441e9a2aacf115eb96f4a0fa85696bf7b117",
"measures": [
[
["cavity_wall_insulation", "loft_insulation"],
"15",
{"walls_insulation_thickness_ending": "average"},
[0, 1],
],
],
},
{
"address": "46 Chaulden Terrace",
"postcode": "HP1 2AN",
"lmk-key": "d1e0534be3a44c33003323b21d0e322e3daddc65b5ee71936f89c59ddab96b50",
"measures": [
[
["cavity_wall_insulation", "loft_insulation"],
"15",
{"walls_insulation_thickness_ending": "average"},
[0, 1],
],
],
},
{
"address": "73 Long Chaulden",
"postcode": "HP1 2HX",
"lmk-key": "1eae354db522a95188018d9cd0502ed8c609910b6c88f8797d3a25f59b11770a",
"measures": [
[
["cavity_wall_insulation", "loft_insulation"],
"15",
{"walls_insulation_thickness_ending": "average"},
[0, 1],
],
],
},
]
def _recommendations_of_type(candidates, recommendation_type):
    """Return the entries of ``candidates`` whose "type" equals ``recommendation_type``."""
    return [rec for rec in candidates if rec["type"] == recommendation_type]


# The wall measure names are also the "type" values of the wall recommendations.
# The order is significant: matches are collected type by type in this order.
WALL_MEASURE_TYPES = (
    "internal_wall_insulation",
    "external_wall_insulation",
    "cavity_wall_insulation",
)

recommendations_scoring_data = []
for scenario_property in scenario_properties:
    # We validate each record in the file. If the record is NOT valid, we need to
    # handle this accordingly
    epc_searcher = SearchEpc(
        address1=scenario_property["address"],
        postcode=scenario_property["postcode"],
        auth_token=get_settings().EPC_AUTH_TOKEN,
        os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY,
    )
    epc_searcher.find_property()

    # Find the epc with the same LMK key (an IndexError here means none matched)
    all_epcs = epc_searcher.older_epcs.copy()
    all_epcs.extend([epc_searcher.newest_epc, epc_searcher.full_sap_epc])
    wanted_lmk_key = scenario_property.get("lmk-key")
    matching_epcs = [
        epc for epc in all_epcs if epc.get("lmk-key", None) == wanted_lmk_key
    ]
    original_epc = matching_epcs[0]

    epc_records = {
        "original_epc": original_epc,
        "full_sap_epc": {},
        "old_data": [],
    }
    prepared_epc = EPCRecord(
        epc_records=epc_records, run_mode="newdata", cleaning_data=cleaning_data
    )

    p = Property(
        id=prepared_epc.uprn,
        address=epc_searcher.address_clean,
        postcode=epc_searcher.postcode_clean,
        epc_record=prepared_epc,
    )
    p.get_spatial_data(uprn_filenames)
    p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)

    recommender = Recommendations(property_instance=p, materials=materials)
    property_recommendations = recommender.recommend("0")
    wall_recommendations = recommender.wall_recomender.recommendations
    loft_recommendations = recommender.roof_recommender.recommendations
    solar_recommendations = recommender.solar_recommender.recommendation
    windows_recommendations = recommender.windows_recommender.recommendation

    p.create_base_difference_epc_record(cleaned_lookup=cleaned)

    scoring_list = []
    # Create the record for each of the different measures
    for measure_impact_override in scenario_property["measures"]:
        measure, impact, override = measure_impact_override[:3]
        measure_label = "&".join(measure)

        wall_recs = []
        for wall_type in WALL_MEASURE_TYPES:
            if wall_type in measure:
                wall_recs.extend(
                    _recommendations_of_type(wall_recommendations, wall_type)
                )

        loft_recs = []
        if "loft_insulation" in measure:
            loft_recs = _recommendations_of_type(
                loft_recommendations, "loft_insulation"
            )

        solar_recs = []
        if "solar" in measure:
            solar_recs = _recommendations_of_type(solar_recommendations, "solar_pv")

        windows_recs = []
        if "windows" in measure:
            windows_recs = _recommendations_of_type(
                windows_recommendations, "windows_glazing"
            )

        # Only components with at least one recommendation take part in the product
        combi_list = [
            recs
            for recs in (wall_recs, loft_recs, solar_recs, windows_recs)
            if len(recs) != 0
        ]
        all_combi_recommendations = list(itertools.product(*combi_list))

        for i, combi in enumerate(all_combi_recommendations):
            base_record = p.base_difference_record.df.to_dict("records")[0].copy()
            recommendation_record = p.create_recommendation_scoring_data(
                property_id=i,
                primary_recommendation_id=i,
                recommendation_record=base_record,
                recommendations=combi,
            )

            if override is not None:
                for key, value in override.items():
                    recommendation_record[key] = value

            recommendation_record["id"] = f"{measure_label}+{i}"
            recommendation_record["impact"] = impact
            scoring_list.append(recommendation_record)

    recommendations_scoring_data.extend(scoring_list)
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
recommendations_scoring_data["impact"] = recommendations_scoring_data[
    "impact"
].astype(int)

# The change/ending response columns are removed before scoring
response_columns = [
    "rdsap_change",
    "heat_demand_change",
    "carbon_change",
    "sap_ending",
    "heat_demand_ending",
    "carbon_ending",
]
recommendations_scoring_data = recommendations_scoring_data.drop(
    columns=response_columns
)

# Move "impact" and then "id" to position 0, so the frame starts with: id, impact
for leading_column in ("impact", "id"):
    recommendations_scoring_data.insert(
        0, leading_column, recommendations_scoring_data.pop(leading_column)
    )

from backend.ml_models.api import ModelApi

model_api = ModelApi(portfolio_id="generate-scenarios-data", timestamp=created_at)

settings = get_settings
all_predictions = model_api.predict_all(
    df=recommendations_scoring_data,
    bucket=settings().DATA_BUCKET,
    prediction_buckets={
        "sap_change_predictions": settings().SAP_PREDICTIONS_BUCKET,
        "heat_demand_predictions": settings().HEAT_PREDICTIONS_BUCKET,
        "carbon_change_predictions": settings().CARBON_PREDICTIONS_BUCKET,
    },
)

# The scoring inputs (not the predictions) are stored under a timestamped key
save_dataframe_to_s3_parquet(
    recommendations_scoring_data,
    "retrofit-data-dev",
    f"scenario_data/{now}/recommendations_scoring_data.parquet",
)

View file

@ -1,636 +1,39 @@
import pandas as pd
import numpy as np
from tqdm import tqdm
import msgpack
from pathlib import Path
from etl.epc.settings import (
MANDATORY_FIXED_FEATURES,
LATEST_FIELD,
COMPONENT_FEATURES,
RDSAP_RESPONSE,
HEAT_DEMAND_RESPONSE,
COLUMNS_TO_MERGE_ON,
CARBON_RESPONSE,
CORE_COMPONENT_FEATURES,
EFFICIENCY_FEATURES,
POTENTIAL_COLUMNS,
MINIMUM_FLOOR_HEIGHT
)
from etl.epc.DataProcessor import DataProcessor
from utils.s3 import save_dataframe_to_s3_parquet, read_from_s3
from recommendations.rdsap_tables import england_wales_age_band_lookup
from recommendations.recommendation_utils import (
get_wall_u_value, get_roof_u_value, get_floor_u_value, estimate_perimeter,
get_wall_type
)
from etl.epc.DataProcessor import EPCDataProcessor
from etl.epc.Pipeline import EPCPipeline
# Folder next to this file that holds the locally stored domestic certificates
DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
def get_cleaned():
def main():
"""
This function will retrieve the cleaned dataset from s3 which has the cleaned
descriptions for the epc dataset
This data is stored in MessagePack format and therefore needs to be decoded
:return:
Orchestration function
"""
cleaned = read_from_s3(
s3_file_name="cleaned_epc_data/cleaned.bson",
bucket_name="retrofit-data-dev"
)
cleaned = msgpack.unpackb(cleaned, raw=False)
return cleaned
def process_and_prune_desriptions(df, cleaned_lookup):
"""
Merge the cleaned lookup table onto the starting and ending descriptions and make sure
that the building fabric in the starting and ending EPC is consistent, so that we are
performing our modelling on the cleanest possible dataset.
For every component the lookup is merged twice: once on the starting description and
once on the ending description (those columns get an ``_ENDING`` suffix). For walls,
floor and roof only rows whose construction flags match between start and end are kept.
The raw description columns and the lookup columns named in ``cols_to_drop`` are dropped.
:param df: DataFrame with ``<COMPONENT>_DESCRIPTION_STARTING`` / ``_ENDING`` columns
(``MAIN_FUEL_STARTING`` / ``MAIN_FUEL_ENDING`` for the fuel) and the lighting descriptions
:param cleaned_lookup: Mapping from lookup key (e.g. "walls-description", "main-fuel") to
the cleaned records for that key (presumably a list of dicts - TODO confirm), each
carrying an ``original_description``
:return: The merged, filtered and pruned DataFrame
"""
# Lookup columns that are removed again after the merge, per component
cols_to_drop = {
"walls": [
# We need the cleaned descriptions for pulling out u-values
'original_description', 'thermal_transmittance_unit',
'original_description_ENDING',
'thermal_transmittance_unit_ENDING',
'is_cavity_wall_ENDING', 'is_filled_cavity_ENDING',
'is_solid_brick_ENDING', 'is_system_built_ENDING',
'is_timber_frame_ENDING', 'is_granite_or_whinstone_ENDING',
'is_as_built_ENDING', 'is_cob_ENDING', 'is_assumed_ENDING',
'is_sandstone_or_limestone_ENDING',
# We remove the is_assumed columns
"is_assumed", "is_assumed_ENDING"
],
"floor": [
"original_description", "clean_description", "thermal_transmittance_unit",
"no_data", "no_data_ENDING", "original_description_ENDING",
"clean_description_ENDING", "thermal_transmittance_unit_ENDING",
"is_suspended_ENDING", "is_solid_ENDING", "another_property_below_ENDING",
"is_to_unheated_space_ENDING", "is_to_external_air_ENDING", "is_assumed",
"is_assumed_ENDING"
],
"roof": [
"original_description", "clean_description", "thermal_transmittance_unit",
"is_assumed", "is_valid", "original_description_ENDING", "clean_description_ENDING",
"thermal_transmittance_unit_ENDING", "is_pitched_ENDING", "is_roof_room_ENDING",
"is_loft_ENDING", "is_flat_ENDING", "is_thatched_ENDING", "is_at_rafters_ENDING",
"has_dwelling_above_ENDING", "is_assumed_ENDING", "is_valid_ENDING"
],
"hotwater": [
"original_description", "clean_description", "assumed", "original_description_ENDING",
"clean_description_ENDING", "assumed_ENDING"
],
"mainheat": [
"original_description", "clean_description", "original_description_ENDING",
"has_assumed", "original_description_ENDING", "clean_description_ENDING",
"has_assumed_ENDING",
],
"mainheatcont": [
"original_description", "clean_description", "original_description_ENDING", "clean_description_ENDING"
],
"windows": [
"original_description", "clean_description", "original_description_ENDING", "clean_description_ENDING",
# We don't need many of the glazing coverage features because we have the multi_glaze_proportion feature
"has_glazing", "glazing_coverage", "no_data", "has_glazing_ENDING", "glazing_coverage_ENDING",
"no_data_ENDING"
],
"main-fuel": [
"original_description", "clean_description", "original_description_ENDING", "clean_description_ENDING"
],
}
for component in ["walls", "floor", "roof", "hotwater", "mainheat", "mainheatcont", "windows", "main-fuel"]:
component_upper = component.upper()
# The fuel columns are named MAIN_FUEL_* (underscore) and have no _DESCRIPTION part
if component == "main-fuel":
component_upper = component_upper.replace("-", "_")
cleaned_key = "main-fuel" if component == "main-fuel" else f"{component}-description"
left_on_starting = (
f"{component_upper}_STARTING" if component == "main-fuel" else f"{component_upper}_DESCRIPTION_STARTING"
)
left_on_ending = (
f"{component_upper}_ENDING" if component == "main-fuel" else f"{component_upper}_DESCRIPTION_ENDING"
)
# First merge adds the lookup columns for the starting description, second merge
# adds the same columns for the ending description with an _ENDING suffix
df = df.merge(
pd.DataFrame(cleaned_lookup[cleaned_key]),
how="left",
left_on=left_on_starting,
right_on="original_description",
).merge(
pd.DataFrame(cleaned_lookup[cleaned_key]),
how="left",
left_on=left_on_ending,
right_on="original_description",
suffixes=("", "_ENDING")
)
if component == "walls":
# We make sure the wall construction hasn't changed
df = df[
(df["is_cavity_wall"] == df["is_cavity_wall_ENDING"]) &
(df["is_solid_brick"] == df["is_solid_brick_ENDING"]) &
(df["is_timber_frame"] == df["is_timber_frame_ENDING"]) &
(df["is_granite_or_whinstone"] == df["is_granite_or_whinstone_ENDING"]) &
(df["is_cob"] == df["is_cob_ENDING"]) &
(df["is_sandstone_or_limestone"] == df["is_sandstone_or_limestone_ENDING"])
]
elif component == "floor":
# Same consistency check for the floor construction flags
df = df[
(df["is_suspended"] == df["is_suspended_ENDING"]) &
(df["is_solid"] == df["is_solid_ENDING"]) &
(df["another_property_below"] == df["another_property_below_ENDING"]) &
(df["is_to_unheated_space"] == df["is_to_unheated_space_ENDING"]) &
(df["is_to_external_air"] == df["is_to_external_air_ENDING"])
]
elif component == "roof":
# Same consistency check for the roof construction flags
df = df[
(df["is_pitched"] == df["is_pitched_ENDING"]) &
(df["is_roof_room"] == df["is_roof_room_ENDING"]) &
(df["is_loft"] == df["is_loft_ENDING"]) &
(df["is_flat"] == df["is_flat_ENDING"]) &
(df["is_thatched"] == df["is_thatched_ENDING"]) &
(df["is_at_rafters"] == df["is_at_rafters_ENDING"]) &
(df["has_dwelling_above"] == df["has_dwelling_above_ENDING"])
]
# Drop the binary indicators and replace the original description with the cleaned version
# Drop original cols
original_cols = [
f"{component_upper}_DESCRIPTION_STARTING", f"{component_upper}_DESCRIPTION_ENDING"
] if component != "main-fuel" else [
f"{component_upper}_STARTING", f"{component_upper}_ENDING"
]
df = df.drop(columns=cols_to_drop[component] + original_cols)
# The generic lookup column names below are prefixed with the component name
# If we have an insulation_thickness column, rename it
if "insulation_thickness" in cleaned_lookup[cleaned_key][0]:
df = df.rename(
columns={
"insulation_thickness": f"{component}_insulation_thickness",
"insulation_thickness_ENDING": f"{component}_insulation_thickness_ENDING",
}
)
# If we have thermal transmittance, rename it
if "thermal_transmittance" in cleaned_lookup[cleaned_key][0]:
df = df.rename(
columns={
"thermal_transmittance": f"{component}_thermal_transmittance",
"thermal_transmittance_ENDING": f"{component}_thermal_transmittance_ENDING",
}
)
# If we have tariff, rename it
if "tariff_type" in cleaned_lookup[cleaned_key][0]:
df = df.rename(
columns={
"tariff_type": f"{component}_tariff_type",
"tariff_type_ENDING": f"{component}_tariff_type_ENDING",
}
)
# We need the walls descriptions so we rename them to distinguish them
if component == "walls":
df = df.rename(
columns={
"clean_description": f"{component}_clean_description",
"clean_description_ENDING": f"{component}_clean_description_ENDING",
}
)
# We don't need any lighting specific cleaning, we just drop the original description as we use
# LOW_ENERGY_LIGHTING_STARTING, LOW_ENERGY_LIGHTING_ENDING
df = df.drop(columns=["LIGHTING_DESCRIPTION_STARTING", "LIGHTING_DESCRIPTION_ENDING"])
return df
def make_uvalues(df):
"""
Fill missing wall, roof and floor thermal transmittance values with estimated u-values.
Rows are processed one at a time. Where a starting or ending thermal transmittance is
missing, a u-value is looked up with the matching ``get_*_u_value`` helper. The estimates
are merged back on ("UPRN", "row_index") and used to fill the gaps in the
``<component>_thermal_transmittance`` / ``..._ENDING`` columns. The estimated perimeters
are kept as ``estimated_perimeter_STARTING`` / ``estimated_perimeter_ENDING`` columns.
:param df: DataFrame holding the cleaned component columns (a ``row_index`` column is
added to it while the function runs)
:raises ValueError: if a row with a dwelling above / another property below carries a
roof / floor thermal transmittance that is not equal to 0
:return: DataFrame with the thermal transmittance columns filled and the perimeters added
"""
# Remember the row position so the per-row estimates can be merged back afterwards
df["row_index"] = df.index
uvalues = []
for _, x in df.iterrows():
uprn = x["UPRN"]
row_index = x["row_index"]
age_band = england_wales_age_band_lookup[x["CONSTRUCTION_AGE_BAND"]]
# ~~~~~~~~~~~~~~~~~~
# Walls
# ~~~~~~~~~~~~~~~~~~
starting_wall_uvalue = x["walls_thermal_transmittance"]
if pd.isnull(starting_wall_uvalue):
starting_wall_uvalue = get_wall_u_value(
clean_description=x["walls_clean_description"],
age_band=age_band,
is_granite_or_whinstone=x["is_granite_or_whinstone"],
is_sandstone_or_limestone=x["is_sandstone_or_limestone"],
)
ending_wall_uvalue = x["walls_thermal_transmittance_ENDING"]
if pd.isnull(ending_wall_uvalue):
# Only look the ending value up when the wall description actually differs
if x["walls_clean_description"] != x["walls_clean_description_ENDING"]:
ending_wall_uvalue = get_wall_u_value(
clean_description=x["walls_clean_description_ENDING"],
age_band=age_band,
is_granite_or_whinstone=x["is_granite_or_whinstone"],
is_sandstone_or_limestone=x["is_sandstone_or_limestone"],
)
else:
ending_wall_uvalue = starting_wall_uvalue
# ~~~~~~~~~~~~~~~~~~
# Roof
# ~~~~~~~~~~~~~~~~~~
# With a dwelling above, both roof values are required to be 0
if x["has_dwelling_above"]:
if x["roof_thermal_transmittance"] != 0:
raise ValueError("Should have 0 u-value for roof")
if x["roof_thermal_transmittance_ENDING"] != 0:
raise ValueError("Should have 0 u-value for roof")
starting_roof_uvalue = x["roof_thermal_transmittance"]
if pd.isnull(starting_roof_uvalue):
starting_roof_uvalue = get_roof_u_value(
insulation_thickness=x["roof_insulation_thickness"],
has_dwelling_above=x["has_dwelling_above"],
is_loft=x["is_loft"],
is_roof_room=x["is_roof_room"],
is_thatched=x["is_thatched"],
is_flat=x["is_flat"],
is_pitched=x["is_pitched"],
is_at_rafters=x["is_at_rafters"],
age_band=age_band
)
ending_roof_uvalue = x["roof_thermal_transmittance_ENDING"]
if pd.isnull(ending_roof_uvalue):
# Same roof flags as above; only the insulation thickness comes from the ending EPC
ending_roof_uvalue = get_roof_u_value(
insulation_thickness=x["roof_insulation_thickness_ENDING"],
has_dwelling_above=x["has_dwelling_above"],
is_loft=x["is_loft"],
is_roof_room=x["is_roof_room"],
is_thatched=x["is_thatched"],
is_flat=x["is_flat"],
is_pitched=x["is_pitched"],
is_at_rafters=x["is_at_rafters"],
age_band=age_band
)
# ~~~~~~~~~~~~~~~~~~
# Floor
# ~~~~~~~~~~~~~~~~~~
# Perimeter estimate for the starting and the ending floor area
perimeters = {}
for suffix in ["_STARTING", "_ENDING"]:
floor_area = x[f"TOTAL_FLOOR_AREA{suffix}"]
n_rooms = x["NUMBER_HABITABLE_ROOMS"]
perimeters[f"estimated_perimeter{suffix}"] = estimate_perimeter(floor_area, n_rooms)
floor_type = "suspended" if x["is_suspended"] else "solid"
# The whole row is passed to get_wall_type as keyword arguments
wall_type = get_wall_type(**x)
# With another property below, both floor values are required to be 0
if x["another_property_below"]:
if x["floor_thermal_transmittance"] != 0:
raise ValueError("Should have 0 u-value for floor")
if x["floor_thermal_transmittance_ENDING"] != 0:
raise ValueError("Should have 0 u-value for floor")
starting_floor_uvalue, ending_floor_uvalue = 0, 0
else:
starting_floor_uvalue = x["floor_thermal_transmittance"]
ending_floor_uvalue = x["floor_thermal_transmittance_ENDING"]
if pd.isnull(starting_floor_uvalue):
starting_floor_uvalue = get_floor_u_value(
floor_type=floor_type,
perimeter=perimeters["estimated_perimeter_STARTING"],
area=x[f"TOTAL_FLOOR_AREA_STARTING"],
insulation_thickness=x["floor_insulation_thickness"],
wall_type=wall_type,
age_band=age_band
)
if pd.isnull(ending_floor_uvalue):
ending_floor_uvalue = get_floor_u_value(
floor_type=floor_type,
perimeter=perimeters["estimated_perimeter_ENDING"],
area=x[f"TOTAL_FLOOR_AREA_ENDING"],
insulation_thickness=x["floor_insulation_thickness_ENDING"],
wall_type=wall_type,
age_band=age_band
)
uvalues.append(
{
"UPRN": uprn,
"row_index": row_index,
"starting_walls_uvalue": starting_wall_uvalue,
"ending_walls_uvalue": ending_wall_uvalue,
"starting_roof_uvalue": starting_roof_uvalue,
"ending_roof_uvalue": ending_roof_uvalue,
"starting_floor_uvalue": starting_floor_uvalue,
"ending_floor_uvalue": ending_floor_uvalue,
**perimeters
}
)
uvalues = pd.DataFrame(uvalues)
# Attach the per-row estimates; row_index is only needed as a merge key
df = df.merge(
uvalues, how="left", on=["UPRN", "row_index"]
).drop(columns="row_index")
# Fill missings
for component in ["walls", "floor", "roof"]:
for suffix in ["", "_ENDING"]:
fill_col = f"starting_{component}_uvalue" if suffix == "" else f"ending_{component}_uvalue"
df[f"{component}_thermal_transmittance{suffix}"] = np.where(
pd.isnull(df[f"{component}_thermal_transmittance{suffix}"]),
df[fill_col],
df[f"{component}_thermal_transmittance{suffix}"]
)
# The helper columns are no longer needed; the estimated perimeter columns are kept
df = df.drop(
columns=[
"starting_walls_uvalue", "ending_walls_uvalue", "starting_roof_uvalue",
"ending_roof_uvalue", "starting_floor_uvalue", "ending_floor_uvalue"
]
)
return df
def compare_records(earliest_record: pd.Series, latest_record: pd.Series, columns: list):
    """
    For a list of columns, check if the earliest and latest record are the same.

    If they are the same, we indicate this, because we have examples of SAP scores
    changing without any feature changes.

    :param earliest_record: pd.Series holding the earlier record
    :param latest_record: pd.Series holding the later record
    :param columns: list of columns to compare
    :return: True when every listed column holds equal values in both records (also
        True for an empty list of columns), False as soon as one column differs
    """
    # Short-circuits on the first differing column. A value that is NaN in both
    # records still counts as a difference, because NaN != NaN evaluates to True.
    return not any(earliest_record[col] != latest_record[col] for col in columns)
def app():
# Builds the SAP-change modelling dataset: for every sub-directory of DATA_DIRECTORY it loads
# certificates.csv, pairs up consecutive EPCs per UPRN, derives the change targets and the
# STARTING/ENDING features, and finally writes the combined dataset, the cleaning averages and
# the "all equal" rows to S3 as parquet files.
# NOTE(review): indentation has been lost in this view and lines that use EPCPipeline appear
# interleaved with the older inline implementation further down, so the body does not parse
# as it stands - confirm against the real file before relying on statement order.
# Get all the files in the directory
# Data glossary:
# https://epc.opendatacommunities.org/docs/guidance#glossary
cleaned_lookup = get_cleaned()
# List all subdirectories
directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()]
# directories = directories[0:3]
dataset = []
cleaning_dataset = []
# Keep track of the all equals
all_equal_rows = []
for directory in tqdm(directories):
filepath = directory / "certificates.csv"
data_processor = DataProcessor(filepath=filepath)
df = data_processor.pre_process()
cleaning_averages = data_processor.make_cleaning_averages()
# We have some odd cases with missing constituency so we fill
# (the fill value is the most frequent CONSTITUENCY in this directory's data)
df = df.fillna({"CONSTITUENCY": df["CONSTITUENCY"].mode().values[0]})
df = DataProcessor.apply_averages_cleaning(
data_to_clean=df,
cleaning_data=cleaning_averages,
cols_to_merge_on=COLUMNS_TO_MERGE_ON
)
data_by_urpn = []
for uprn, property_data in df.groupby("UPRN", observed=True):
# Fixed features - these are property attributes that shouldn't change over time
fixed_data = {}
# If a property has changed building type, we can ignore the epc rating i.e. this should be 1 unique row
# Properties with any null in the mandatory fixed features are skipped as well.
if any(property_data[MANDATORY_FIXED_FEATURES].nunique() > 1) or (
pd.isnull(property_data[MANDATORY_FIXED_FEATURES]).sum().sum() > 0
):
continue
# Take the latest row for both the LATEST_FIELD and MANDATORY_FIXED_FEATURES columns
# NOTE(review): "latest" here is the last row of the group - presumably the data is already
# sorted by lodgement date upstream; confirm.
latest_field_data = property_data[LATEST_FIELD].iloc[-1].to_dict()
mandatory_field_data = (
property_data[MANDATORY_FIXED_FEATURES].iloc[-1].to_dict()
)
# Combine all fields together
fixed_data.update(mandatory_field_data)
fixed_data.update(latest_field_data)
# We include the lodgement date here as we probably need to factor time into the
# model, since EPC standards and rigour have changed over time
variable_data = property_data[
COMPONENT_FEATURES + EFFICIENCY_FEATURES + POTENTIAL_COLUMNS + [
"LODGEMENT_DATE", RDSAP_RESPONSE, HEAT_DEMAND_RESPONSE, CARBON_RESPONSE
]
]
# Note: we look at changes between subsequent EPCS, however we could look at other permutations
# e.g. first vs second, second vs third and also first vs third
property_model_data = []
for idx in range(0, property_data.shape[0] - 1):
# NOTE(review): this guard can never fire - the range() above already stops one short of the last row.
if idx >= property_data.shape[0] - 1:
break
earliest_record = variable_data.iloc[idx]
latest_record = variable_data.iloc[idx + 1]
# Check if the sap gets better or worse
gets_better = earliest_record[RDSAP_RESPONSE] <= latest_record[RDSAP_RESPONSE]
component_variables = COMPONENT_FEATURES + EFFICIENCY_FEATURES
# The pair is oriented so that STARTING is always the lower-SAP record and ENDING the higher-SAP
# one, whichever was lodged first; rdsap_change is therefore never negative. The heat demand and
# carbon changes follow the same orientation.
if gets_better:
starting_sap = earliest_record[RDSAP_RESPONSE]
starting_heat_demand = earliest_record[HEAT_DEMAND_RESPONSE]
starting_carbon = earliest_record[CARBON_RESPONSE]
ending_sap = latest_record[RDSAP_RESPONSE]
ending_heat_demand = latest_record[HEAT_DEMAND_RESPONSE]
ending_carbon = latest_record[CARBON_RESPONSE]
rdsap_change = latest_record[RDSAP_RESPONSE] - starting_sap
heat_demand_change = latest_record[HEAT_DEMAND_RESPONSE] - starting_heat_demand
carbon_change = latest_record[CARBON_RESPONSE] - starting_carbon
starting_record = earliest_record[component_variables + ["LODGEMENT_DATE"]].add_suffix("_STARTING")
ending_record = latest_record[component_variables + ["LODGEMENT_DATE"]].add_suffix("_ENDING")
else:
starting_sap = latest_record[RDSAP_RESPONSE]
starting_heat_demand = latest_record[HEAT_DEMAND_RESPONSE]
starting_carbon = latest_record[CARBON_RESPONSE]
ending_sap = earliest_record[RDSAP_RESPONSE]
ending_heat_demand = earliest_record[HEAT_DEMAND_RESPONSE]
ending_carbon = earliest_record[CARBON_RESPONSE]
rdsap_change = earliest_record[RDSAP_RESPONSE] - starting_sap
heat_demand_change = earliest_record[HEAT_DEMAND_RESPONSE] - starting_heat_demand
carbon_change = earliest_record[CARBON_RESPONSE] - starting_carbon
starting_record = latest_record[component_variables + ["LODGEMENT_DATE"]].add_suffix("_STARTING")
ending_record = earliest_record[component_variables + ["LODGEMENT_DATE"]].add_suffix("_ENDING")
# Pairs with no SAP change are not used
if rdsap_change == 0:
continue
all_equal = compare_records(
earliest_record=earliest_record,
latest_record=latest_record,
columns=CORE_COMPONENT_FEATURES
)
if all_equal:
# Keep track of this for the moment so we can analyse
all_equal_rows.append({"uprn": uprn, "directory_name": directory.name})
continue
features = pd.concat([starting_record, ending_record])
# NOTE(review): the four *_POTENTIAL values below are always read from earliest_record, even when
# the pair was flipped above so that the latest record is the STARTING one - confirm this is intended.
property_model_data.append(
{
"UPRN": uprn,
"RDSAP_CHANGE": rdsap_change,
"HEAT_DEMAND_CHANGE": heat_demand_change,
"CARBON_CHANGE": carbon_change,
"SAP_STARTING": starting_sap,
"SAP_ENDING": ending_sap,
"HEAT_DEMAND_STARTING": starting_heat_demand,
"HEAT_DEMAND_ENDING": ending_heat_demand,
"CARBON_STARTING": starting_carbon,
"CARBON_ENDING": ending_carbon,
"POTENTIAL_ENERGY_EFFICIENCY": earliest_record["POTENTIAL_ENERGY_EFFICIENCY"],
"ENVIRONMENT_IMPACT_POTENTIAL": earliest_record["ENVIRONMENT_IMPACT_POTENTIAL"],
"ENERGY_CONSUMPTION_POTENTIAL": earliest_record["ENERGY_CONSUMPTION_POTENTIAL"],
"CO2_EMISSIONS_POTENTIAL": earliest_record["CO2_EMISSIONS_POTENTIAL"],
**fixed_data,
**features.to_dict(),
}
)
data_by_urpn.extend(property_model_data)
data_by_urpn_df = pd.DataFrame(data_by_urpn)
# Replace the two lodgement dates with DAYS_TO_* columns computed by DataProcessor.calculate_days_to
data_by_urpn_df["DAYS_TO_STARTING"] = DataProcessor.calculate_days_to(
data_by_urpn_df["LODGEMENT_DATE_STARTING"]
)
data_by_urpn_df["DAYS_TO_ENDING"] = DataProcessor.calculate_days_to(
data_by_urpn_df["LODGEMENT_DATE_ENDING"]
)
data_by_urpn_df = data_by_urpn_df.drop(columns=["LODGEMENT_DATE_STARTING", "LODGEMENT_DATE_ENDING"])
data_by_urpn_df = DataProcessor.clean_efficiency_variables(data_by_urpn_df)
# We look for key building fabric features that have changed from one EPC to the next.
# if, for example, we see that a home has gone from being a cavity wall to a solid wall, we
# remove this record, as it indicates that the quality of the EPC conducted in the first instance
# is low
# We also replace descriptions with their cleaned variants
# The dataset must be free of nulls before the descriptions are processed.
if pd.isnull(data_by_urpn_df).sum().sum():
raise ValueError("Null values found in dataset")
data_by_urpn_df = process_and_prune_desriptions(data_by_urpn_df, cleaned_lookup)
# Apply u-values
# NOTE(review): "(assumed)" is only removed literally when str.replace runs with regex=False
# (the default from pandas 2.0); with regex=True the parentheses would form a regex group - confirm
# the pandas version this script runs under.
for col in ["walls_clean_description", "walls_clean_description_ENDING"]:
data_by_urpn_df[col] = data_by_urpn_df[col].str.replace("(assumed)", "").str.rstrip()
data_by_urpn_df = make_uvalues(data_by_urpn_df).drop(
columns=["walls_clean_description", "walls_clean_description_ENDING"]
)
# TODO: For some of the features that we clean, we have either a true, false or possibly null value
# Those nulls should be False. clean_missings_after_description_process handles this but shouldn't
# need to
data_by_urpn_df = DataProcessor.clean_missings_after_description_process(data_by_urpn_df)
if pd.isnull(data_by_urpn_df).sum().sum():
raise ValueError("Null values found in dataset after process_and_prune_desriptions")
dataset.append(data_by_urpn_df)
# Tag this directory's cleaning averages with the LOCAL_AUTHORITY value of its first row
cleaning_averages["LOCAL_AUTHORITY"] = df["LOCAL_AUTHORITY"].values[0]
cleaning_dataset.append(cleaning_averages)
print("Final all equal count: %s" % str(len(all_equal_rows)))
# Store cleaning dataset in s3 as a parquet file
cleaning_dataset = pd.concat(cleaning_dataset)
# NOTE(review): the call below is never closed before the EPCPipeline assignment starts - this
# looks like two revisions of the function interleaved; confirm which lines belong to the file.
save_dataframe_to_s3_parquet(
df=cleaning_dataset,
bucket_name="retrofit-data-dev",
file_key="sap_change_model/cleaning_dataset.parquet",
epc_pipeline = EPCPipeline(
directories=directories,
use_parallel=True,
epc_data_processor=EPCDataProcessor(run_mode="training"),
)
output = pd.concat(dataset)
epc_pipeline.run()
# Remove any records that have huge swings in their floor area
# (only rows whose absolute floor-area change is below 50% of the starting floor area are kept)
output["tfa_diff_abs"] = abs(output["TOTAL_FLOOR_AREA_ENDING"] - output["TOTAL_FLOOR_AREA_STARTING"])
output["tfa_diff_prop"] = output["tfa_diff_abs"] / output["TOTAL_FLOOR_AREA_STARTING"]
output = output[output["tfa_diff_prop"] < 0.5]
output = output.drop(columns=["tfa_diff_abs", "tfa_diff_prop"])
# For testing
# dataset_df = epc_pipeline.compiled_dataset
# dataset_df.to_parquet("refactor_datasets/dataset_with0perm_all.parquet")
# pd.DataFrame(epc_pipeline.compiled_all_equal_rows).to_parquet("refactor_datasets/all_equal_rows_with0perm_all.parquet")
# pd.concat(epc_pipeline.compiled_cleaning_averages).to_parquet("refactor_datasets/cleaning_averages_with0perm_all.parquet")
# Make sure every thermal transmittance (u-value) column is numeric before writing
uvalue_columns = [col for col in output.columns if "thermal_transmittance" in col]
for uvalue_col in uvalue_columns:
output[uvalue_col] = pd.to_numeric(output[uvalue_col])
save_dataframe_to_s3_parquet(
df=output,
bucket_name="retrofit-data-dev",
file_key="sap_change_model/dataset.parquet",
)
# Store all_equal_rows
all_equal_rows = pd.DataFrame(all_equal_rows)
save_dataframe_to_s3_parquet(
df=all_equal_rows,
bucket_name="retrofit-data-dev",
file_key="sap_change_model/all_equal_rows.parquet",
)
# from utils.s3 import read_dataframe_from_s3_parquet
# dataset = read_dataframe_from_s3_parquet(
# bucket_name="retrofit-data-dev",
# file_key="sap_change_model/dataset_test.parquet",
# )
if __name__ == "__main__":
app()
main()

View file

@ -0,0 +1,5 @@
pandas==2.1.3
tqdm==4.66.1
msgpack==1.0.7
boto3==1.29.6
pyarrow==15.0.2

View file

@ -2,6 +2,63 @@
# TODO: migrate to dynaconf
from pathlib import Path
# Field values that are treated as data anomalies. Presumably compared against whole field
# values (partial matches live in DATA_ANOMALY_SUBSTRINGS) - confirm against the consumers.
DATA_ANOMALY_MATCHES = {
    # Invalid reports are where the value provided is out of bounds, e.g. a negative energy rating of -1199 or a
    # non-integer, there is no valid energy band for this, so it is marked as INVALID!
    "INVALID",
    "INVALID!",
    # When the energy certificate was first lodged on the register there was no requirement to lodge this data
    # item, i.e. a non-mandatory item.
    "NO DATA!",
    "NODATA!",
    # When the energy certificate was first lodged on the register there was no requirement to lodge this data item,
    # i.e. a non-mandatory item.
    "N/A",
    # A value generated by the register to account for a data item that was not mandatory when the lodgement of
    # the energy certificate occurred. When the data item became mandatory the register operator, for backwards
    # compatibility purposes, populated the data field with a value of not recorded to ensure that the energy
    # certificate retrieval process is successfully completed. Mandatory data items cannot be applied
    # retrospectively to energy certificates lodged before the date of the change.
    "Not recorded",
    # The data also contains DECs with an operational rating of 9999 (a default DEC). The production of a
    # default DEC value was allowed to enable building occupiers, with poor quality or no energy data,
    # the opportunity to comply with the regulations. From April 2011 the ability to lodge a default DEC was no
    # longer allowed.
    "9999",
    # The Building Emission Rate (BER) data field for non-domestic buildings may contain a blank value. The BER
    # was only lodged on the register from 7 March 2010.
    # NOTE: the trailing comma matters - without it "Blank" is implicitly concatenated with the next
    # string literal ("NULL") and the set ends up holding "BlankNULL" instead of the two separate values.
    "Blank",
    # There are currently just over 8,600 records where the local authority identifier is null. This is due to
    # the Register Operator not being able to match the building address in the Markermap Ordnance Survey (GB)
    # lookup tables or OS MasterMap Address Layer 2 data. The majority of these addresses have been requested
    # manually by energy assessors for inclusion by the Register Operator in the registers (e.g. new builds,
    # etc). These records are being published for completeness. An ongoing process to manage these manually added
    # addresses will take time to develop to deal with these and future anomalies.
    #
    # There are several fields within the lodged data where it is possible to enter multiple entries to cater for
    # different data_types of build within a single property, i.e. extensions. This results in multiple entries for
    # the description fields for floor, roof and wall. For the purposes of this data release only the information
    # contained within the first of these multiple entries is being provided. As there are no restrictions on the
    # value in this first field it means that sometimes the first field in a multiple entry description field may
    # contain a null value. A resolution to correct these anomalies will be considered for future data releases.
    "NULL",
    # We sometimes see fields populated with just an empty string.
    "",
    # We sometimes find None values - particularly when we produce an estimated EPC
    None,
    # An older value which rarely shows up but has been seen in the data.
    "UNKNOWN",
}
# Substring that marks a superseded pick-list value. When a pick-list value is replaced by
# newer ones (for example a single "pitched roof" value split into three sub-categories), the
# original value is kept on the register with this text appended, purely so that certificate
# retrieval keeps working; replacement values are never applied retrospectively to
# certificates lodged before the change.
DATA_ANOMALY_SUBSTRINGS = {"for backward compatibility only"}

METRIC_FILENAME = "metrics.csv"
OPTIMISE_METRIC = "mean_absolute_error"
@ -106,17 +163,20 @@ CORE_COMPONENT_FEATURES = [
]
EFFICIENCY_FEATURES = [
'HOT_WATER_ENERGY_EFF',
'FLOOR_ENERGY_EFF',
'WINDOWS_ENERGY_EFF',
'WALLS_ENERGY_EFF',
'SHEATING_ENERGY_EFF',
'ROOF_ENERGY_EFF',
'MAINHEAT_ENERGY_EFF',
'MAINHEATC_ENERGY_EFF',
'LIGHTING_ENERGY_EFF'
"HOT_WATER_ENERGY_EFF",
"FLOOR_ENERGY_EFF",
"WINDOWS_ENERGY_EFF",
"WALLS_ENERGY_EFF",
"SHEATING_ENERGY_EFF",
"ROOF_ENERGY_EFF",
"MAINHEAT_ENERGY_EFF",
"MAINHEATC_ENERGY_EFF",
"LIGHTING_ENERGY_EFF",
]
ROOM_FEATURES = ["number_habitable_rooms", "number_heated_rooms"]
COMPONENT_FEATURES = CORE_COMPONENT_FEATURES + [
"TRANSACTION_TYPE",
"ENERGY_TARIFF", # Not sure if this is relevant
@ -127,10 +187,10 @@ COMPONENT_FEATURES = CORE_COMPONENT_FEATURES + [
]
POTENTIAL_COLUMNS = [
'POTENTIAL_ENERGY_EFFICIENCY',
'ENVIRONMENT_IMPACT_POTENTIAL',
'ENERGY_CONSUMPTION_POTENTIAL',
'CO2_EMISSIONS_POTENTIAL',
"POTENTIAL_ENERGY_EFFICIENCY",
"ENVIRONMENT_IMPACT_POTENTIAL",
"ENERGY_CONSUMPTION_POTENTIAL",
"CO2_EMISSIONS_POTENTIAL",
# We don't include cost features for the moment
# 'LIGHTING_COST_POTENTIAL',
# 'HEATING_COST_POTENTIAL',
@ -155,6 +215,14 @@ MANDATORY_FIXED_FEATURES = ["PROPERTY_TYPE", "BUILT_FORM", "CONSTITUENCY"]
# and Wales from 31 July 2014
# ISO-formatted (YYYY-MM-DD) date string.
EARLIEST_EPC_DATE = "2014-08-01"
# NOTE(review): plural name but a single str, unlike the list-valued IGNORED_FLOOR_LEVELS and
# IGNORED_TENURES below - confirm consumers compare with == / isin([...]) rather than `in`, which
# would do substring matching against this string.
IGNORED_TRANSACTION_TYPES = "new dwelling"
IGNORED_FLOOR_LEVELS = ["top floor", "mid floor"]
# NOTE(review): also a single str rather than a list - same caveat as IGNORED_TRANSACTION_TYPES.
IGNORED_PROPERTY_TYPES = "Park home"
# A one-element list: the two adjacent string literals are concatenated into a single tenure label.
IGNORED_TENURES = [
"Not defined - use in the case of a new dwelling for which the intended tenure in not known. It is not to be used "
"for an existing dwelling"
]
# EPC column names used as the response variables (SAP rating, heat demand, carbon).
RDSAP_RESPONSE = "CURRENT_ENERGY_EFFICIENCY"
HEAT_DEMAND_RESPONSE = "ENERGY_CONSUMPTION_CURRENT"
CARBON_RESPONSE = "CO2_EMISSIONS_CURRENT"
@ -172,30 +240,55 @@ DATA_PROCESSOR_SETTINGS = {
# This has a manual mapping of the column types required
COLUMNTYPES = {
'UPRN': 'object', 'TOTAL_FLOOR_AREA': 'float64', 'FLOOR_HEIGHT': 'float64', 'PROPERTY_TYPE': 'object',
'BUILT_FORM': 'object', 'CONSTITUENCY': 'object', 'NUMBER_HABITABLE_ROOMS': 'float64',
'NUMBER_HEATED_ROOMS': 'float64', 'FIXED_LIGHTING_OUTLETS_COUNT': 'float64',
'CONSTRUCTION_AGE_BAND': 'object',
'TRANSACTION_TYPE': 'object',
'WALLS_DESCRIPTION': 'object',
'FLOOR_DESCRIPTION': 'object',
'LIGHTING_DESCRIPTION': 'object',
'ROOF_DESCRIPTION': 'object',
'MAINHEAT_DESCRIPTION': 'object',
'HOTWATER_DESCRIPTION': 'object', 'MAIN_FUEL': 'object',
'MECHANICAL_VENTILATION': 'object',
'SECONDHEAT_DESCRIPTION': 'object', 'ENERGY_TARIFF': 'object',
'SOLAR_WATER_HEATING_FLAG': 'object', 'PHOTO_SUPPLY': 'float64',
'WINDOWS_DESCRIPTION': 'object',
'GLAZED_TYPE': 'object',
'MULTI_GLAZE_PROPORTION': 'float64',
'LOW_ENERGY_LIGHTING': 'float64',
'NUMBER_OPEN_FIREPLACES': 'float64',
'MAINHEATCONT_DESCRIPTION': 'object',
'EXTENSION_COUNT': 'float64',
'LODGEMENT_DATE': 'object',
**dict(zip(EFFICIENCY_FEATURES, ['object', ] * len(EFFICIENCY_FEATURES))),
**dict(zip(POTENTIAL_COLUMNS, ['float64', ] * len(POTENTIAL_COLUMNS)))
"UPRN": "object",
"TOTAL_FLOOR_AREA": "float64",
"FLOOR_HEIGHT": "float64",
"PROPERTY_TYPE": "object",
"BUILT_FORM": "object",
"CONSTITUENCY": "object",
"NUMBER_HABITABLE_ROOMS": "float64",
"NUMBER_HEATED_ROOMS": "float64",
"FIXED_LIGHTING_OUTLETS_COUNT": "float64",
"CONSTRUCTION_AGE_BAND": "object",
"TRANSACTION_TYPE": "object",
"WALLS_DESCRIPTION": "object",
"FLOOR_DESCRIPTION": "object",
"LIGHTING_DESCRIPTION": "object",
"ROOF_DESCRIPTION": "object",
"MAINHEAT_DESCRIPTION": "object",
"HOTWATER_DESCRIPTION": "object",
"MAIN_FUEL": "object",
"MECHANICAL_VENTILATION": "object",
"SECONDHEAT_DESCRIPTION": "object",
"ENERGY_TARIFF": "object",
"SOLAR_WATER_HEATING_FLAG": "object",
"PHOTO_SUPPLY": "float64",
"WINDOWS_DESCRIPTION": "object",
"GLAZED_TYPE": "object",
"MULTI_GLAZE_PROPORTION": "float64",
"LOW_ENERGY_LIGHTING": "float64",
"NUMBER_OPEN_FIREPLACES": "float64",
"MAINHEATCONT_DESCRIPTION": "object",
"EXTENSION_COUNT": "float64",
"LODGEMENT_DATE": "object",
**dict(
zip(
EFFICIENCY_FEATURES,
[
"object",
]
* len(EFFICIENCY_FEATURES),
)
),
**dict(
zip(
POTENTIAL_COLUMNS,
[
"float64",
]
* len(POTENTIAL_COLUMNS),
)
),
}
# For modelling, we don't allow records with more than 100 SAP points
@ -215,7 +308,7 @@ fill_na_map = {
"LOW_ENERGY_LIGHTING": 0,
"MAINHEATCONT_DESCRIPTION": "Unknown",
"EXTENSION_COUNT": 0,
"NUMBER_OPEN_FIREPLACES": 0
"NUMBER_OPEN_FIREPLACES": 0,
}
################################################################################################
@ -224,62 +317,212 @@ fill_na_map = {
################################################################################################
STARTING_SUFFIX_COMPONENT_COLS = [
"SAP", "HEAT_DEMAND", "CARBON", "TRANSACTION_TYPE", "MECHANICAL_VENTILATION",
"SECONDHEAT_DESCRIPTION", "ENERGY_TARIFF", "SOLAR_WATER_HEATING_FLAG", "PHOTO_SUPPLY",
"GLAZED_TYPE", "MULTI_GLAZE_PROPORTION", "LOW_ENERGY_LIGHTING", "NUMBER_OPEN_FIREPLACES",
"EXTENSION_COUNT", "TOTAL_FLOOR_AREA", "FLOOR_HEIGHT", "DAYS_TO", "estimated_perimeter"
"SAP",
"HEAT_DEMAND",
"CARBON",
"TRANSACTION_TYPE",
"MECHANICAL_VENTILATION",
"SECONDHEAT_DESCRIPTION",
"ENERGY_TARIFF",
"SOLAR_WATER_HEATING_FLAG",
"PHOTO_SUPPLY",
"GLAZED_TYPE",
"MULTI_GLAZE_PROPORTION",
"LOW_ENERGY_LIGHTING",
"NUMBER_OPEN_FIREPLACES",
"EXTENSION_COUNT",
"TOTAL_FLOOR_AREA",
"FLOOR_HEIGHT",
"DAYS_TO",
"estimated_perimeter",
]
NO_SUFFIX_COMPONENT_COLS = [
"walls_thermal_transmittance",
"is_cavity_wall",
"is_filled_cavity",
"is_solid_brick",
"is_system_built",
"is_timber_frame",
"is_granite_or_whinstone",
"is_as_built",
"is_cob",
"is_sandstone_or_limestone",
"is_park_home",
"walls_insulation_thickness",
"external_insulation",
"internal_insulation",
"floor_thermal_transmittance",
"is_to_unheated_space",
"is_to_external_air",
"is_suspended",
"is_solid",
"another_property_below",
"floor_insulation_thickness",
"roof_thermal_transmittance",
"is_pitched",
"is_roof_room",
"is_loft",
"is_flat",
"is_thatched",
"is_at_rafters",
"has_dwelling_above",
"roof_insulation_thickness",
"heater_type",
"system_type",
"thermostat_characteristics",
"heating_scope",
"energy_recovery",
"hotwater_tariff_type",
"extra_features",
"chp_systems",
"distribution_system",
"no_system_present",
"appliance",
"has_radiators",
"has_fan_coil_units",
"has_pipes_in_screed_above_insulation",
"has_pipes_in_insulated_timber_floor",
"has_pipes_in_concrete_slab",
"has_boiler",
"has_air_source_heat_pump",
"has_room_heaters",
"has_electric_storage_heaters",
"has_warm_air",
"has_electric_underfloor_heating",
"has_electric_ceiling_heating",
"has_community_scheme",
"has_ground_source_heat_pump",
"has_no_system_present",
"has_portable_electric_heaters",
"has_water_source_heat_pump",
"has_electric_heat_pump",
"has_micro-cogeneration",
"has_solar_assisted_heat_pump",
"has_exhaust_source_heat_pump",
"has_community_heat_pump",
"has_electric",
"has_mains_gas",
"has_wood_logs",
"has_coal",
"has_oil",
"has_wood_pellets",
"has_anthracite",
"has_dual_fuel_mineral_and_wood",
"has_smokeless_fuel",
"has_lpg",
"has_b30k",
"has_electricaire",
"has_assumed_for_most_rooms",
"has_underfloor_heating",
"thermostatic_control",
"charging_system",
"switch_system",
"no_control",
"dhw_control",
"community_heating",
"multiple_room_thermostats",
"auxiliary_systems",
"trvs",
"rate_control",
"glazing_type",
"fuel_type",
"main-fuel_tariff_type",
"is_community",
"no_individual_heating_or_community_network",
"complex_fuel_type",
]
NO_SUFFIX_COMPONENT_COLS = ['walls_thermal_transmittance', 'is_cavity_wall',
'is_filled_cavity', 'is_solid_brick', 'is_system_built', 'is_timber_frame',
'is_granite_or_whinstone', 'is_as_built', 'is_cob', 'is_sandstone_or_limestone',
'is_park_home', 'walls_insulation_thickness', 'external_insulation', 'internal_insulation',
'floor_thermal_transmittance', 'is_to_unheated_space', 'is_to_external_air', 'is_suspended',
'is_solid', 'another_property_below', 'floor_insulation_thickness',
'roof_thermal_transmittance', 'is_pitched', 'is_roof_room', 'is_loft', 'is_flat',
'is_thatched', 'is_at_rafters', 'has_dwelling_above', 'roof_insulation_thickness',
'heater_type', 'system_type', 'thermostat_characteristics', 'heating_scope',
'energy_recovery',
'hotwater_tariff_type', 'extra_features', 'chp_systems', 'distribution_system',
'no_system_present', 'appliance', 'has_radiators', 'has_fan_coil_units',
'has_pipes_in_screed_above_insulation', 'has_pipes_in_insulated_timber_floor',
'has_pipes_in_concrete_slab', 'has_boiler', 'has_air_source_heat_pump', 'has_room_heaters',
'has_electric_storage_heaters', 'has_warm_air', 'has_electric_underfloor_heating',
'has_electric_ceiling_heating', 'has_community_scheme', 'has_ground_source_heat_pump',
'has_no_system_present', 'has_portable_electric_heaters', 'has_water_source_heat_pump',
'has_electric_heat_pump', 'has_micro-cogeneration', 'has_solar_assisted_heat_pump',
'has_exhaust_source_heat_pump', 'has_community_heat_pump', 'has_electric', 'has_mains_gas',
'has_wood_logs', 'has_coal', 'has_oil', 'has_wood_pellets', 'has_anthracite',
'has_dual_fuel_mineral_and_wood', 'has_smokeless_fuel', 'has_lpg', 'has_b30k',
'has_electricaire', 'has_assumed_for_most_rooms', 'has_underfloor_heating',
'thermostatic_control', 'charging_system', 'switch_system', 'no_control', 'dhw_control',
'community_heating', 'multiple_room_thermostats', 'auxiliary_systems', 'trvs',
'rate_control',
'glazing_type', 'fuel_type', 'main-fuel_tariff_type', 'is_community',
'no_individual_heating_or_community_network', 'complex_fuel_type',
]
ENDING_SUFFIX_COMPONENT_COLS = [
'SAP', 'HEAT_DEMAND', 'CARBON', 'TRANSACTION_TYPE', 'MECHANICAL_VENTILATION', 'SECONDHEAT_DESCRIPTION',
'ENERGY_TARIFF', 'SOLAR_WATER_HEATING_FLAG', 'PHOTO_SUPPLY', 'GLAZED_TYPE', 'MULTI_GLAZE_PROPORTION',
'LOW_ENERGY_LIGHTING', 'NUMBER_OPEN_FIREPLACES', 'EXTENSION_COUNT', 'TOTAL_FLOOR_AREA', 'FLOOR_HEIGHT',
'DAYS_TO', 'walls_thermal_transmittance', 'is_park_home', 'walls_insulation_thickness',
'external_insulation', 'internal_insulation', 'floor_thermal_transmittance', 'floor_insulation_thickness',
'roof_thermal_transmittance', 'roof_insulation_thickness', 'heater_type', 'system_type',
'thermostat_characteristics', 'heating_scope', 'energy_recovery', 'hotwater_tariff_type', 'extra_features',
'chp_systems', 'distribution_system', 'no_system_present', 'appliance', 'has_radiators',
'has_fan_coil_units', 'has_pipes_in_screed_above_insulation', 'has_pipes_in_insulated_timber_floor',
'has_pipes_in_concrete_slab', 'has_boiler', 'has_air_source_heat_pump', 'has_room_heaters',
'has_electric_storage_heaters', 'has_warm_air', 'has_electric_underfloor_heating',
'has_electric_ceiling_heating', 'has_community_scheme', 'has_ground_source_heat_pump',
'has_no_system_present', 'has_portable_electric_heaters', 'has_water_source_heat_pump',
'has_electric_heat_pump', 'has_micro-cogeneration', 'has_solar_assisted_heat_pump',
'has_exhaust_source_heat_pump', 'has_community_heat_pump', 'has_electric', 'has_mains_gas', 'has_wood_logs',
'has_coal', 'has_oil', 'has_wood_pellets', 'has_anthracite', 'has_dual_fuel_mineral_and_wood',
'has_smokeless_fuel', 'has_lpg', 'has_b30k', 'has_electricaire', 'has_assumed_for_most_rooms',
'has_underfloor_heating', 'thermostatic_control', 'charging_system', 'switch_system', 'no_control',
'dhw_control', 'community_heating', 'multiple_room_thermostats', 'auxiliary_systems', 'trvs',
'rate_control', 'glazing_type', 'fuel_type', 'main-fuel_tariff_type', 'is_community',
'no_individual_heating_or_community_network', 'complex_fuel_type', 'estimated_perimeter'
"SAP",
"HEAT_DEMAND",
"CARBON",
"TRANSACTION_TYPE",
"MECHANICAL_VENTILATION",
"SECONDHEAT_DESCRIPTION",
"ENERGY_TARIFF",
"SOLAR_WATER_HEATING_FLAG",
"PHOTO_SUPPLY",
"GLAZED_TYPE",
"MULTI_GLAZE_PROPORTION",
"LOW_ENERGY_LIGHTING",
"NUMBER_OPEN_FIREPLACES",
"EXTENSION_COUNT",
"TOTAL_FLOOR_AREA",
"FLOOR_HEIGHT",
"DAYS_TO",
"walls_thermal_transmittance",
"is_park_home",
"walls_insulation_thickness",
"external_insulation",
"internal_insulation",
"floor_thermal_transmittance",
"floor_insulation_thickness",
"roof_thermal_transmittance",
"roof_insulation_thickness",
"heater_type",
"system_type",
"thermostat_characteristics",
"heating_scope",
"energy_recovery",
"hotwater_tariff_type",
"extra_features",
"chp_systems",
"distribution_system",
"no_system_present",
"appliance",
"has_radiators",
"has_fan_coil_units",
"has_pipes_in_screed_above_insulation",
"has_pipes_in_insulated_timber_floor",
"has_pipes_in_concrete_slab",
"has_boiler",
"has_air_source_heat_pump",
"has_room_heaters",
"has_electric_storage_heaters",
"has_warm_air",
"has_electric_underfloor_heating",
"has_electric_ceiling_heating",
"has_community_scheme",
"has_ground_source_heat_pump",
"has_no_system_present",
"has_portable_electric_heaters",
"has_water_source_heat_pump",
"has_electric_heat_pump",
"has_micro-cogeneration",
"has_solar_assisted_heat_pump",
"has_exhaust_source_heat_pump",
"has_community_heat_pump",
"has_electric",
"has_mains_gas",
"has_wood_logs",
"has_coal",
"has_oil",
"has_wood_pellets",
"has_anthracite",
"has_dual_fuel_mineral_and_wood",
"has_smokeless_fuel",
"has_lpg",
"has_b30k",
"has_electricaire",
"has_assumed_for_most_rooms",
"has_underfloor_heating",
"thermostatic_control",
"charging_system",
"switch_system",
"no_control",
"dhw_control",
"community_heating",
"multiple_room_thermostats",
"auxiliary_systems",
"trvs",
"rate_control",
"glazing_type",
"fuel_type",
"main-fuel_tariff_type",
"is_community",
"no_individual_heating_or_community_network",
"complex_fuel_type",
"estimated_perimeter",
]
# We found that without performing any filtering, the bottom 0.5% of homes had a floor height of 1.65m. We'll therefore

10001
etl/epc/testfile.csv Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,358 @@
import pytest
from utils.s3 import read_dataframe_from_s3_parquet
from etl.epc.Record import EPCRecord
from etl.epc.settings import DATA_ANOMALY_MATCHES
import random
class TestEpcRecord:
@pytest.fixture()
def cleaning_data(self):
    """Read the cleaning dataset parquet from the dev S3 bucket and hand it to the test."""
    return read_dataframe_from_s3_parquet(
        bucket_name="retrofit-data-dev",
        file_key="sap_change_model/cleaning_dataset.parquet",
    )
@pytest.fixture()
def epc_records_1(self):
    """
    Sample EPC payload: one populated 'original_epc' (a semi-detached house in Birmingham),
    an empty 'full_sap_epc' and no 'old_data'.
    """
    original_epc = {
        'low-energy-fixed-light-count': '', 'address': '139 School Road, Hall Green',
        'uprn-source': 'Energy Assessor', 'floor-height': '2.6', 'heating-cost-potential': '1138',
        'unheated-corridor-length': '', 'hot-water-cost-potential': '175',
        'construction-age-band': 'England and Wales: 1900-1929', 'potential-energy-rating': 'B',
        'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Very Good',
        'environment-impact-potential': '82', 'glazed-type': 'double glazing, unknown install date',
        'heating-cost-current': '2711', 'address3': '',
        'mainheatcont-description': 'Programmer, TRVs and bypass',
        'sheating-energy-eff': 'N/A', 'property-type': 'House', 'local-authority-label': 'Birmingham',
        'fixed-lighting-outlets-count': '11', 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural',
        'hot-water-cost-current': '310', 'county': '', 'postcode': 'B28 8JF', 'solar-water-heating-flag': 'N',
        'constituency': 'E14000562', 'co2-emissions-potential': '2.0', 'number-heated-rooms': '4',
        'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '107',
        'local-authority': 'E08000025', 'built-form': 'Semi-Detached', 'number-open-fireplaces': '0',
        'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2023-07-05',
        'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '65', 'address1': '139 School Road',
        'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Birmingham, Hall Green',
        'roof-energy-eff': 'Average', 'total-floor-area': '103.0', 'building-reference-number': '10004697322',
        'environment-impact-current': '43', 'co2-emissions-current': '6.7',
        'roof-description': 'Pitched, 100 mm loft insulation', 'floor-energy-eff': 'N/A',
        'number-habitable-rooms': '4', 'address2': 'Hall Green', 'hot-water-env-eff': 'Good',
        'posttown': 'BIRMINGHAM', 'mainheatc-energy-eff': 'Average', 'main-fuel': 'mains gas (not community)',
        'lighting-env-eff': 'Very Good', 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A',
        'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in 82% of fixed outlets',
        'roof-env-eff': 'Average', 'walls-energy-eff': 'Very Poor', 'photo-supply': '0.0',
        'lighting-cost-potential': '182', 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100',
        'main-heating-controls': '', 'lodgement-datetime': '2023-07-13 08:23:07', 'flat-top-storey': '',
        'current-energy-rating': 'E', 'secondheat-description': 'None', 'walls-env-eff': 'Very Poor',
        'transaction-type': 'rental', 'uprn': '100070505235', 'current-energy-efficiency': '51',
        'energy-consumption-current': '366', 'mainheat-description': 'Boiler and radiators, mains gas',
        'lighting-cost-current': '182', 'lodgement-date': '2023-07-13', 'extension-count': '0',
        'mainheatc-env-eff': 'Average',
        'lmk-key': 'c1d137711da433fb3cced74b1a6848da8bbc1159d076455d26d7b4668982601e',
        'wind-turbine-count': '0',
        'tenure': 'Rented (social)', 'floor-level': '', 'potential-energy-efficiency': '84',
        'hot-water-energy-eff': 'Good', 'low-energy-lighting': '82',
        'walls-description': 'Solid brick, as built, no insulation (assumed)',
        'hotwater-description': 'From main system',
    }
    return {'original_epc': original_epc, 'full_sap_epc': {}, 'old_data': []}
def test_clean_mechanical_ventilation(self, cleaning_data, epc_records_1):
    """Natural ventilation is kept as-is; empty, None and anomalous values are cleaned to None."""
    # (raw input, expected value after cleaning) - run in the same order as the original checks
    cases = [
        ("natural", "natural"),
        ("", None),
        (None, None),
        ("INVALID", None),
    ]
    for raw_value, expected in cases:
        record = EPCRecord(cleaning_data=cleaning_data)
        record.prepared_epc = {"mechanical-ventilation": raw_value}
        record._clean_ventilation()
        cleaned = record.prepared_epc["mechanical-ventilation"]
        if expected is None:
            assert cleaned is None
        else:
            assert cleaned == expected
def test_clean_energy_valid_values(self, cleaning_data, epc_records_1):
    """Numeric strings in the two energy fields must come out equal to their numeric values."""
    raw_epc = {"energy-consumption-current": "200", "co2-emissions-current": "5.5"}
    record = EPCRecord(cleaning_data=cleaning_data)
    record.prepared_epc = raw_epc
    record._clean_energy()
    # Read prepared_epc only after cleaning, exactly as the assertions did before
    cleaned = record.prepared_epc
    assert cleaned["energy-consumption-current"] == 200.0
    assert cleaned["co2-emissions-current"] == 5.5
def test_clean_energy_empty_values(self, cleaning_data):
    """Empty energy fields are treated as invalid: ``_clean_energy`` is expected to raise ``ValueError``."""
    epc = EPCRecord(cleaning_data=cleaning_data)
    epc.prepared_epc = dict.fromkeys(
        ("energy-consumption-current", "co2-emissions-current"), ""
    )
    with pytest.raises(ValueError):
        epc._clean_energy()
def test_clean_built_form_valid_remap(self, cleaning_data, epc_records_1):
    """A built form that is already "Semi-Detached" is still "Semi-Detached" after ``_clean_built_form``."""
    prepared = {}
    prepared["built-form"] = "Semi-Detached"
    # The original author assumed the property type affects the remapping
    prepared["property-type"] = "Flat"

    epc = EPCRecord(cleaning_data=cleaning_data)
    epc.prepared_epc = prepared
    epc._clean_built_form()

    cleaned_built_form = epc.prepared_epc["built-form"]
    assert cleaned_built_form == "Semi-Detached"
def test_clean_built_form_anomaly(self, cleaning_data, epc_records_1):
    """An empty built form on a flat is expected to be filled in as "End-Terrace"."""
    epc = EPCRecord(cleaning_data=cleaning_data)
    epc.prepared_epc = dict([("built-form", ""), ("property-type", "Flat")])
    epc._clean_built_form()
    cleaned_built_form = epc.prepared_epc["built-form"]
    assert cleaned_built_form == "End-Terrace"
def test_clean_floor_area_valid(self, cleaning_data):
    """A numeric string for the total floor area comes back as the equivalent number."""
    field = "total-floor-area"
    epc = EPCRecord(cleaning_data=cleaning_data)
    epc.prepared_epc = {field: "120.5"}
    epc._clean_floor_area()
    assert epc.prepared_epc[field] == 120.5
def test_clean_floor_area_empty(self, cleaning_data):
    """
    An empty total floor area is expected to raise ``ValueError``.

    There is no known case of a missing floor area, so it is treated as an error.
    """
    epc = EPCRecord(cleaning_data=cleaning_data)
    epc.prepared_epc = dict.fromkeys(["total-floor-area"], "")
    with pytest.raises(ValueError):
        epc._clean_floor_area()
def test_clean_heat_loss_corridor_valid(self, cleaning_data):
    """
    A valid "unheated corridor" value is kept by ``_clean_heat_loss_corridor``.

    The corridor length is supplied once as an empty string and once as ``None``; only in
    the ``None`` case is the cleaned length additionally checked to be ``None``.
    """
    corridor_type = "unheated corridor"
    # (raw corridor length, whether the cleaned length is also asserted to be None)
    scenarios = (("", False), (None, True))
    for raw_length, check_length in scenarios:
        epc = EPCRecord(cleaning_data=cleaning_data)
        epc.prepared_epc = {
            "heat-loss-corridor": corridor_type,
            "unheated-corridor-length": raw_length,
        }
        epc._clean_heat_loss_corridor()
        assert epc.prepared_epc["heat-loss-corridor"] == corridor_type
        if check_length:
            assert epc.prepared_epc["unheated-corridor-length"] is None
def test_clean_heat_loss_corridor_anomaly(self, cleaning_data):
    """An unrecognised corridor value ("InvalidCorridor") is expected to be cleaned to "no corridor"."""
    anomalous_epc = {}
    anomalous_epc["heat-loss-corridor"] = "InvalidCorridor"
    anomalous_epc["unheated-corridor-length"] = ""

    epc = EPCRecord(cleaning_data=cleaning_data)
    epc.prepared_epc = anomalous_epc
    epc._clean_heat_loss_corridor()

    assert epc.prepared_epc["heat-loss-corridor"] == "no corridor"
def test_clean_mains_gas_valid(self, cleaning_data):
    """A mains gas flag of "Y" is expected to be cleaned to the boolean ``True``."""
    flag = "mains-gas-flag"
    epc = EPCRecord(cleaning_data=cleaning_data)
    epc.prepared_epc = {flag: "Y"}
    epc._clean_mains_gas()
    cleaned_flag = epc.prepared_epc[flag]
    assert cleaned_flag is True
def test_clean_mains_gas_anomaly(self, cleaning_data):
    """
    Check how ``_clean_mains_gas`` handles values other than "Y"/"N".

    A value that is neither Y, N nor a known anomaly value must raise ``KeyError``, while
    every known anomaly value must be cleaned to ``None``.
    """
    record = EPCRecord(cleaning_data=cleaning_data)
    record.prepared_epc = {
        "mains-gas-flag": "InvalidValue"
    }
    # It should always be Y or N or an anomaly value
    with pytest.raises(KeyError):
        record._clean_mains_gas()

    # Check every anomaly value rather than a randomly chosen one, so the test is
    # deterministic and a single badly handled anomaly cannot slip through by chance.
    for anomaly in DATA_ANOMALY_MATCHES:
        record = EPCRecord(cleaning_data=cleaning_data)
        record.prepared_epc = {
            "mains-gas-flag": anomaly
        }
        record._clean_mains_gas()
        assert record.prepared_epc["mains-gas-flag"] is None, (
            f"anomaly value {anomaly!r} was not cleaned to None"
        )
def test_clean_solar_hot_water_valid(self, cleaning_data):
    """A solar water heating flag of "Y" is kept as "Y" and the boolean flag is set to ``True``."""
    flag = "solar-water-heating-flag"
    epc = EPCRecord(cleaning_data=cleaning_data)
    epc.prepared_epc = {flag: "Y"}
    epc._clean_solar_hot_water()
    assert epc.prepared_epc[flag] == "Y"
    assert epc.solar_water_heating_flag_bool is True
def test_clean_solar_hot_water_empty(self, cleaning_data):
    """An empty solar water heating flag is expected to become "N" with a ``False`` boolean flag."""
    flag = "solar-water-heating-flag"
    epc = EPCRecord(cleaning_data=cleaning_data)
    epc.prepared_epc = {flag: ""}
    epc._clean_solar_hot_water()
    assert epc.prepared_epc[flag] == "N"
    assert epc.solar_water_heating_flag_bool is False
def test_clean_number_lighting_outlets_valid(self, cleaning_data, epc_records_1):
    """A numeric string for the fixed lighting outlet count comes back as the equivalent number."""
    field = "fixed-lighting-outlets-count"
    epc = EPCRecord(cleaning_data=cleaning_data, epc_records=epc_records_1)
    epc.prepared_epc = {field: "5"}
    epc._clean_number_lighting_outlets()
    cleaned_count = epc.prepared_epc[field]
    assert cleaned_count == 5.0
def test_clean_number_lighting_outlets_empty(self, cleaning_data, epc_records_1):
    """
    A missing fixed lighting outlet count is filled in by ``_clean_number_lighting_outlets``.

    The record runs in "newdata" mode with no old data and no full SAP EPC; for the property
    described below the filled-in count is expected to be 8.
    """
    property_details = {
        "fixed-lighting-outlets-count": "",
        "property-type": "Flat",
        "built-form": "Semi-Detached",
        "construction-age-band": "England and Wales: 1900-1929",
        "local-authority": "E08000025",
        "number-habitable-rooms": "4",
        "number-heated-rooms": "4",
    }
    epc = EPCRecord(cleaning_data=cleaning_data)
    epc.run_mode = "newdata"
    epc.prepared_epc = property_details
    epc.old_data = []
    epc.full_sap_epc = []
    epc._clean_number_lighting_outlets()
    assert epc.prepared_epc["fixed-lighting-outlets-count"] == 8.0
def test_clean_count_variables(self, cleaning_data):
    """
    Check ``_clean_count_variables`` across a valid, a missing, an empty and an invalid count.

    A numeric string becomes a number, a ``None`` extension count becomes 0, and an empty
    storey count and a non-numeric habitable room count both become ``None``.
    """
    raw_counts = {}
    raw_counts["number-open-fireplaces"] = "1"
    raw_counts["extension-count"] = None
    raw_counts["flat-storey-count"] = ""
    raw_counts["number-habitable-rooms"] = "INVALID!"

    epc = EPCRecord(cleaning_data=cleaning_data)
    epc.prepared_epc = raw_counts
    epc._clean_count_variables()

    cleaned = epc.prepared_epc
    assert cleaned["number-open-fireplaces"] == 1.0
    assert cleaned["extension-count"] == 0
    assert cleaned["flat-storey-count"] is None
    assert cleaned["number-habitable-rooms"] is None
def test_clean_floor_level(self, cleaning_data):
    """
    Check ``_clean_floor_level`` for a numeric, an empty and a missing floor level.

    "1" is expected to become the number 1, while an empty string and ``None`` are both
    expected to become ``None``.
    """
    field = "floor-level"
    # (raw floor level, expected cleaned floor level)
    for raw_level, expected in (("1", 1.0), ("", None), (None, None)):
        epc = EPCRecord(cleaning_data=cleaning_data)
        epc.prepared_epc = {field: raw_level}
        epc._clean_floor_level()
        cleaned_level = epc.prepared_epc[field]
        if expected is None:
            assert cleaned_level is None
        else:
            assert cleaned_level == expected
def test_clean_solar_hot_water(self, cleaning_data):
    """
    Check ``_clean_solar_hot_water`` for every kind of solar water heating flag.

    "Y" stays "Y" with a ``True`` boolean flag; "N", an empty string and ``None`` are all
    expected to give "N" with a ``False`` boolean flag.
    """
    field = "solar-water-heating-flag"
    # (raw flag, expected cleaned flag, expected boolean flag)
    scenarios = [
        ("Y", "Y", True),
        ("N", "N", False),
        ("", "N", False),
        (None, "N", False),
    ]
    for raw_flag, expected_flag, expected_bool in scenarios:
        epc = EPCRecord(cleaning_data=cleaning_data)
        epc.prepared_epc = {field: raw_flag}
        epc._clean_solar_hot_water()
        assert epc.prepared_epc[field] == expected_flag
        assert epc.solar_water_heating_flag_bool is expected_bool

View file

@ -35,9 +35,12 @@ def app():
cleaned_data = {}
epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
for directory in tqdm(epc_directories):
WALLS = []
for directory in tqdm(epc_directories):
data = pd.read_csv(directory / "certificates.csv", low_memory=False)
z = data["WALLS_DESCRIPTION"].unique().tolist()
WALLS.extend(z)
# Rename the columns to the same format as the api returns
data.columns = [c.replace("_", "-").lower() for c in data.columns]
# Take just date before the date threshold

View file

@ -61,7 +61,8 @@ class MainHeatAttributes(Definitions):
REMAP = {
"electric ceiling": "electric ceiling heating",
"electric heat pumps": "electric heat pump",
"solar-assisted heat pump": "solar assisted heat pump"
"solar-assisted heat pump": "solar assisted heat pump",
"portable electric heating": "portable electric heaters",
}
edge_case_result = {}
@ -138,6 +139,8 @@ class MainHeatAttributes(Definitions):
result.update({f'has_{ft.replace(" ", "_")}': False for ft in self.FUEL_TYPES})
result.update({f'has_{ot.replace(" ", "_")}': False for ot in self.OTHERS})
result['has_underfloor_heating'] = False
# We re-map entries that are the same
# We just drop those keys
if self.nodata:
return result

View file

@ -33,6 +33,12 @@ class RoofAttributes(Definitions):
"ystafell(oedd) to, dim inswleiddio": "roof room(s), no insulation",
}
DEFAULT_KEYS = [
'thermal_transmittance', 'thermal_transmittance_unit', 'is_pitched', 'is_roof_room',
'is_loft', 'is_flat', 'is_thatched', 'is_at_rafters', 'is_assumed', 'has_dwelling_above',
'is_valid', 'insulation_thickness'
]
def __init__(self, description: str):
"""
:param description: Description of the roof.
@ -95,6 +101,8 @@ class RoofAttributes(Definitions):
result: Dict[str, Union[float, str, bool, None]] = {}
if self.nodata:
for key in self.DEFAULT_KEYS:
result[key] = False
return result
description = self.description
@ -114,6 +122,13 @@ class RoofAttributes(Definitions):
result["is_valid"] = "invalid" not in description
description = description.replace("invalid", "")
# We handle an edge case where the description is "pitched, 150 loft insulation" and is missing the mm
if result["is_pitched"] or result["is_loft"]:
# Search for a regular expression that matches 150 insulation
match = re.search(r"(\d+\+?)\s*insulation", description)
if match:
result['insulation_thickness'] = match.group(1)
# insulation thickness
thickness_map = {
"ceiling insulated": "average",
@ -129,11 +144,11 @@ class RoofAttributes(Definitions):
# Remove the match from the description
# description = description.replace(key, "")
break
else:
# Extract insulation thickness in mm, if present
match = re.search(r'(\d+\+?)\s*mm', description)
if match:
result['insulation_thickness'] = match.group(1)
# Extract insulation thickness in mm, if present
match = re.search(r'(\d+\+?)\s*mm', description)
if match:
result['insulation_thickness'] = match.group(1)
if "insulation_thickness" not in result:
result['insulation_thickness'] = None

View file

@ -68,6 +68,13 @@ class WallAttributes(Definitions):
'Cowith external insulation': 'Cob, with external insulation',
}
DEFAULT_KEYS = [
'thermal_transmittance', 'thermal_transmittance_unit', 'is_cavity_wall', 'is_filled_cavity',
'is_solid_brick', 'is_system_built', 'is_timber_frame', 'is_granite_or_whinstone',
'is_as_built', 'is_cob', 'is_assumed', 'is_sandstone_or_limestone',
'insulation_thickness', 'external_insulation', 'internal_insulation'
]
def __init__(self, description: str):
"""
:param description: Description of the walls.
@ -98,6 +105,9 @@ class WallAttributes(Definitions):
def process(self) -> Dict[str, Union[float, str, bool, None]]:
result: Dict[str, Union[float, str, bool, None]] = {}
if self.nodata:
for key in self.DEFAULT_KEYS:
result[key] = False
return result
description = self.description.lower()
@ -142,4 +152,7 @@ class WallAttributes(Definitions):
else:
result["insulation_thickness"] = "average"
if result["is_cavity_wall"] & result["is_as_built"] & (result["insulation_thickness"] == "average"):
result["is_filled_cavity"] = True
return result

View file

@ -52,7 +52,7 @@ class WindowAttributes(Definitions):
raise ValueError('Invalid description')
def process(self) -> Dict[str, Union[str, bool]]:
result: Dict[str, Union[str, bool]] = {
result: Dict[str, Union[str, bool, None]] = {
"has_glazing": False,
"glazing_coverage": None,
"glazing_type": None,
@ -80,7 +80,11 @@ class WindowAttributes(Definitions):
break
# If we didn't find any coverage or type, we assume full coverage
if not result["glazing_coverage"]:
if (not result["glazing_coverage"]) & (result["glazing_type"] != "single"):
result["glazing_coverage"] = "full"
# We reset some values if the glazing is single
if result["glazing_type"] == "single":
result["has_glazing"] = False
return result

View file

@ -1652,4 +1652,17 @@ mainheat_cases = [
'has_electricaire': False, 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False,
"has_electric_heat_pumps": False,
"has_micro-cogeneration": False},
{'original_description': 'Portable electric heating assumed for most rooms', 'has_radiators': False,
'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False,
'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': False,
'has_air_source_heat_pump': False, 'has_room_heaters': False, 'has_electric_storage_heaters': False,
'has_warm_air': False, 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False,
'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False,
'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False, 'has_electric_heat_pump': False,
'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False, 'has_exhaust_source_heat_pump': False,
'has_community_heat_pump': False, 'has_portable_electric_heating': True, 'has_electric': True,
'has_mains_gas': False, 'has_wood_logs': False, 'has_coal': False, 'has_oil': False, 'has_wood_pellets': False,
'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False, 'has_lpg': False,
'has_b30k': False, 'has_assumed': True, 'has_electricaire': False, 'has_assumed_for_most_rooms': True,
'has_underfloor_heating': False}
]

View file

@ -550,7 +550,7 @@ wall_cases = [
'is_as_built': False, 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False,
'insulation_thickness': None, 'external_insulation': False, 'internal_insulation': False},
{'original_description': 'Cavity wall, as built, insulated (assumed)', 'thermal_transmittance': None,
'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': False, 'is_solid_brick': False,
'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': True, 'is_solid_brick': False,
'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True,
'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average',
'external_insulation': False, 'internal_insulation': False},
@ -727,7 +727,7 @@ wall_cases = [
'external_insulation': False, 'internal_insulation': False},
{'original_description': 'Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)',
'thermal_transmittance': None,
'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': False, 'is_solid_brick': False,
'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': True, 'is_solid_brick': False,
'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True,
'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average',
'external_insulation': False, 'internal_insulation': False},

View file

@ -30,7 +30,8 @@ windows_cases = [
'glazing_type': 'triple', 'no_data': False},
{'original_description': 'Gwydrau triphlyg rhannol', 'has_glazing': True, 'glazing_coverage': 'partial',
'glazing_type': 'triple', 'no_data': False},
{'original_description': 'Single glazed', 'has_glazing': True, 'glazing_coverage': 'full', 'glazing_type': 'single',
{'original_description': 'Single glazed', 'has_glazing': False, 'glazing_coverage': None,
'glazing_type': 'single',
'no_data': False},
{'original_description': 'Some double glazing', 'has_glazing': True, 'glazing_coverage': 'partial',
'glazing_type': 'double', 'no_data': False},
@ -46,7 +47,8 @@ windows_cases = [
'glazing_type': 'double', 'no_data': False},
{'original_description': 'Gwydrau dwbl gan mwyaf', 'has_glazing': True, 'glazing_coverage': 'most',
'glazing_type': 'double', 'no_data': False},
{'original_description': 'Gwydrau sengl', 'has_glazing': True, 'glazing_coverage': 'full', 'glazing_type': 'single',
{'original_description': 'Gwydrau sengl', 'has_glazing': False, 'glazing_coverage': None,
'glazing_type': 'single',
'no_data': False},
{'original_description': 'Ffenestri perfformiad uchel', 'has_glazing': True, 'glazing_coverage': 'full',
'glazing_type': 'high performance', 'no_data': False},

View file

@ -3,12 +3,13 @@ from pathlib import Path
from etl.epc_clean.tests.test_data.test_roof_attributes_cases import clean_roof_test_cases
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
# For local testing
if __file__ == "<input>":
input_data_path = Path("./model_data/tests/test_data/EpcClean_inputs.obj")
else:
current_file_path = Path(__file__)
input_data_path = current_file_path.parent / 'test_data' / 'EpcClean_inputs.obj'
# if __file__ == "<input>":
# input_data_path = Path("./model_data/tests/test_data/EpcClean_inputs.obj")
# else:
# current_file_path = Path(__file__)
# input_data_path = current_file_path.parent / 'test_data' / 'EpcClean_inputs.obj'
class TestRoofAttributes:
@ -88,7 +89,12 @@ class TestRoofAttributes:
def test_clean_roof_no_description(self):
roof = RoofAttributes('').process()
assert roof == {}
assert roof == {
'thermal_transmittance': False, 'thermal_transmittance_unit': False, 'is_pitched': False,
'is_roof_room': False, 'is_loft': False, 'is_flat': False, 'is_thatched': False,
'is_at_rafters': False, 'is_assumed': False, 'has_dwelling_above': False, 'is_valid': False,
'insulation_thickness': False
}
def test_clean_roof_edge_cases(self):
# Insulation thickness edge case

View file

@ -7,7 +7,7 @@ from pathlib import Path
import pandas as pd
from tqdm import tqdm
from etl.epc.settings import EARLIEST_EPC_DATE
from etl.epc.DataProcessor import DataProcessor
from etl.epc.DataProcessor import EPCDataProcessor
from BaseUtility import Definitions
from utils.s3 import save_dataframe_to_s3_parquet
@ -21,24 +21,31 @@ BUCKET = os.environ.get("BUCKET", "retrofit-data-dev")
def app():
directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()]
sample = []
for directory in tqdm(directories):
data = pd.read_csv(directory / "certificates.csv", low_memory=False)
data = data[data["LODGEMENT_DATE"] >= EARLIEST_EPC_DATE]
data = data[~pd.isnull(data["UPRN"])]
data["TOTAL_FLOOR_AREA"] = data["TOTAL_FLOOR_AREA"].astype(float)
data["CONSTRUCTION_AGE_BAND"] = data["CONSTRUCTION_AGE_BAND"].apply(
lambda x: DataProcessor.clean_construction_age_band(x)
lambda x: EPCDataProcessor.clean_construction_age_band(x)
)
data = data[~pd.isnull(data["CONSTRUCTION_AGE_BAND"])]
data = data[~data["CONSTRUCTION_AGE_BAND"].isin(Definitions.DATA_ANOMALY_MATCHES)]
data = data[~pd.isnull(data["TOTAL_FLOOR_AREA"])]
data = data[~pd.isnull(data["NUMBER_HABITABLE_ROOMS"])]
data = data[~pd.isnull(data["FLOOR_HEIGHT"])]
data = data[~pd.isnull(data["NUMBER_HEATED_ROOMS"])]
df = (
data.groupby(GROUPBY)
.agg({"NUMBER_HABITABLE_ROOMS": "median", "TOTAL_FLOOR_AREA": "mean", "FLOOR_HEIGHT": "mean"})
.agg(
{"NUMBER_HEATED_ROOMS": "median", "NUMBER_HABITABLE_ROOMS": "median", "TOTAL_FLOOR_AREA": "mean",
"FLOOR_HEIGHT": "mean"}
)
.reset_index()
)

View file

@ -0,0 +1,244 @@
import pandas as pd
from tqdm import tqdm
from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
from utils.logger import setup_logger
logger = setup_logger()
class SolarPhotoSupply:
    """
    Build, store and query a lookup of PHOTO_SUPPLY values observed in EPC certificates.

    The lookup is aggregated (median and mean PHOTO_SUPPLY) by property type, built form,
    tenure, roof shape flags, construction age band and floor area decile.
    """

    # Columns taken from each certificates.csv file when building the dataset
    DATASET_COLUMNS = [
        "UPRN", "PROPERTY_TYPE", "TENURE", "BUILT_FORM", "ROOF_DESCRIPTION", "PHOTO_SUPPLY", "TOTAL_FLOOR_AREA",
        "CONSTRUCTION_AGE_BAND", "SOLAR_WATER_HEATING_FLAG"
    ]

    def __init__(self, file_directories, cleaned_lookup):
        """
        Initialize the SolarPhotoSupply class with file directories and a cleaned lookup. Currently, this class
        just works with locally stored data, but this could be extended to work with data stored in S3.

        :param file_directories: A list of directories where files are stored.
        :param cleaned_lookup: A dictionary containing cleaned lookup data.
        """
        self.file_directories = file_directories
        # Replaced by a dataframe once create_dataset() has run
        self.results = []
        self.decile_thresholds = None
        self.roof_lookup = pd.DataFrame(cleaned_lookup.get("roof-description"))
        self.photo_supply_lookup = pd.DataFrame()
        self.floor_area_decile_thresholds = pd.DataFrame()

    def create_dataset(self):
        """
        Create a dataset from the provided file directories. This method processes the data files,
        applies transformations, and aggregates data into a useful format.

        On success ``results``, ``decile_thresholds``, ``photo_supply_lookup`` and
        ``floor_area_decile_thresholds`` are populated.

        :raises ValueError: If there is no roof lookup data, or no certificate data was read.
        """
        if self.roof_lookup.empty:
            raise ValueError("No roof lookup data")

        frames = []
        logger.info("Creating solar photo supply dataset")
        for directory in tqdm(self.file_directories):
            filepath = directory / "certificates.csv"
            df = pd.read_csv(filepath, low_memory=False)
            df = df[~pd.isnull(df["UPRN"])]
            df["UPRN"] = df["UPRN"].astype(int).astype(str)

            # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA
            for col in ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]:
                df = df[~pd.isnull(df[col])]

            # Take newest LODGEMENT_DATE per UPRN
            df = df.sort_values(by="LODGEMENT_DATE", ascending=False).drop_duplicates(subset=["UPRN"])

            data = df[self.DATASET_COLUMNS].copy()
            # Only properties that actually have a photovoltaic supply are of interest
            data["PHOTO_SUPPLY"] = data["PHOTO_SUPPLY"].fillna(0)
            data = data[data["PHOTO_SUPPLY"] != 0]
            frames.append(data)

        if not frames:
            # pd.concat would raise an opaque "No objects to concatenate" ValueError here
            raise ValueError("No certificate data found in the provided file directories")

        self.results = pd.concat(frames)

        # Convert total floor area to deciles
        self.decile_thresholds = self.results["TOTAL_FLOOR_AREA"].quantile(
            [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
        ).values
        self.results["floor_area_decile"] = pd.cut(
            self.results["TOTAL_FLOOR_AREA"],
            bins=[0] + list(self.decile_thresholds) + [float('inf')],
            labels=False,
            include_lowest=True
        )

        # Convert tenure to lower
        self.results["TENURE"] = self.results["TENURE"].str.lower()

        # Attach the cleaned roof attributes, keyed on the raw roof description
        self.results = self.results.merge(
            self.roof_lookup.drop(
                columns=[
                    "clean_description", "thermal_transmittance", "thermal_transmittance_unit", "insulation_thickness",
                    "is_assumed"
                ]
            ),
            left_on="ROOF_DESCRIPTION",
            right_on="original_description",
            how="left"
        )

        self.photo_supply_lookup = self.results.groupby(
            [
                "PROPERTY_TYPE", "BUILT_FORM", "TENURE", "is_pitched", "is_roof_room", "is_flat",
                "CONSTRUCTION_AGE_BAND", "floor_area_decile"
            ],
            observed=True
        ).agg(
            {
                "PHOTO_SUPPLY": ["median", "mean"],
            }
        ).reset_index()

        # Flatten the two-level column index produced by the aggregation
        self.photo_supply_lookup.columns = ['_'.join(col).strip() for col in self.photo_supply_lookup.columns.values]
        # Remove trailing underscore from columns
        self.photo_supply_lookup.columns = [
            col[:-1] if col.endswith("_") else col for col in self.photo_supply_lookup.columns.values
        ]
        # Convert columns to lowercase
        self.photo_supply_lookup.columns = [col.lower() for col in self.photo_supply_lookup.columns.values]

        self.floor_area_decile_thresholds = pd.DataFrame(
            self.decile_thresholds,
            columns=["floor_area_decile_thresholds"]
        )

    @staticmethod
    def classify_floor_area(new_area, thresholds):
        """
        Classify a given floor area into a decile based on provided thresholds.

        :param new_area: The new floor area to be classified.
        :param thresholds: A list of thresholds used for classification, in ascending order.
        :return: The index of the first threshold that is greater than or equal to ``new_area``, or
            ``len(thresholds)`` if the area is larger than every threshold.
        """
        for i, threshold in enumerate(thresholds):
            if new_area <= threshold:
                return i  # Returns the decile index (0 to 9)
        return len(thresholds)

    def save(self, bucket="retrofit-data-dev"):
        """
        Save the processed data to an S3 bucket in the parquet format. This method also handles
        logging and validation to ensure data is present before saving.

        :param bucket: The name of the S3 bucket to save to. Defaults to the bucket that was
            previously hard-coded, so existing callers are unaffected.
        :raises ValueError: If the photo supply lookup is empty.
        """
        if self.photo_supply_lookup.empty:
            raise ValueError("No data to save")

        logger.info("Storing outputs to S3")
        # Store this data in s3 as a parquet file
        save_dataframe_to_s3_parquet(
            df=self.photo_supply_lookup,
            bucket_name=bucket,
            file_key="solar_pv_supply/photo_supply_lookup.parquet",
        )
        save_dataframe_to_s3_parquet(
            df=self.floor_area_decile_thresholds,
            bucket_name=bucket,
            file_key="solar_pv_supply/floor_area_decile_thresholds.parquet",
        )

    @staticmethod
    def load(bucket):
        """
        Load datasets from an S3 bucket.

        :param bucket: The name of the S3 bucket to load data from.
        :return: A tuple containing photo supply lookup and floor area decile thresholds dataframes.
        """
        photo_supply_lookup = read_dataframe_from_s3_parquet(
            bucket_name=bucket, file_key="solar_pv_supply/photo_supply_lookup.parquet",
        )
        floor_area_decile_thresholds = read_dataframe_from_s3_parquet(
            bucket_name=bucket, file_key="solar_pv_supply/floor_area_decile_thresholds.parquet",
        )
        return photo_supply_lookup, floor_area_decile_thresholds

    @classmethod
    def filter_photo_supply_lookup(
            cls,
            photo_supply_lookup: pd.DataFrame,
            floor_area_decile_thresholds: pd.DataFrame,
            tenure: str,
            built_form: str,
            property_type: str,
            construction_age_band: str,
            is_flat: bool,
            is_pitched: bool,
            is_roof_room: bool,
            floor_area: float
    ):
        """
        Filter the photo supply lookup to find the most appropriate photo supply for a given property.

        :param photo_supply_lookup: The photo supply lookup dataframe.
        :param floor_area_decile_thresholds: The floor area decile thresholds dataframe.
        :param tenure: The tenure of the property.
        :param built_form: The built form of the property.
        :param property_type: The property type of the property.
        :param construction_age_band: The construction age band of the property.
        :param is_flat: Whether the property has a flat roof.
        :param is_pitched: Whether the property has a pitched roof.
        :param is_roof_room: Whether the property has a roof room.
        :param floor_area: The floor area of the property.
        :return: The rows of ``photo_supply_lookup`` that match the property, narrowed to the property's
            floor area decile when that decile is present among the matches.
        :raises ValueError: If no rows match even on tenure, built form and property type alone.
        """
        # Convert the tenure to lower case, as is done in the creation of the dataset
        tenure = tenure.lower()
        # We remap the "not defined" tenure to the shortened form of the same description
        tenure = {
            "not defined - use in the case of a new dwelling for which the intended tenure in not known. it is not to "
            "be used for an existing dwelling":
                "not defined - use in the case of a new dwelling for which the intended tenure in not known. it is no"
        }.get(tenure, tenure)

        photo_supply_matched = photo_supply_lookup[
            (photo_supply_lookup["tenure"] == tenure) &
            (photo_supply_lookup["built_form"] == built_form) &
            (photo_supply_lookup["property_type"] == property_type) &
            (photo_supply_lookup["construction_age_band"] == construction_age_band) &
            (photo_supply_lookup["is_flat"] == is_flat) &
            (photo_supply_lookup["is_pitched"] == is_pitched) &
            (photo_supply_lookup["is_roof_room"] == is_roof_room)
        ]

        if photo_supply_matched.empty:
            # There are a small number of cases where we don't get a full match so try again with a more aggregated
            # average
            photo_supply_matched = photo_supply_lookup[
                (photo_supply_lookup["tenure"] == tenure) &
                (photo_supply_lookup["built_form"] == built_form) &
                (photo_supply_lookup["property_type"] == property_type)
            ]
            # Narrow by age band only when doing so leaves something to return
            if construction_age_band in photo_supply_matched["construction_age_band"].values:
                photo_supply_matched = photo_supply_matched[
                    photo_supply_matched["construction_age_band"] == construction_age_band
                ]

        if photo_supply_matched.empty:
            raise ValueError("No photo supply matches")

        floor_area_decile = cls.classify_floor_area(
            floor_area, floor_area_decile_thresholds["floor_area_decile_thresholds"].values
        )

        # Narrow by floor area decile only when that decile exists among the matches
        if floor_area_decile in photo_supply_matched["floor_area_decile"].values:
            photo_supply_matched = photo_supply_matched[
                photo_supply_matched["floor_area_decile"] == floor_area_decile
            ]

        return photo_supply_matched

31
etl/solar/app.py Normal file
View file

@ -0,0 +1,31 @@
from pathlib import Path
from etl.epc.property_change_app import get_cleaned
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
def app():
    """
    Build the photo-supply lookup from the locally stored EPC data and save it.

    The photo-supply variable in the EPC data is defined as:

    "Percentage of photovoltaic area as a percentage of total roof area. 0% indicates that a Photovoltaic Supply
    is not present in the property."

    When recommending solar, the retrofit is simulated by increasing this value from 0, so a sensible
    figure to increase it to is needed. This script pulls the data used to deduce that figure.

    :return: None
    """
    # Every sub-directory of the local data directory is treated as one set of certificates
    certificate_directories = [path for path in DATA_DIRECTORY.iterdir() if path.is_dir()]

    solar_data_client = SolarPhotoSupply(
        file_directories=certificate_directories,
        cleaned_lookup=get_cleaned(),
    )
    solar_data_client.create_dataset()
    solar_data_client.save()

View file

@ -0,0 +1,109 @@
import unittest
import pandas as pd
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
class TestSolarPhotoSupply(unittest.TestCase):
    """Tests for the lookup filtering and floor area classification of SolarPhotoSupply."""

    def setUp(self):
        # Mock data for photo_supply_lookup and floor_area_decile_thresholds
        lookup_rows = [
            {
                "tenure": "leasehold",
                "built_form": "detached",
                "property_type": "house",
                "construction_age_band": "pre-1900",
                "is_flat": False,
                "is_pitched": True,
                "is_roof_room": False,
                "floor_area_decile": 0,
                "photo_supply": 100,
            },
            {
                "tenure": "freehold",
                "built_form": "semi-detached",
                "property_type": "flat",
                "construction_age_band": "1900-1929",
                "is_flat": True,
                "is_pitched": False,
                "is_roof_room": True,
                "floor_area_decile": 1,
                "photo_supply": 200,
            },
        ]
        self.photo_supply_lookup = pd.DataFrame(lookup_rows)
        self.floor_area_decile_thresholds = pd.DataFrame(
            {"floor_area_decile_thresholds": [50, 100]}
        )
        self.solar_photo_supply = SolarPhotoSupply([], {})

    def _filter(self, tenure, built_form, property_type, construction_age_band,
                is_flat, is_pitched, is_roof_room, floor_area):
        """Run the lookup filter against the mock lookup and thresholds built in setUp."""
        return self.solar_photo_supply.filter_photo_supply_lookup(
            self.photo_supply_lookup,
            self.floor_area_decile_thresholds,
            tenure,
            built_form,
            property_type,
            construction_age_band,
            is_flat,
            is_pitched,
            is_roof_room,
            floor_area,
        )

    def test_correct_filtering(self):
        """A property matching the first mock row exactly yields that single row."""
        matched = self._filter("leasehold", "detached", "house", "pre-1900", False, True, False, 45)
        self.assertEqual(len(matched), 1)
        self.assertEqual(matched.iloc[0]["photo_supply"], 100)

    def test_no_matches(self):
        """A built form absent from the lookup raises ValueError."""
        with self.assertRaises(ValueError):
            self._filter("leasehold", "unknown", "house", "pre-1900", False, True, False, 45)

    def test_floor_area_decile_matching(self):
        """A floor area of 60 falls in decile 1 and yields the second mock row."""
        matched = self._filter("freehold", "semi-detached", "flat", "1900-1929", True, False, True, 60)
        self.assertEqual(len(matched), 1)
        self.assertEqual(matched.iloc[0]["photo_supply"], 200)

    def test_invalid_parameters(self):
        """A non-string tenure raises AttributeError."""
        invalid_tenure = 123  # Invalid type for tenure
        with self.assertRaises(AttributeError):
            self._filter(invalid_tenure, "detached", "house", "pre-1900", False, True, False, 45)

    def test_classify_floor_area(self):
        """Floor areas are classified by threshold index, with len(thresholds) for larger values."""
        # Setup
        thresholds = [10, 20, 30, 40, 50]
        solar_photo_supply = SolarPhotoSupply([], {})

        # Test Case 1: Valid floor area
        self.assertEqual(
            solar_photo_supply.classify_floor_area(25, thresholds),
            2,
            "Decile classification did not match expected result",
        )

        # Test Case 2: Out of range floor area
        self.assertEqual(
            solar_photo_supply.classify_floor_area(60, thresholds),
            len(thresholds),
            "Decile classification for out of range value is incorrect",
        )
if __name__ == '__main__':
unittest.main()

View file

@ -0,0 +1,179 @@
"""
This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
testing
"""
import os
import numpy as np
import pandas as pd
from epc_api.client import EpcClient
from utils.s3 import save_csv_to_s3
FILE_SIZE = 5
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN", None)
USER_ID = 8
PORTFOLIO_ID = 54
def _find_social_rows(epc_client, params, required_text=None):
    """
    Search the EPC API and keep only the social-rented rows whose description fields contain the required text.
    :param epc_client: Client exposing ``domestic.search(params=..., size=...)``
    :param params: Dict of search parameters, passed straight through to the API
    :param required_text: Optional dict mapping a field name to a lower-case substring that field must contain
    :return: List of matching rows, in the order returned by the API
    """
    rows = epc_client.domestic.search(params=params, size=1000)["rows"]
    required_text = required_text or {}
    return [
        row for row in rows
        if row["tenure"] == "Rented (social)"
        # ``or ""`` stops an empty description from crashing the ``.lower()`` call
        and all(text in (row[field] or "").lower() for field, text in required_text.items())
    ]


def _print_example(rows, index, fields, label):
    """
    Print the requested fields of ``rows[index]``, one per line.
    :param rows: List of EPC rows
    :param index: Position of the row to print
    :param fields: Field names to print, in order
    :param label: Name of the example, used in the error message
    :raises IndexError: If fewer than ``index + 1`` rows are available
    """
    if len(rows) <= index:
        raise IndexError(f"{label}: only {len(rows)} matching properties found, need at least {index + 1}")
    for field in fields:
        print(rows[index][field])


def app():
    """
    Look up example social-housing properties in Birmingham via the EPC API and print their details, then
    upload a fixed test portfolio csv to S3 and print the request body that references it.
    """
    # For this dataset, we want example houses with differing built forms, plus a flat as the final example
    epc_client = EpcClient(auth_token=EPC_AUTH_TOKEN)
    # Birmingham has a Local Authority Code of E08000025
    birmingham = "E08000025"

    # ~~~~~~~~~~~~~~~~~~~~
    # First example
    # ~~~~~~~~~~~~~~~~~~~~
    # Let's take an EPC D property (no energy band filter is applied here; the property found happens to be a D).
    # We want a cavity wall and a pitched roof, both without insulation
    example_1 = _find_social_rows(
        epc_client,
        {"local-authority": birmingham, "property-type": "house"},
        {
            "walls-description": "cavity wall, as built, no insulation (assumed)",
            "roof-description": "pitched, no insulation (assumed)",
        },
    )
    _print_example(example_1, 0, ["postcode", "address1", "built-form", "current-energy-rating"], "First example")
    # B13 9LT
    # 113 Tenby Road
    # Mid-Terrace
    # 'D'

    # ~~~~~~~~~~~~~~~~~~~~
    # Second example
    # ~~~~~~~~~~~~~~~~~~~~
    # Let's take an EPC E property with a solid wall and some existing loft insulation
    example_2 = _find_social_rows(
        epc_client,
        {"local-authority": birmingham, "property-type": "house", "energy-band": "e"},
        {
            "walls-description": "solid brick, as built, no insulation (assumed)",
            "roof-description": "pitched, 100 mm loft insulation",
        },
    )
    _print_example(example_2, 0, ["postcode", "address1", "built-form", "current-energy-rating"], "Second example")
    # B28 8JF
    # 139 School Road
    # Semi-Detached
    # E

    # ~~~~~~~~~~~~~~~~~~~~
    # Third example
    # ~~~~~~~~~~~~~~~~~~~~
    # Any social housing property in band F; we use the fifth match
    example_3 = _find_social_rows(
        epc_client,
        {"local-authority": birmingham, "property-type": "house", "energy-band": "f"},
    )
    _print_example(
        example_3,
        4,
        ["walls-description", "floor-description", "roof-description", "postcode", "address1", "built-form"],
        "Third example",
    )
    # B32 1SL
    # 77 Simmons Drive
    # Semi-Detached

    # ~~~~~~~~~~~~~~~~~~~~
    # Final example
    # ~~~~~~~~~~~~~~~~~~~~
    # Let's take a flat that is a D
    example_4 = _find_social_rows(
        epc_client,
        {"local-authority": birmingham, "property-type": "flat", "energy-band": "d"},
        {"walls-description": "cavity wall, as built, no insulation (assumed)"},
    )
    _print_example(example_4, 0, ["postcode", "address1", "floor-description", "property-type"], "Final example")
    # B32 1LS
    # Flat 2
    # Flat

    test_file = pd.DataFrame(
        [
            # New properties
            {"address": "113 Tenby Road", "postcode": "B13 9LT", "Notes": None},
            {"address": "139 School Road", "postcode": "B28 8JF", "Notes": None},
            {"address": "77 Simmons Drive", "postcode": "B32 1SL", "Notes": None},
            {"address": "Flat 2, 54 Wedgewood Road", "postcode": "B32 1LS", "Notes": None},
        ]
    )
    # Store the data in s3
    filename = f"{USER_ID}/{PORTFOLIO_ID}/test_inputs.csv"
    save_csv_to_s3(
        dataframe=test_file,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename
    )
    body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Social",
        "goal": "Increase EPC",
        "goal_value": "C",
        "trigger_file_path": filename
    }
    print(body)

View file

@ -0,0 +1,194 @@
from pathlib import Path
from random import choices, sample
import os
import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv
from utils.logger import setup_logger
from backend.SearchEpc import SearchEpc, vartypes
from BaseUtility import Definitions
from etl.epc.settings import BUILT_FORM_REMAP
ENV_FILE = Path(__file__).parent / "backend" / ".env"
# Load the .env file before any environment variable is read, otherwise values that are only defined in the
# .env file (such as the EPC auth token) would not be visible to os.getenv below
load_dotenv(ENV_FILE)
logger = setup_logger()
DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
# Number of UPRNs tested per directory, and number of directories sampled
DIR_SAMPLE_SIZE = 500
N_DIRECTORIES = 50
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
# String-typed fields that are excluded from the categorical comparison
CATETORICALS_TO_IGNORE = [
    "postcode", "constituency", "local-authority", "built-form", "property-type", "address1", "constituency-label",
    "building-reference-number", "address2", "posttown", "transaction-type", "lmk-key", "address3",
    "local-authority-label", "county",
]
def check_numeric_performance(estimated_value, actual_value):
    """
    Score a numeric estimate as a relative error against the actual value, where 0 is a perfect estimate.
    :param estimated_value: The estimated figure, may be null
    :param actual_value: The recorded figure, may be null
    :return: None when there is no actual value to compare against; 1 when the estimate is missing, or when the
        actual value is 0 and the estimate is not; otherwise ``abs(estimated - actual) / abs(actual)``
    """
    # If we don't have anything to compare against, return None
    if pd.isnull(actual_value):
        return None
    # A missing estimate is treated as being completely wrong
    if pd.isnull(estimated_value):
        return 1
    # A relative error is undefined against zero, so score it as either exactly right or completely wrong
    if actual_value == 0:
        return 0 if estimated_value == 0 else 1
    # Divide by the magnitude of the actual value so that a negative actual value cannot produce a negative error
    return abs(estimated_value - actual_value) / abs(actual_value)
def app():
    """
    This script is used to test the EPC estimation process.

    For a random sample of directories under DATA_DIRECTORY, it takes the newest certificate for each of the first
    DIR_SAMPLE_SIZE UPRNs, re-estimates that EPC with every certificate for the same UPRN excluded, and scores the
    estimate against the real record. Median scores are logged overall and grouped by tenure, property type and
    built form.
    """
    numerical_vartypes = {key: value for key, value in vartypes.items() if value in ["float", "Int64"]}
    str_var_types = {key: value for key, value in vartypes.items() if value == "str"}
    # Make sure we haven't missed any keys
    if len(numerical_vartypes) + len(str_var_types) != len(vartypes):
        raise ValueError("Not all vartypes have been accounted for")
    # Drop some keys that aren't important
    for k in CATETORICALS_TO_IGNORE:
        str_var_types.pop(k, None)

    directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()]
    # Sample without replacement so that no directory is processed (and counted) more than once
    directory_sample = sample(directories, k=min(N_DIRECTORIES, len(directories)))

    results = []
    for directory in tqdm(directory_sample):
        filepath = directory / "certificates.csv"
        df = pd.read_csv(filepath, low_memory=False)
        # Drop missing UPRNs before casting to str, otherwise they become the literal string "<NA>" and
        # can no longer be detected as null
        df = df[df["UPRN"].notna()].copy()
        df["UPRN"] = df["UPRN"].astype("Int64").astype("str")

        # uprn_sample = sample(df["UPRN"].unique().tolist(), DIR_SAMPLE_SIZE)
        # Take a fixed sample based on the first DIR_SAMPLE_SIZE uprns
        uprn_sample = sorted(df["UPRN"].unique().tolist())[:DIR_SAMPLE_SIZE]
        df_sample = df[df["UPRN"].isin(uprn_sample)]
        # Take the record with the newest LODGEMENT_DATETIME by uprn
        df_sample = df_sample.sort_values("LODGEMENT_DATETIME", ascending=False).drop_duplicates("UPRN")
        # Convert the columns to lower case and replace underscores with hyphens, the same as the api
        df_sample.columns = df_sample.columns.str.lower().str.replace("_", "-")

        # For each epc, we test the estimation process
        for _, epc in df_sample.iterrows():
            epc = epc.to_dict()
            address1 = epc["address1"]
            postcode = epc["postcode"]
            # Get all EPCs for this uprn and we make sure they get dropped from the estimate_epc function
            epcs_for_uprn = df[df["UPRN"] == epc["uprn"]]
            lmks_to_drop = epcs_for_uprn["LMK_KEY"].tolist()

            searcher = SearchEpc(address1, postcode, auth_token=EPC_AUTH_TOKEN, os_api_key="")
            searcher.uprn = epc["uprn"]

            # Perform the same remapping for built-form as in the Property class for this test, in case we get (e.g.)
            # Enclosed End-Terrace
            built_form = BUILT_FORM_REMAP.get(epc["built-form"], epc["built-form"])
            is_detached_maisonette = epc["property-type"] == "Maisonette" and built_form == "Detached"
            if is_detached_maisonette or built_form in Definitions.DATA_ANOMALY_MATCHES:
                built_form = ""

            estimated_epc = searcher.estimate_epc(
                property_type=epc["property-type"], built_form=built_form, lmks_to_drop=lmks_to_drop
            )

            # We now compare the difference between the estimated and original
            # TODO: We can convert windows and lighting to numeric versions and estimate how close we are
            numeric_performance = {
                key: check_numeric_performance(estimated_epc[key], epc[key]) for key in numerical_vartypes
            }
            # Remove Nones
            numeric_performance = {key: value for key, value in numeric_performance.items() if value is not None}
            if numeric_performance:
                # Get an average
                numeric_success = 1 - sum(numeric_performance.values()) / len(numeric_performance)
            else:
                # Nothing was comparable for this record; NaN keeps it out of the medians instead of crashing
                numeric_success = float("nan")

            # categorical performance
            categorical_performance = {
                key: 0 if estimated_epc[key] != epc[key] else 1 for key in str_var_types
            }
            # Get an average
            categorical_success = sum(categorical_performance.values()) / len(categorical_performance)

            results.append(
                {
                    "uprn": epc["uprn"],
                    "numeric_success": numeric_success,
                    "categorical_success": categorical_success,
                    "property_type": epc["property-type"],
                    "built_form": epc["built-form"],
                    "tenure": epc["tenure"],
                }
            )

    # Get aggregate performance figures
    results_df = pd.DataFrame(results)
    results_df["tenure"] = results_df["tenure"].replace("Rented (social)", "rental (social)")
    avg_numeric_success = results_df["numeric_success"].median()
    avg_categorical_success = results_df["categorical_success"].median()
    # With 20 nearest homes
    # 0.7718100840549558
    # 0.5116279069767442
    # 100 nearest homes
    # 0.7859617377809409
    # 0.5348837209302325
    # Fixed sample, sqrt weights

    # Group by tenure
    by_tenure = results_df.groupby("tenure").agg(
        {"numeric_success": "median", "categorical_success": "median", "uprn": "count"}
    )
    pd.set_option('display.max_rows', 500)
    pd.set_option('display.max_columns', 500)
    pd.set_option('display.width', 1000)
    # With 20 nearest homes
    # numeric_success categorical_success uprn
    # tenure
    # NO DATA! 0.847840 0.581395 278
    # Not defined - use in the case of a new dwelling... 0.930282 0.651163 617
    # Owner-occupied 0.770330 0.511628 2588
    # Rented (private) 0.791885 0.558140 1232
    # owner-occupied 0.741088 0.488372 10912
    # rental (private) 0.749064 0.488372 3252
    # rental (social) 0.822109 0.581395 3878
    # unknown 0.895840 0.627907 1820
    # 100 nearest homes
    # tenure
    # NO DATA! 0.899566 0.604651 233
    # Not defined - use in the case of a new dwelling... 0.927518 0.674419 608
    # Owner-occupied 0.777026 0.511628 3167
    # Rented (private) 0.805646 0.534884 1316
    # owner-occupied 0.762180 0.488372 10835
    # rental (private) 0.760503 0.511628 3181
    # rental (social) 0.830057 0.604651 3705
    # unknown 0.899948 0.627907 1571

    # By property type - we also want to see how many properties we have for each property type
    by_property_type = results_df.groupby("property_type").agg(
        {"numeric_success": "median", "categorical_success": "median", "uprn": "count"}
    )
    # By property_type & built form
    by_property_type_built_form = results_df.groupby(["property_type", "built_form"]).agg(
        {"numeric_success": "median", "categorical_success": "median", "uprn": "count"}
    )

    # Report the figures, which were previously computed and then discarded
    logger.info(f"Median numeric success: {avg_numeric_success}")
    logger.info(f"Median categorical success: {avg_categorical_success}")
    logger.info(f"By tenure:\n{by_tenure}")
    logger.info(f"By property type:\n{by_property_type}")
    logger.info(f"By property type and built form:\n{by_property_type_built_form}")

View file

@ -0,0 +1,38 @@
"""
This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
testing
"""
import os
import pandas as pd
from utils.s3 import save_csv_to_s3
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN", None)
USER_ID = 8
PORTFOLIO_ID = 61
def app():
    """
    Upload the two-property livewest pilot csv to S3 and print the request body that points at it.
    """
    properties = [
        ("42, Foxes Field", "TR18 3RJ"),
        ("11, Cranley Gardens", "TQ13 8UT"),
    ]
    pilot_file = pd.DataFrame(
        [{"address": address, "postcode": postcode, "Notes": None} for address, postcode in properties]
    )
    # Store the data in s3, keyed by user and portfolio
    s3_key = f"{USER_ID}/{PORTFOLIO_ID}/livewest_pilot_file.csv"
    save_csv_to_s3(dataframe=pilot_file, bucket_name="retrofit-plan-inputs-dev", file_name=s3_key)
    request_body = dict(
        portfolio_id=str(PORTFOLIO_ID),
        housing_type="Social",
        goal="Increase EPC",
        goal_value="C",
        trigger_file_path=s3_key,
    )
    print(request_body)

View file

@ -0,0 +1,42 @@
"""
This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
testing
"""
import pandas as pd
from utils.s3 import save_csv_to_s3
USER_ID = 8
PORTFOLIO_ID = 57
def app():
    """
    Upload a four-property test portfolio (21 to 24 Butler House) to S3 as ``no_epc.csv`` and print the request
    body that points at it.
    :return:
    """
    shared_postcode = "E2 0PN"
    test_file = pd.DataFrame(
        [
            {"address": f"{number} Butler House", "postcode": shared_postcode, "Notes": None}
            for number in range(21, 25)
        ]
    )
    # Store the data in s3, keyed by user and portfolio
    s3_key = f"{USER_ID}/{PORTFOLIO_ID}/no_epc.csv"
    save_csv_to_s3(dataframe=test_file, bucket_name="retrofit-plan-inputs-dev", file_name=s3_key)
    request_body = dict(
        portfolio_id=str(PORTFOLIO_ID),
        housing_type="Social",
        goal="Increase EPC",
        goal_value="A",
        trigger_file_path=s3_key,
    )
    print(request_body)

View file

@ -0,0 +1,61 @@
"""
This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
testing
"""
import pandas as pd
from utils.s3 import save_csv_to_s3
USER_ID = 8
PORTFOLIO_ID = 62
def app():
    """
    This portfolio contains properties that we have demo'd in pilots, or properties that were provided to us
    as properties that are being treated under funding schemes and we have pre/post EPRs for.
    The combined csv is uploaded to S3 and the request body that points at it is printed.
    :return:
    """
    # Live West properties carry a null note
    live_west = [
        ("42, Foxes Field", "TR18 3RJ"),
        ("11, Cranley Gardens", "TQ13 8UT"),
    ]
    # Every other source carries an empty-string note
    other_sources = [
        # Keyzy properties
        ("2 South Terrace", "NN1 5JY"),
        ("25 Albert Street", "PO12 4TY"),
        # Pilot properties
        ("113 Tenby Road", "B13 9LT"),
        ("139 School Road", "B28 8JF"),
        ("77 Simmons Drive", "B32 1SL"),
        ("Flat 2, 54 Wedgewood Road", "B32 1LS"),
        # Warmfront ECO4 Properties
        ("73 Long Chaulden", "HP1 2HX"),
        ("8 Lindlings", "HP1 2HA"),
        ("44 Lindlings", "HP1 2HE"),
        ("46 Chaulden Terrace", "HP1 2AN"),
        # Osmosis SHDF Properties
        ("4, Heather Shaw", "BA14 7JS"),
        ("16 Glastonbury Road", "M32 9PE"),
        ("31 Loddon Way", "BA15 1HG"),
        ("62 Pearmain Drive", "NG3 3DJ"),
    ]
    rows = [{"address": address, "postcode": postcode, "Notes": None} for address, postcode in live_west]
    rows.extend({"address": address, "postcode": postcode, "Notes": ""} for address, postcode in other_sources)
    test_file = pd.DataFrame(rows)
    # Store the data in s3, keyed by user and portfolio
    s3_key = f"{USER_ID}/{PORTFOLIO_ID}/eco4_shdf_retrofits.csv"
    save_csv_to_s3(dataframe=test_file, bucket_name="retrofit-plan-inputs-dev", file_name=s3_key)
    request_body = dict(
        portfolio_id=str(PORTFOLIO_ID),
        housing_type="Social",
        goal="Increase EPC",
        goal_value="A",
        trigger_file_path=s3_key,
    )
    print(request_body)

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,38 @@
"""
This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
testing
"""
import os
import pandas as pd
from utils.s3 import save_csv_to_s3
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN", None)
USER_ID = 8
PORTFOLIO_ID = 59
def app():
    """
    Upload the two-property guinness partnership pilot csv to S3 and print the request body that points at it.
    """
    properties = [
        ("10 Elm Close", "CV37 8XL"),
        ("21, Spring Lane", "MK17 0QP"),
    ]
    pilot_file = pd.DataFrame(
        [{"address": address, "postcode": postcode, "Notes": None} for address, postcode in properties]
    )
    # Store the data in s3, keyed by user and portfolio
    s3_key = f"{USER_ID}/{PORTFOLIO_ID}/the_guiness_partnership_pilot_file.csv"
    save_csv_to_s3(dataframe=pilot_file, bucket_name="retrofit-plan-inputs-dev", file_name=s3_key)
    request_body = dict(
        portfolio_id=str(PORTFOLIO_ID),
        housing_type="Social",
        goal="Increase EPC",
        goal_value="C",
        trigger_file_path=s3_key,
    )
    print(request_body)

View file

@ -0,0 +1,43 @@
"""
This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
testing
"""
import pandas as pd
from utils.s3 import save_csv_to_s3
USER_ID = 8
PORTFOLIO_ID = 56
def app():
    """
    This portfolio is for testing windows recommendations. The five-property csv is uploaded to S3 and the
    request body that points at it is printed.
    :return:
    """
    properties = [
        ("3 Church Terrace", "LE13 0PW"),
        ("3, Main Street, Redmile", "NG13 0GA"),
        ("Manor House, Kennel Lane, Reepham", "LN3 4DZ"),
        ("13 Main Street", "LE14 2JU"),
        ("8 The Crescent, Coston Road, Buckminster", "NG33 5SF"),
    ]
    test_file = pd.DataFrame(
        [{"address": address, "postcode": postcode, "Notes": None} for address, postcode in properties]
    )
    # Store the data in s3, keyed by user and portfolio
    s3_key = f"{USER_ID}/{PORTFOLIO_ID}/windows_portfolio_inputs.csv"
    save_csv_to_s3(dataframe=test_file, bucket_name="retrofit-plan-inputs-dev", file_name=s3_key)
    request_body = dict(
        portfolio_id=str(PORTFOLIO_ID),
        housing_type="Social",
        goal="Increase EPC",
        goal_value="A",
        trigger_file_path=s3_key,
    )
    print(request_body)

View file

@ -181,4 +181,16 @@ module "lambda_carbon_prediction_ecr" {
module "lambda_heat_prediction_ecr" {
ecr_name = "lambda-heat-prediction-${var.stage}"
source = "./modules/ecr"
}
##############################################
# CDN - Cloudfront
##############################################
# Instantiates the local cloudfront module, wiring it to the bucket exposed by module.s3
module "cloudfront_distribution" {
  source = "./modules/cloudfront"
  # Bucket details are taken from the outputs of the s3 module
  bucket_name = module.s3.bucket_name
  bucket_id = module.s3.bucket_id
  bucket_arn = module.s3.bucket_arn
  bucket_domain_name = module.s3.bucket_domain_name
  stage = var.stage
}

View file

@ -0,0 +1,65 @@
# CloudFront distribution whose single origin is the S3 bucket passed in via the module variables
resource "aws_cloudfront_distribution" "s3_distribution" {
  # The bucket is reached through the origin access identity declared in this module
  origin {
    domain_name = var.bucket_domain_name
    origin_id = "S3-${var.bucket_name}"
    s3_origin_config {
      origin_access_identity = aws_cloudfront_origin_access_identity.oai.cloudfront_access_identity_path
    }
  }
  enabled = true
  # Read-only behaviour: only GET/HEAD are allowed and cached, and viewers are redirected to HTTPS
  default_cache_behavior {
    allowed_methods = ["GET", "HEAD"]
    cached_methods = ["GET", "HEAD"]
    # Must match the origin_id of the origin block above
    target_origin_id = "S3-${var.bucket_name}"
    viewer_protocol_policy = "redirect-to-https"
    compress = true
    # Neither query strings nor cookies are forwarded to the origin
    forwarded_values {
      query_string = false
      cookies {
        forward = "none"
      }
    }
    # TTLs in seconds: no minimum, 1 day by default, 365 days at most
    min_ttl = 0
    default_ttl = 86400
    max_ttl = 31536000
  }
  price_class = "PriceClass_All"
  # No geographic restriction is applied
  restrictions {
    geo_restriction {
      restriction_type = "none"
    }
  }
  # Uses the default CloudFront certificate rather than a custom one
  viewer_certificate {
    cloudfront_default_certificate = true
  }
}
# Origin access identity that the distribution uses for its S3 origin and that the bucket policy grants access to
resource "aws_cloudfront_origin_access_identity" "oai" {
  comment = "OAI for ${var.bucket_name}"
}
# Bucket policy allowing the origin access identity to read (s3:GetObject) every object in the bucket
resource "aws_s3_bucket_policy" "bucket_policy" {
  bucket = var.bucket_id
  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        # The principal is the origin access identity declared in this module
        Principal = {
          AWS = "arn:aws:iam::cloudfront:user/CloudFront Origin Access Identity ${aws_cloudfront_origin_access_identity.oai.id}"
        }
        Action = "s3:GetObject"
        # "/*" scopes the statement to the objects inside the bucket
        Resource = "${var.bucket_arn}/*"
      },
    ]
  })
}

View file

@ -0,0 +1,24 @@
# Input variables; these match the arguments passed to the cloudfront module call (presumably this is that
# module's variables file - confirm against the repository layout)
variable "bucket_name" {
  description = "The name of the bucket"
  type = string
}
# NOTE(review): "stage" is not referenced by the cloudfront resources visible in this change - confirm it is needed
variable "stage" {
  description = "The deployment stage"
  type = string
}
variable "bucket_id" {
  description = "The ID of the S3 bucket"
  type = string
}
variable "bucket_arn" {
  description = "The ARN of the S3 bucket"
  type = string
}
variable "bucket_domain_name" {
  description = "The regional domain name of the S3 bucket"
  type = string
}

View file

@ -2,3 +2,15 @@ output "bucket_name" {
description = "The name of the S3 bucket"
value = aws_s3_bucket.bucket.bucket
}
# The outputs below expose further attributes of the same bucket so that other modules can reference it
output "bucket_id" {
  description = "The ID of the S3 bucket"
  value = aws_s3_bucket.bucket.id
}
output "bucket_arn" {
  description = "The ARN of the S3 bucket"
  value = aws_s3_bucket.bucket.arn
}
output "bucket_domain_name" {
  description = "The regional domain name of the S3 bucket"
  value = aws_s3_bucket.bucket.bucket_regional_domain_name
}

3
keyzy_pilot.csv Normal file
View file

@ -0,0 +1,3 @@
address,postcode,Notes,,,,
2 South Terrace,NN1 5JY,,,,,
25 Albert Street,PO12 4TY,,,,,
1 address postcode Notes
2 2 South Terrace NN1 5JY
3 25 Albert Street PO12 4TY

View file

@ -1,27 +1,96 @@
import numpy as np
from recommendations.county_to_region import county_to_region_map
# This data comes from SPONs
# This data comes from SPONs 2023
regional_labour_variations = [
{"Region": "Outer London (Spons 2023)", "Adjustment_Factor": 1.00},
{"Region": "Outer London", "Adjustment_Factor": 1.00},
{"Region": "Inner London", "Adjustment_Factor": 1.05},
{"Region": "South East", "Adjustment_Factor": 0.96},
{"Region": "South West", "Adjustment_Factor": 0.90},
{"Region": "South East England", "Adjustment_Factor": 0.96},
{"Region": "South West England", "Adjustment_Factor": 0.90},
{"Region": "East of England", "Adjustment_Factor": 0.93},
{"Region": "East Midlands", "Adjustment_Factor": 0.88},
{"Region": "West Midlands", "Adjustment_Factor": 0.87},
{"Region": "North East", "Adjustment_Factor": 0.83},
{"Region": "North West", "Adjustment_Factor": 0.88},
{"Region": "Yorkshire and Humberside", "Adjustment_Factor": 0.86},
{"Region": "North East England", "Adjustment_Factor": 0.83},
{"Region": "North West England", "Adjustment_Factor": 0.88},
{"Region": "Yorkshire and the Humber", "Adjustment_Factor": 0.86},
{"Region": "Wales", "Adjustment_Factor": 0.88},
{"Region": "Scotland", "Adjustment_Factor": 0.88},
{"Region": "Northern Ireland", "Adjustment_Factor": 0.76}
]
county_map = {
"Northamptonshire": "East Midlands",
"Hampshire": "South East",
# This data is based on the MCS database
MCS_SOLAR_PV_COST_DATA = {
"last_updated": "2024-01-04",
"average_cost_per_kwh": 2013.94,
"average_cost_per_kwh-Outer London": 2618.75,
"average_cost_per_kwh-Inner London": 2618.75,
"average_cost_per_kwh-South East England": 2083.33,
"average_cost_per_kwh-South West England": 2113,
"average_cost_per_kwh-East of England": 1973.86,
"average_cost_per_kwh-East Midlands": 1981.86,
"average_cost_per_kwh-West Midlands": 1926.55,
"average_cost_per_kwh-North East England": 2028.49,
"average_cost_per_kwh-North West England": 1620.42,
"average_cost_per_kwh-Yorkshire and the Humber": 2060.9,
"average_cost_per_kwh-Wales": 1898.83,
"average_cost_per_kwh-Scotland": 1967.97,
"average_cost_per_kwh-Northern Ireland": 2126.09,
}
# This is based on quotes from installers
BATTERY_COST = 3500
# This is based on https://www.checkatrade.com/blog/cost-guides/cost-smart-thermostat/
SMART_APPLIANCE_THERMOSTAT_COST = 400
PROGRAMMER_COST = 120
ROOM_THERMOSTAT_COST = 150
TRVS_COST = 35
# Cost for TTZC
# Smart thermostat based on checkatrade https://www.checkatrade.com/blog/cost-guides/cost-smart-thermostat/
# Based on the Nest system
TTZC_SMART_THERMOSTAT_COST = 205
TTZC_SMART_THERMOSTAT_LABOUR_HOURS = 2
TTZC_ELECTRICIAN_HOURLY_RATE = 45
# Based on cost of a Nest temperature sensor
TTZC_ROOM_TEMPERATURE_SENSOR_COST = 50
TTZC_ROOM_TEMPERATURE_SENSOR_LABOUR_HOURS = 0.17 # (Assume ~ 10 mins install per sensor)
# Based on an average cost of smart radiator valves
TTZC_SMART_RADIATOR_VALUES = 50
TTZC_SMART_RADIATOR_VALUES_LABOUR_HOURS = 0.37 # (Assume ~ 15-30 mins install per valve)
# Low carbon combi boiler - median value based on £2200 - £3000 range
LOW_CARBON_COMBI_BOILER = 2200
# boiler prices based on
# https://www.greenmatch.co.uk/boilers/30kw-boiler
# https://www.greenmatch.co.uk/boilers/35kw-boiler
# https://www.greenmatch.co.uk/boilers/40kw-boiler
# These are exclusive of installation costs
COMBI_BOILER_COSTS = {
"30kw": 1550,
"35kw": 1610,
"40kw": 1625
}
CONVENTIONAL_BOILER_COSTS = {
"30kw": 1117,
"35kw": 1546,
"40kw": 1776
}
# Assumes 3 hours to remove each heater (including re-decorating)
ROOM_HEATER_REMOVAL_COST = 120
ROOM_HEATER_REMOVAL_LABOUR_HOURS = 3
# This is a cost quoted by Jim for a system flush - existing system will run more efficiently
SYSTEM_FLUSH_COST = 250
SINGLE_RADIATOR_COST = 150
DOUBLE_RADIATOR_COST = 300
FLUE_COST = 600
PIPEWORK_COST = 750 # Min cost is £500
class Costs:
"""
@ -40,8 +109,16 @@ class Costs:
# We assume a conservative 10% contingency for all works which is a rate defined by SPONs
CONTINGENCY = 0.1
# For flat roof, we assume it's a high risk project as it's very weather dependent and also is heavily
# dependent on the quality of the existing roof
FLAT_ROOF_CONTINGENCY = 0.15
# We use a higher contingency rate for internal wall insulation because of the potential for issues with moving
# fittings and trimming doors, as well as scope for damage to the existing wall during preparation.
IWI_CONTINGENCY = 0.2
# Where there is more uncertainty, a higher contingency rate is used
HIGH_RISK_CONTINGENCY = 0.15
HIGH_RISK_CONTINGENCY = 0.2
# When there is less uncertainty, a lower contingency rate is used
LOW_RISK_CONTINGENCY = 0.05
@ -54,11 +131,21 @@ class Costs:
# have a preliminaries of 12-14% so we use 12% as the median for the preliminaries rate.
# For External wall insulation (EWI), we use 15% as the preliminaries rate if we think the property might
# need scaffolding, otherwise we use 12%. This is to account for any site preparation that might be required
EWI_NO_SCAFFOLDING_PRELIMINARIES = 0.12
EWI_SCAFFOLDING_PRELIMINARIES = 0.15
EWI_NO_SCAFFOLDING_PRELIMINARIES = 0.2
EWI_SCAFFOLDING_PRELIMINARIES = 0.25
VAT_RATE = 0.2
PROFIT_MARGIN = 0.15
PROFIT_MARGIN = 0.2
# Based on this greenmatch article, on average, a Sash window is around 50% more expensive than a casement window.
# Therefore, for a conservative cost estimate, and allowance for a more premium window type, we inflate the material
# cost of the windows to allow for a sash window type
# https://www.greenmatch.co.uk/windows/double-glazing/cost
SASH_WINDOW_INFLATION_FACTOR = 1.5
# Typically, secondary glazing can be installed for 25% of the cost of double glazed windows - to be conservative,
# we scale the cost by half
SECONDARY_GLAZING_SCALING_FACTOR = 0.5
def __init__(self, property_instance):
"""
@ -71,13 +158,16 @@ class Costs:
self.property = property_instance
self.regional_labour_variations = regional_labour_variations
self.county = county_map.get(self.property.data["county"], None)
if self.county is None:
raise ValueError("County not found in county map")
self.region = county_to_region_map.get(self.property.data["county"], None)
if self.region is None:
# Try and grab using the local-authority-label
self.region = county_to_region_map.get(self.property.data["local-authority-label"], None)
if self.region is None:
raise ValueError("Region not found in county map")
self.labour_adjustment_factor = [
x["Adjustment_Factor"] for x in self.regional_labour_variations if
x["Region"] == self.county
x["Region"] == self.region
][0]
if not self.labour_adjustment_factor:
@ -115,6 +205,9 @@ class Costs:
labour_hours = material["labour_hours_per_unit"] * wall_area
# Assume a team of 2
labour_days = (labour_hours / 8) / 2
return {
"total": total_cost,
"subtotal": subtotal_before_vat,
@ -124,7 +217,8 @@ class Costs:
"material": base_material_cost,
"profit": profit_cost,
"labour_hours": labour_hours,
"labour_cost": labour_cost
"labour_cost": labour_cost,
"labour_days": labour_days
}
def loft_insulation(self, floor_area, material):
@ -136,12 +230,16 @@ class Costs:
"""
material_cost_per_m2 = material["material_cost"]
# We inflate material costs due to recent price increases
material_cost_per_m2 = material_cost_per_m2 * 1.5
base_material_cost = material_cost_per_m2 * floor_area
labour_cost = material["labour_cost"] * floor_area * self.labour_adjustment_factor
subtotal_before_profit = base_material_cost + labour_cost
contingency_cost = subtotal_before_profit * self.CONTINGENCY
# We use high risk contingency because of the possibility of access issues and clearing existing insulation
contingency_cost = subtotal_before_profit * self.HIGH_RISK_CONTINGENCY
preliminaries_cost = subtotal_before_profit * self.PRELIMINARIES
profit_cost = subtotal_before_profit * self.PROFIT_MARGIN
@ -153,6 +251,9 @@ class Costs:
labour_hours = material["labour_hours_per_unit"] * floor_area
# Assume a team of 1 person
labour_days = labour_hours / 8
return {
"total": total_cost,
"subtotal": subtotal_before_vat,
@ -162,7 +263,8 @@ class Costs:
"material": base_material_cost,
"profit": profit_cost,
"labour_hours": labour_hours,
"labour_cost": labour_cost
"labour_cost": labour_cost,
"labour_days": labour_days
}
def internal_wall_insulation(self, wall_area, material, non_insulation_materials):
@ -224,8 +326,7 @@ class Costs:
subtotal_before_profit = labour_costs + materials_costs + demolition_plant_costs
# We use high risk contingency for iwi
contingency_cost = subtotal_before_profit * self.HIGH_RISK_CONTINGENCY
contingency_cost = subtotal_before_profit * self.IWI_CONTINGENCY
preliminaries_cost = subtotal_before_profit * self.PRELIMINARIES
profit_cost = subtotal_before_profit * self.PROFIT_MARGIN
@ -301,7 +402,9 @@ class Costs:
subtotal_before_profit = labour_costs + materials_costs
contingency_cost = subtotal_before_profit * self.CONTINGENCY
# Because of the possibility of damage to the existing floor, or difficulties associated with moving fittings,
# we use a higher contingency rate
contingency_cost = subtotal_before_profit * self.HIGH_RISK_CONTINGENCY
preliminaries_cost = subtotal_before_profit * self.PRELIMINARIES
profit_cost = subtotal_before_profit * self.PROFIT_MARGIN
@ -569,3 +672,566 @@ class Costs:
"labour_days": labour_days,
"labour_cost": labour_costs
}
def low_energy_lighting(self, number_of_lights, number_current_lel_lights, material):
    """
    Cost the installation of low energy lighting: materials and regionally adjusted labour, plus contingency,
    preliminaries, profit and VAT.
    :param number_of_lights: Int, number of lights to cost
    :param number_current_lel_lights: Int, number of low energy lights currently installed in the home
    :param material: Dict, per-light data with "material_cost", "labour_cost" and "labour_hours_per_unit"
    :return: Dict with the total, each cost component, and the labour hours and days
    """
    # A home without any low energy lights gets the higher contingency rate, in case there are potential wiring
    # blockers
    rate = self.HIGH_RISK_CONTINGENCY if number_current_lel_lights == 0 else self.CONTINGENCY

    fittings_cost = material["material_cost"] * number_of_lights
    install_cost = material["labour_cost"] * number_of_lights * self.labour_adjustment_factor
    base_cost = fittings_cost + install_cost

    # Every uplift is a percentage of the combined material and labour cost
    contingency_part = base_cost * rate
    preliminaries_part = base_cost * self.PRELIMINARIES
    profit_part = base_cost * self.PROFIT_MARGIN
    net_cost = base_cost + contingency_part + preliminaries_part + profit_part
    vat_part = net_cost * self.VAT_RATE

    hours = material["labour_hours_per_unit"] * number_of_lights
    return {
        "total": net_cost + vat_part,
        "subtotal": net_cost,
        "vat": vat_part,
        "contingency": contingency_part,
        "preliminaries": preliminaries_part,
        "material": fittings_cost,
        "profit": profit_part,
        "labour_hours": hours,
        # Assume a single electrician installing, working 8 hour days
        "labour_days": (hours / 8),
        "labour_cost": install_cost
    }
def flat_roof_insulation(self, floor_area, material, non_insulation_materials):
    """
    Cost a warm, flat roof insulation job.

    A model of a warm, flat roof construction can be seen in this video:
    https://www.youtube.com/watch?v=WZ6Ng6YI9OA
    Warm, flat roof insulation will normally be 100-125mm in depth.

    The measure is broken down into the following jobs:
    1) Preparation. Clean the existing roof surface, remove debris and repair damage. An edge barrier will
    likely be needed to protect the sides of the roof from water ingress.
    2) Primer application. A primer layer improves the adhesion of subsequent layers and seals the existing
    roof surface.
    3) Vapour proof layer installation. A vapour control layer prevents moisture ingress from inside the
    building, which is essential in warm roof construction.
    4) Insulation layer application. Insulation boards (e.g. rigid PIR boards) are placed and fixed over the
    roof.
    5) Waterproofing membrane installation. The insulation is covered with a waterproofing membrane such as
    EPDM, PVC or bituminous felt, with all joints, edges and roof penetrations sealed.

    :param floor_area: Area of the flat roof to be insulated, based on the area of the floor
    :param material: Selected insulation material, a dict with a per-m2 "material_cost"
    :param non_insulation_materials: Non-insulation materials required for the job. Must contain exactly two
        "flat_roof_preparation" entries costed in "gbp_per_m2", one "flat_roof_vapour_barrier" entry and one
        "flat_roof_waterproofing" entry
    :return: Dict of cost components plus labour hours and labour days
    :raises ValueError: If the non-insulation materials do not contain the expected number of entries
    """

    def entries_of(kind):
        return [entry for entry in non_insulation_materials if entry["type"] == kind]

    preparation = [
        entry for entry in entries_of("flat_roof_preparation") if entry["cost_unit"] == "gbp_per_m2"
    ]
    vapour_barriers = entries_of("flat_roof_vapour_barrier")
    waterproofings = entries_of("flat_roof_waterproofing")

    if not (len(preparation) == 2 and len(vapour_barriers) == 1 and len(waterproofings) == 1):
        raise ValueError("Incorrect number of data entries for non-insulation materials")

    vapour_barrier = vapour_barriers[0]
    waterproofing = waterproofings[0]

    # Material costs, all priced per m2
    preparation_material = sum(entry["material_cost"] * floor_area for entry in preparation)
    vapour_barrier_material = vapour_barrier["material_cost"] * floor_area
    insulation_material = material["material_cost"] * floor_area

    # Labour costs, all priced per m2
    preparation_labour = sum(entry["labour_cost"] * floor_area for entry in preparation)
    vapour_barrier_labour = vapour_barrier["labour_cost"] * floor_area

    # For waterproofing and upstand, we only have a total cost, so it is kept out of both the labour and
    # the material figures and only enters the subtotal
    waterproofing_total = waterproofing["total_cost"] * floor_area

    labour_costs = (preparation_labour + vapour_barrier_labour) * self.labour_adjustment_factor
    materials_costs = preparation_material + vapour_barrier_material + insulation_material
    base_cost = labour_costs + materials_costs + waterproofing_total

    contingency_cost = base_cost * self.FLAT_ROOF_CONTINGENCY
    preliminaries_cost = base_cost * self.PRELIMINARIES
    profit_cost = base_cost * self.PROFIT_MARGIN

    net_cost = base_cost + contingency_cost + preliminaries_cost + profit_cost
    vat_cost = net_cost * self.VAT_RATE

    preparation_hours = sum(entry["labour_hours_per_unit"] * floor_area for entry in preparation)
    vapour_barrier_hours = vapour_barrier["labour_hours_per_unit"] * floor_area
    waterproofing_hours = waterproofing["labour_hours_per_unit"] * floor_area
    labour_hours = preparation_hours + vapour_barrier_hours + waterproofing_hours

    return {
        "total": net_cost + vat_cost,
        "subtotal": net_cost,
        "vat": vat_cost,
        "contingency": contingency_cost,
        "preliminaries": preliminaries_cost,
        "material": materials_costs,
        "profit": profit_cost,
        "labour_hours": labour_hours,
        # A small/medium project might be conducted by a team of 2-4. We assume a team of 2 working 8 hour
        # days, since a lot of the roofs will be on the smaller side. To be reviewed later.
        "labour_days": (labour_hours / 8) / 2,
        "labour_cost": labour_costs,
    }
def window_glazing(self, number_of_windows, material, is_secondary_glazing=False):
    """
    Cost the glazing of a number of windows.

    The jobs to be done for window glazing are characterised as follows:
    1) Initial assessment and measurements of the window frame and opening, so the new units fit.
    2) Removing the existing window without damaging the surrounding wall or any frame that is reused.
    3) Disposing of the existing window responsibly, recycling glass and other materials where possible.
    4) Surface preparation of the opening: repairing or replacing parts of the frame, sealing gaps and
    making sure the opening is level and square.
    5) Installing the window frame, if new frames are used: aligning, levelling and fixing it in place.
    6) Installing the window sill, if a new one is required.
    7) Installing the double glazed glass units into the frame without stressing the glass.
    8) Sealing and weatherproofing around the frame and between the glass and frame.
    9) Finishing touches: trimming, painting or staining the frame and sill, and cleaning up.
    10) Inspection and testing that the windows open, close and lock, and that the sealing has no gaps.

    For this cost estimation process, the initial assessment is factored into the preliminaries.

    :param number_of_windows: Number of windows to glaze
    :param material: Dict with per-window "material_cost", "labour_cost" and "labour_hours_per_unit"
    :param is_secondary_glazing: Bool. When True the secondary glazing scaling factor is applied to the
        cost base and to the labour hours; otherwise the sash window inflation factor is applied to the
        cost base only
    :return: Dict of cost components plus labour hours and labour days
    """
    material_cost = material["material_cost"] * number_of_windows
    labour_cost = material["labour_cost"] * number_of_windows * self.labour_adjustment_factor

    if is_secondary_glazing:
        scaling = self.SECONDARY_GLAZING_SCALING_FACTOR
    else:
        scaling = self.SASH_WINDOW_INFLATION_FACTOR

    # The scaling is applied to the combined base only; the "material" and "labour_cost" figures that are
    # returned stay unscaled
    base_cost = (material_cost + labour_cost) * scaling

    contingency_cost = base_cost * self.CONTINGENCY
    preliminaries_cost = base_cost * self.PRELIMINARIES
    profit_cost = base_cost * self.PROFIT_MARGIN

    net_cost = base_cost + contingency_cost + preliminaries_cost + profit_cost
    vat_cost = net_cost * self.VAT_RATE

    labour_hours = material["labour_hours_per_unit"] * number_of_windows
    if is_secondary_glazing:
        labour_hours = labour_hours * self.SECONDARY_GLAZING_SCALING_FACTOR

    return {
        "total": net_cost + vat_cost,
        "subtotal": net_cost,
        "vat": vat_cost,
        "contingency": contingency_cost,
        "preliminaries": preliminaries_cost,
        "material": material_cost,
        "profit": profit_cost,
        "labour_hours": labour_hours,
        "labour_cost": labour_cost,
        # Assume a team of 2 working 8 hour days
        "labour_days": (labour_hours / 8) / 2,
    }
def solar_pv(self, wattage: float, has_battery: bool = False):
    """
    Cost a solar PV installation from the regional figures collected from the MCS dashboard, which contains
    costing data for installations of renewable and clean energy measures.

    The dashboard data is filtered on domestic building installations and then collected manually across
    the regions; there is currently no automated way to get the data out of the MCS dashboard.

    The price can also be benchmarked against this checkatrade article:
    https://www.checkatrade.com/blog/cost-guides/cost-of-solar-panel-installation/

    :param wattage: Peak wattage of the solar PV system
    :param has_battery: Bool, whether the system includes a battery
    :return: Dict with the total, the subtotal before VAT, the VAT and the labour estimate
    """
    # Look up the cost figure for this property's region; it is multiplied by the system size in kW
    region_key = "-".join(["average_cost_per_kwh", self.region])
    cost_with_vat = (wattage / 1000) * MCS_SOLAR_PV_COST_DATA[region_key]

    if has_battery:
        # The battery cost is based on the £3500 quote, received from installers
        cost_with_vat += BATTERY_COST

    # The total is treated as VAT inclusive, so the VAT is backed out of it
    cost_without_vat = cost_with_vat / (1 + self.VAT_RATE)

    return {
        "total": cost_with_vat,
        "subtotal": cost_without_vat,
        "vat": cost_with_vat - cost_without_vat,
        # Labour is a fixed estimate from online research (roughly a team of 3 for around 2 days) and does
        # not vary with the size of the system
        "labour_hours": 72,
        "labour_days": 2,
    }
def programmer_and_appliance_thermostat(self, has_programmer):
    """
    Calculate the total cost of installing a programmer and appliance thermostat.

    If the property already has a programmer, only the appliance thermostat is costed and fewer labour
    hours are assumed.

    :param has_programmer: Bool, whether the property already has a programmer
    :return: Dict with the total, the subtotal before VAT, the VAT and the labour estimate
    """
    # We estimate the cost of an appliance thermostat at £400, which is the upper end of the range
    if has_programmer:
        hours, cost_with_vat = 2, SMART_APPLIANCE_THERMOSTAT_COST
    else:
        hours, cost_with_vat = 4, SMART_APPLIANCE_THERMOSTAT_COST + PROGRAMMER_COST

    # The total is treated as VAT inclusive, so the VAT is backed out of it
    cost_without_vat = cost_with_vat / (1 + self.VAT_RATE)

    return {
        "total": cost_with_vat,
        "subtotal": cost_without_vat,
        "vat": cost_with_vat - cost_without_vat,
        "labour_hours": hours,
        "labour_days": 1,
    }
def electric_room_heaters(self, number_heated_rooms):
    """
    Cost the installation of electric room heaters at a flat £500 per heated room, based on the per-room
    estimate in the following article:
    https://www.bestelectricradiators.co.uk/blog/cost-to-install-a-new-heating-system-uk/

    :param number_heated_rooms: int, number of rooms to be heated
    :return: Dict with the total, the subtotal before VAT, the VAT and the labour estimate
    """
    cost_with_vat = 500 * number_heated_rooms
    # The per-room figure is treated as VAT inclusive, so the VAT is backed out of it
    cost_without_vat = cost_with_vat / (1 + self.VAT_RATE)

    # TODO: Rough estimate (one hour per room) to be reviewed
    hours = 1 * number_heated_rooms

    return {
        "total": cost_with_vat,
        "subtotal": cost_without_vat,
        "vat": cost_with_vat - cost_without_vat,
        "labour_hours": hours,
        # Whole 8 hour days, rounded up
        "labour_days": np.ceil(hours / 8),
    }
def high_heat_electric_storage_heaters(self, number_heated_rooms):
    """
    Cost the installation of high heat retention electric storage heaters at a flat £1500 per heated room,
    based on the per-room estimate from the Energy Saving Trust:
    https://energysavingtrust.org.uk/advice/electric-heating/

    :param number_heated_rooms: int, number of rooms to be heated
    :return: Dict with the total, the subtotal before VAT, the VAT and the labour estimate
    """
    cost_with_vat = 1500 * number_heated_rooms
    # The per-room figure is treated as VAT inclusive, so the VAT is backed out of it
    cost_without_vat = cost_with_vat / (1 + self.VAT_RATE)

    # TODO: Rough estimate (three hours per room) to be reviewed
    hours = 3 * number_heated_rooms

    return {
        "total": cost_with_vat,
        "subtotal": cost_without_vat,
        "vat": cost_with_vat - cost_without_vat,
        "labour_hours": hours,
        # Whole 8 hour days, rounded up
        "labour_days": np.ceil(hours / 8),
    }
def celect_type_controls(self):
    """
    Calculate the cost of installing Celect type controls.

    :return: Dict with the total, the subtotal before VAT, the VAT and the labour estimate
    """
    # The £50 cost is a rough estimate based on internet research. It is treated as VAT inclusive, so the
    # VAT is backed out of it
    cost_with_vat = 50
    cost_without_vat = cost_with_vat / (1 + self.VAT_RATE)

    return {
        "total": cost_with_vat,
        "subtotal": cost_without_vat,
        "vat": cost_with_vat - cost_without_vat,
        # We estimate the labour hours to be 4, within a single day
        "labour_hours": 4,
        "labour_days": 1,
    }
def hot_water_tank_insulation(self):
    """
    Calculate the cost of installing hot water tank insulation.

    :return: Dict with the total, the subtotal before VAT, the VAT and a zero labour estimate
    """
    # The £50 cost is a rough estimate based on internet research. It is treated as VAT inclusive, so the
    # VAT is backed out of it
    cost_with_vat = 50
    cost_without_vat = cost_with_vat / (1 + self.VAT_RATE)

    return {
        "total": cost_with_vat,
        "subtotal": cost_without_vat,
        "vat": cost_with_vat - cost_without_vat,
        # No labour is costed for this measure
        "labour_hours": 0,
        "labour_days": 0,
    }
def roomstat_programmer_trvs(
    self, number_heated_rooms, has_programmer, has_trvs, has_room_thermostat
):
    """
    Cost an upgrade to a room thermostat, programmer and TRVs, charging only for the components the
    property does not already have.

    :param number_heated_rooms: Number of heated rooms; TRVs are costed once per heated room
    :param has_programmer: Bool, whether a programmer is already fitted
    :param has_trvs: Bool, whether TRVs are already fitted
    :param has_room_thermostat: Bool, whether a room thermostat is already fitted
    :return: Dict with the total, the subtotal before VAT, the VAT and the labour estimate
    """
    cost_with_vat = 0
    hours = 0

    if not has_programmer:
        cost_with_vat += PROGRAMMER_COST
        hours += 1

    if not has_trvs:
        # TRVs scale with the number of heated rooms, at a quarter of an hour each
        cost_with_vat += TRVS_COST * number_heated_rooms
        hours += 0.25 * number_heated_rooms

    if not has_room_thermostat:
        cost_with_vat += ROOM_THERMOSTAT_COST
        hours += 0.5

    # The component costs are treated as VAT inclusive, so the VAT is backed out of the total
    cost_without_vat = cost_with_vat / (1 + self.VAT_RATE)

    return {
        "total": cost_with_vat,
        "subtotal": cost_without_vat,
        "vat": cost_with_vat - cost_without_vat,
        "labour_hours": hours,
        # Always reported as a single day, whatever the number of hours
        "labour_days": 1,
    }
def time_and_temperature_zone_control(self, number_heated_rooms):
    """
    Cost a time and temperature zone control installation: one smart thermostat, plus a room temperature
    sensor and a smart radiator valve for every heated room.

    :param number_heated_rooms: Number of heated rooms
    :return: Dict with the total, the subtotal, the VAT and the labour estimate
    """
    sensor_cost = TTZC_ROOM_TEMPERATURE_SENSOR_COST * number_heated_rooms
    valve_cost = TTZC_SMART_RADIATOR_VALUES * number_heated_rooms
    # The product costs are inclusive of VAT
    product_costs = TTZC_SMART_THERMOSTAT_COST + sensor_cost + valve_cost

    sensor_hours = TTZC_ROOM_TEMPERATURE_SENSOR_LABOUR_HOURS * number_heated_rooms
    valve_hours = TTZC_SMART_RADIATOR_VALUES_LABOUR_HOURS * number_heated_rooms
    hours = TTZC_SMART_THERMOSTAT_LABOUR_HOURS + sensor_hours + valve_hours

    # Add contingency and preliminaries to the labour to account for the complexity of the job
    uplift = 1 + self.CONTINGENCY + self.PRELIMINARIES
    labour_costs = (TTZC_ELECTRICIAN_HOURLY_RATE * hours) * uplift

    # VAT is only added on the labour, because the products already include it. The subtotal therefore
    # mixes VAT inclusive products with VAT exclusive labour.
    vat = labour_costs * self.VAT_RATE
    subtotal = product_costs + labour_costs

    return {
        "total": subtotal + vat,
        "subtotal": subtotal,
        "vat": vat,
        "labour_hours": hours,
        # Whole 8 hour days, rounded up
        "labour_days": np.ceil(hours / 8),
    }
def heater_removal(self, n_rooms):
    """
    Estimate the cost of removing room heaters, at a fixed cost and a fixed number of labour hours per
    room. Per the original note, the per-room figure is meant to include redecorating the space behind
    the heater.

    :param n_rooms: Number of rooms a heater is removed from
    :return: Dict with the total, the subtotal before VAT, the VAT and the labour estimate
    """
    # The per-room removal cost is treated as VAT exclusive, so VAT is added on top
    cost_without_vat = ROOM_HEATER_REMOVAL_COST * n_rooms
    hours = ROOM_HEATER_REMOVAL_LABOUR_HOURS * n_rooms
    vat = cost_without_vat * self.VAT_RATE

    return {
        "total": cost_without_vat + vat,
        "subtotal": cost_without_vat,
        "vat": vat,
        "labour_hours": hours,
        # Whole 8 hour days, rounded up
        "labour_days": np.ceil(hours / 8),
    }
@staticmethod
def _estimate_n_radiators(number_habitable_rooms, total_floor_area, property_type, built_form):
    """
    Estimate how many radiators a property needs.

    Two estimates are made and the larger one wins:
    1) A room count: one radiator per habitable room, plus an allowance for non-habitable essential areas
    (e.g. kitchens, hallways) that depends on the property type.
    2) A heat demand: 80 W per square metre of floor area, served by radiators averaging 1000 W each, scaled
    by a factor for the built form.

    :param number_habitable_rooms: Number of habitable rooms in the property
    :param total_floor_area: Total floor area, in square metres
    :param property_type: One of 'Flat', 'House', 'Bungalow' or 'Maisonette'
    :param built_form: Built form of the property. 'Mid-Terrace', 'Semi-Detached', 'Detached' and
        'End-Terrace' have their own scaling factor; any other value (for example the enclosed terrace
        forms, or a missing value) falls back to a neutral factor of 1.0 rather than failing
    :return: Int, estimated number of radiators
    :raises ValueError: If the property type is not recognised
    """
    # Additional radiators for non-habitable essential areas; 3 is the initial assumption
    additional_radiators = 3

    if property_type == 'Flat':
        # Flats may need fewer radiators due to less exposure
        additional_radiators -= 1
    elif property_type in ('House', 'Bungalow', 'Maisonette'):
        # Houses and bungalows might need more due to greater exposure, and the multiple floors of a
        # maisonette may require additional heating points
        additional_radiators += 2
    else:
        raise ValueError("Invalid property type")

    # Scaling of the heat demand by built form. Forms missing from this table previously raised a bare
    # KeyError; they now use the neutral default below.
    form_factor = {
        'Mid-Terrace': 0.95,
        'Semi-Detached': 1.05,
        'Detached': 1.25,
        'End-Terrace': 1.05
    }
    default_form_factor = 1.0

    total_heating_power_required = total_floor_area * 80  # Watts per square metre
    radiator_output = 1000  # Average wattage per radiator
    radiators_based_on_power = (
        (total_heating_power_required / radiator_output) * form_factor.get(built_form, default_form_factor)
    )

    # Final estimation takes the higher of the calculated heat demand or the room count
    radiators_based_on_rooms = number_habitable_rooms + additional_radiators
    return round(max(radiators_based_on_power, radiators_based_on_rooms))
def boiler(self, is_combi, size, exising_room_heaters, system_change, n_heated_rooms, n_rooms):
    """
    Cost a boiler installation, optionally including the removal of existing room heaters and the extra
    works of a first time central heating system.

    Based on a basic estimate of median value £2600 to install a low carbon combi boiler.
    First time central heating costs can also be found here:
    https://www.checkatrade.com/blog/cost-guides/central-heating-installation-cost/

    :param is_combi: Bool, selects the combi boiler cost table over the conventional one
    :param size: Key into the selected boiler cost table
    :param exising_room_heaters: Bool, whether existing room heaters have to be removed
    :param system_change: Bool, whether radiators, flue and pipework have to be installed as well
    :param n_heated_rooms: Number of heated rooms, used to cost the heater removal
    :param n_rooms: Number of habitable rooms, used to estimate the number of radiators
    :return: Dict with the total, the subtotal, the VAT and the labour estimate
    """
    # The unit cost is the cost without VAT
    unit_cost = (COMBI_BOILER_COSTS if is_combi else CONVENTIONAL_BOILER_COSTS)[size]

    # Average cost of installation is 1 (maybe 2) days at £300 per day
    # https://www.checkatrade.com/blog/cost-guides/new-boiler-cost/
    # To be pessimistic, assume 2 days work
    install_days = 2
    labour_rate = 300

    labour_cost = labour_rate * self.labour_adjustment_factor * install_days
    # Add contingency and preliminaries
    labour_cost = labour_cost * (1 + self.CONTINGENCY + self.PRELIMINARIES)

    # VAT is only added on the labour
    labour_vat = labour_cost * self.VAT_RATE
    net_cost = unit_cost + labour_cost

    costing = {
        "total": net_cost + labour_vat,
        "subtotal": net_cost,
        "vat": labour_vat,
        "labour_hours": install_days * 8,
        "labour_days": install_days,
    }

    if exising_room_heaters:
        # Existing room heaters have to be removed first, so every figure of that job is added on
        removal_costing = self.heater_removal(n_rooms=n_heated_rooms)
        for figure in ("total", "subtotal", "labour_hours", "labour_days", "vat"):
            costing[figure] += removal_costing[figure]

    if system_change:
        # We need the cost of radiators
        n_radiators = self._estimate_n_radiators(
            number_habitable_rooms=n_rooms,
            total_floor_area=self.property.floor_area,
            property_type=self.property.data["property-type"],
            built_form=self.property.data["built-form"]
        )

        extra_labour_cost = labour_rate * self.labour_adjustment_factor
        radiator_cost = DOUBLE_RADIATOR_COST * n_radiators
        # The system change figure is treated as VAT inclusive, so the VAT is backed out of it
        change_with_vat = radiator_cost + FLUE_COST + PIPEWORK_COST + extra_labour_cost
        change_without_vat = change_with_vat / (1 + self.VAT_RATE)

        # We add an extra labour day for the system change
        costing["labour_days"] += 1
        costing["labour_hours"] += 8
        costing["total"] += change_with_vat
        costing["subtotal"] += change_without_vat
        costing["vat"] += change_with_vat - change_without_vat

    return costing

View file

@ -20,7 +20,7 @@ class FireplaceRecommendations(Definitions):
self.has_ventilaion = None
self.recommendation = None
def recommend(self):
def recommend(self, phase=0):
"""
Based on the number of open fireplcaes found, we recommend sealing each one at a cost of
around £500
@ -32,19 +32,23 @@ class FireplaceRecommendations(Definitions):
if number_open_fireplaces == 0:
return
estimated_cost = number_open_fireplaces * self.COST_OF_WORK
already_installed = "sealing_open_fireplace" in self.property.already_installed
estimated_cost = number_open_fireplaces * self.COST_OF_WORK if not already_installed else 0
# We recommend installing two mechanical ventilation systems
self.recommendation = [
{
"phase": phase,
"parts": [],
"type": "sealing_open_fireplace",
"description": "Seal %s open fireplaces" % str(number_open_fireplaces),
"starting_u_value": None,
"new_u_value": None,
"sap_points": None,
"already_installed": already_installed,
"total": estimated_cost,
# Take a very basic estimate of 6 hours, multipled by the number of open fireplaces to seal
"labour_hours": 6 * number_open_fireplaces
"labour_hours": 6 * number_open_fireplaces,
"labour_days": 6 * number_open_fireplaces / 8, # Assume 8 hour day
}
]

View file

@ -8,9 +8,8 @@ from datatypes.enums import QuantityUnits
from backend.Property import Property
from recommendations.recommendation_utils import (
r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value,
get_recommended_part, get_floor_u_value
get_recommended_part, get_floor_u_value, override_costs
)
from recommendations.rdsap_tables import FLOOR_LEVEL_MAP
from recommendations.Costs import Costs
@ -51,8 +50,9 @@ class FloorRecommendations(Definitions):
]
]
# For solid floor, we don't use materials that are too thick
self.solid_floor_insulation_materials = [
part for part in materials if part["type"] == "solid_floor_insulation"
part for part in materials if part["type"] == "solid_floor_insulation" if float(part["depth"]) <= 75
]
self.solid_floor_non_insulation_materials = [
@ -69,15 +69,9 @@ class FloorRecommendations(Definitions):
# TODO: To be completed
self.exposed_floor_non_insulation_materials = []
def recommend(self):
def recommend(self, phase=0):
u_value = self.property.floor["thermal_transmittance"]
floor_level = (
FLOOR_LEVEL_MAP[self.property.data["floor-level"]] if
self.property.data["floor-level"] not in self.DATA_ANOMALY_MATCHES else None
)
property_type = self.property.data["property-type"]
floor_area = self.property.insulation_floor_area
year_built = self.property.year_built
@ -89,7 +83,13 @@ class FloorRecommendations(Definitions):
return
# If the property is a flat that isn't at ground level, it's likely impractical to recommend a floor upgrade
if (floor_level != 0) and (property_type == "Flat"):
if (self.property.floor_level != 0) and (property_type == "Flat") and (
self.property.floor["another_property_below"]
):
return
# If the property is a new build flat, we won't recommend floor upgrades
if len(self.property.full_sap_epc) and (property_type == "Flat"):
return
if u_value:
@ -103,15 +103,17 @@ class FloorRecommendations(Definitions):
# The floor is already compliant
return
u_value = get_floor_u_value(
floor_type=self.property.floor_type,
area=floor_area,
perimeter=self.property.perimeter,
age_band=self.property.age_band,
insulation_thickness=self.property.floor["insulation_thickness"],
wall_type=self.property.wall_type
)
self.estimated_u_value = u_value
if u_value is None:
u_value = get_floor_u_value(
floor_type=self.property.floor_type,
area=floor_area,
perimeter=self.property.perimeter,
age_band=self.property.age_band,
insulation_thickness=self.property.floor["insulation_thickness"],
wall_type=self.property.wall_type
)
self.estimated_u_value = u_value
if u_value < self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
return
@ -119,6 +121,7 @@ class FloorRecommendations(Definitions):
if self.property.floor["is_suspended"]:
# Given the U-value, we recommend underfloor insulation
self.recommend_floor_insulation(
phase=phase,
u_value=u_value,
insulation_materials=self.suspended_floor_insulation_materials,
non_insulation_materials=self.suspended_floor_non_insulation_materials
@ -130,7 +133,8 @@ class FloorRecommendations(Definitions):
self.recommend_floor_insulation(
u_value=u_value,
insulation_materials=self.solid_floor_insulation_materials,
non_insulation_materials=self.solid_floor_non_insulation_materials
non_insulation_materials=self.solid_floor_non_insulation_materials,
phase=phase
)
return
@ -142,9 +146,22 @@ class FloorRecommendations(Definitions):
@staticmethod
def _make_floor_description(material):
return f"Install {int(material['depth'])}{material['depth_unit']} {material['description']} insulation"
def recommend_floor_insulation(self, u_value, insulation_materials, non_insulation_materials):
if material["type"] == "suspended_floor_insulation":
return (f"Install {int(material['depth'])}{material['depth_unit']} {material['description']} insulation in "
f"suspended floor")
if material["type"] == "solid_floor_insulation":
return (f"Install {int(material['depth'])}{material['depth_unit']} {material['description']} insulation on "
f"solid floor")
if material["type"] == "exposed_floor_insulation":
return (f"Install {int(material['depth'])}{material['depth_unit']} {material['description']} insulation in "
f"exposed floor")
raise ValueError("Invalid material type - implement me!")
def recommend_floor_insulation(self, u_value, insulation_materials, non_insulation_materials, phase):
"""
This method is tasked with estimating the impact of performing suspended floor insulation
:return:
@ -175,17 +192,27 @@ class FloorRecommendations(Definitions):
material=material.to_dict(),
non_insulation_materials=non_insulation_materials
)
already_installed = "suspended_floor_insulation" in self.property.already_installed
if already_installed:
cost_result = override_costs(cost_result)
elif material["type"] == "solid_floor_insulation":
cost_result = self.costs.solid_floor_insulation(
insulation_floor_area=self.property.insulation_floor_area,
material=material.to_dict(),
non_insulation_materials=non_insulation_materials
)
already_installed = "solid_floor_insulation" in self.property.already_installed
if already_installed:
cost_result = override_costs(cost_result)
else:
raise NotImplementedError("Implement me!")
self.recommendations.append(
{
"phase": phase,
"parts": [
get_recommended_part(
part=material.to_dict(),
@ -194,11 +221,12 @@ class FloorRecommendations(Definitions):
cost_result=cost_result
),
],
"type": "floor_insulation",
"type": material["type"],
"description": self._make_floor_description(material),
"starting_u_value": u_value,
"new_u_value": new_u_value,
"sap_points": None,
"already_installed": already_installed,
**cost_result
}
)

View file

@ -0,0 +1,248 @@
from recommendations.Costs import Costs
from recommendations.recommendation_utils import check_simulation_difference, override_costs
from backend.Property import Property
from etl.epc_clean.epc_attributes.MainheatControlAttributes import MainheatControlAttributes
class HeatingControlRecommender:
def __init__(self, property_instance: Property):
    """
    Set up the recommender for a single property.

    :param property_instance: The property to recommend heating controls for. The same instance is handed
        to the cost calculator
    """
    self.property = property_instance
    self.costs = Costs(property_instance)
    # Filled in by recommend(), which resets it at the start of every call
    self.recommendation = []
def recommend(self, heating_description):
    """
    Populate self.recommendation with heating control upgrades suited to the main heating system.

    This first iteration of the recommender is very basic: the heating description is matched exactly
    against a handful of known systems, and any other description leaves the recommendations empty.

    :param heating_description: Description of the main heating system
    :return: None, the results are stored on self.recommendation
    """
    # Start from a clean slate on every call
    self.recommendation = []

    if heating_description in ("Room heaters, electric",):
        self.recommend_room_heaters_electric_controls()
    elif heating_description in ("Electric storage heaters", "Electric storage heaters, radiators"):
        self.recommend_high_heat_retention_controls()
    elif heating_description in ("Boiler and radiators, mains gas",):
        # A gas boiler gets both options: roomstat, programmer and TRVs first, then time and temperature
        # zone controls
        self.recommend_roomstat_programmer_trvs()
        self.recommend_time_temperature_zone_controls()
def recommend_room_heaters_electric_controls(self):
    """
    Recommend a programmer and appliance thermostats for a home heated by electric room heaters.

    Upgrading the controls is the least invasive improvement, so it is considered before the heating
    system itself. The recommendation is made when the current controls are rated Average or below, or
    when the home has a programmer and room thermostat.

    Note: A room thermostat is commonly placed in a hallway and measures the temperature of the air
    surrounding it, switching the heating system on or off accordingly. An appliance thermostat sits on the
    heater/appliance itself and measures its temperature, which gives much more granular control, at the
    level of the individual heater rather than the room, and prevents overheating.

    :return: None, the recommendation is appended to self.recommendation
    """
    rated_average_or_below = self.property.data["mainheatc-energy-eff"] in ["Poor", "Very Poor", "Average"]
    if not rated_average_or_below and (
        self.property.main_heating_controls["clean_description"] not in ["Programmer and room thermostat"]
    ):
        # We don't implement any other recommendations right now
        return

    # For costing: a property that already has a programmer only needs the appliance thermostats
    has_programmer = self.property.main_heating_controls["switch_system"] == "programmer"

    # Work out what changes between the current controls and the target controls; this difference is
    # what drives the update of the simulation config
    target_config = MainheatControlAttributes("Programmer and appliance thermostats").process()
    simulation_config = check_simulation_difference(
        new_config=target_config, old_config=self.property.main_heating_controls
    )
    # The controls are recorded as ending at a Good energy efficiency rating
    simulation_config["mainheatc_energy_eff_ending"] = "Good"

    cost_result = self.costs.programmer_and_appliance_thermostat(has_programmer=has_programmer)
    self.recommendation.append(
        {
            "description": "upgrade heating controls to Programmer and Appliance or Smart Thermostats",
            **cost_result,
            "simulation_config": simulation_config
        }
    )
def recommend_high_heat_retention_controls(self):
    """
    Recommend upgrading the heating controls to high heat retention (Celect type) controls.

    This is a specific type of control system designed to work with electric storage heaters, and it is
    more efficient than the standard controls that come with them. The recommendation is made
    unconditionally; the heating system itself can be considered afterwards.

    :return: None, the recommendation is appended to self.recommendation
    """
    # Work out what changes between the current controls and the target controls
    target_config = MainheatControlAttributes("Controls for high heat retention storage heaters").process()
    simulation_config = check_simulation_difference(
        new_config=target_config, old_config=self.property.main_heating_controls
    )
    # The controls are recorded as ending at a Good energy efficiency rating
    simulation_config["mainheatc_energy_eff_ending"] = "Good"

    cost_result = self.costs.celect_type_controls()
    # We don't implement any other recommendations right now
    self.recommendation.append(
        {
            "description": "upgrade heating controls to High Heat Retention Storage Heater Controls",
            **cost_result,
            "simulation_config": simulation_config
        }
    )
def recommend_roomstat_programmer_trvs(self):
    """
    Recommend a room thermostat, programmer and TRVs for a home with a mains gas boiler and radiators.

    The upgrade is recommended when at least one of these is true of the current heating controls:
    1) The "no_control" attribute is set
    2) There is no programmer
    3) There is no room thermostat
    4) There are no TRVs

    Only the missing components are costed. If a heating control upgrade is already recorded as installed
    on the property, the costs are overridden and the description says no further action is needed.

    :return: None, the recommendation (if any) is appended to self.recommendation
    """
    controls = self.property.main_heating_controls

    needs_programmer = controls["switch_system"] is None
    needs_room_thermostat = controls["thermostatic_control"] is None
    needs_trvs = controls["trvs"] is None

    # Nothing to recommend when controls are in place and none of the three components is missing
    if controls["no_control"] is None and not (needs_programmer or needs_room_thermostat or needs_trvs):
        return

    # Work out what changes between the current controls and the target controls; this difference is
    # what drives the update of the simulation config
    target_config = MainheatControlAttributes("Programmer, room thermostat and TRVS").process()
    simulation_config = check_simulation_difference(new_config=target_config, old_config=controls)

    # If the current system is below good, we make it good
    if self.property.data["mainheatc-energy-eff"] in ["Poor", "Very Poor", "Average"]:
        simulation_config["mainheatc_energy_eff_ending"] = "Good"

    cost_result = self.costs.roomstat_programmer_trvs(
        number_heated_rooms=int(self.property.data["number-heated-rooms"]),
        has_programmer=not needs_programmer,
        has_room_thermostat=not needs_room_thermostat,
        has_trvs=not needs_trvs
    )

    already_installed = "heating_control" in self.property.already_installed
    if already_installed:
        cost_result = override_costs(cost_result)
        description = "Heating controls have already been upgraded, no further action needed."
    else:
        description = "upgrade heating controls to Room thermostat, programmer and TRVs"

    self.recommendation.append(
        {
            "type": "heating_control",
            "parts": [],
            "description": description,
            **cost_result,
            "starting_u_value": None,
            "new_u_value": None,
            "sap_points": None,
            "already_installed": already_installed,
            "simulation_config": simulation_config
        }
    )
def recommend_time_temperature_zone_controls(self):
    """
    Recommend an upgrade of the heating controls to time and temperature zone control.

    Nothing is recommended when the property's thermostatic control is already
    "time and temperature zone control", or when the heating controls energy efficiency is already "Very Good".
    Otherwise exactly one "heating_control" recommendation is appended to self.recommendation.
    :return: None
    """
    current_controls = self.property.main_heating_controls
    has_zone_control = current_controls["thermostatic_control"] == "time and temperature zone control"
    if has_zone_control or self.property.data["mainheatc-energy-eff"] in ["Very Good"]:
        # Either condition means there is nothing left to upgrade
        return

    # The difference between the target and the current controls drives the simulation update
    target_config = MainheatControlAttributes("Time and temperature zone control").process()
    simulation_config = check_simulation_difference(
        new_config=target_config, old_config=current_controls
    )
    # Any rating below very good is lifted to very good by this upgrade
    if self.property.data["mainheatc-energy-eff"] in ["Poor", "Very Poor", "Average", "Good"]:
        simulation_config["mainheatc_energy_eff_ending"] = "Very Good"

    cost_result = self.costs.time_and_temperature_zone_control(
        number_heated_rooms=int(self.property.data["number-heated-rooms"])
    )

    already_installed = "heating_control" in self.property.already_installed
    if already_installed:
        cost_result = override_costs(cost_result)
        description = "Heating controls have already been upgraded, no further action needed."
    else:
        description = (
            "Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & "
            "temperature zone control)"
        )

    # Key order matters: keys from cost_result override the ones before it and are overridden by the ones after
    recommendation = {
        "type": "heating_control",
        "parts": [],
        "description": description,
        **cost_result,
        "starting_u_value": None,
        "new_u_value": None,
        "sap_points": None,
        "already_installed": already_installed,
        "simulation_config": simulation_config,
    }
    self.recommendation.append(recommendation)

View file

@ -0,0 +1,435 @@
import pandas as pd
from recommendations.Costs import Costs
from recommendations.recommendation_utils import check_simulation_difference, override_costs
from backend.Property import Property
from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes
from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes
from recommendations.HeatingControlRecommender import HeatingControlRecommender
class HeatingRecommender:
def __init__(self, property_instance: Property):
    """
    Set up the recommender for a single property.
    :param property_instance: The property to produce heating recommendations for. It is also handed to Costs,
    which is used to price the recommendations
    """
    # Populated (and reset) by recommend()
    self.recommendations = []
    self.property = property_instance
    self.costs = Costs(property_instance)
def recommend(self, phase=0):
    """
    Rebuild self.recommendations based on the description of the property's main heating system.

    High heat retention storage heaters are considered for electrically heated homes and for homes with no
    heating system and no mains gas. Boiler upgrades are considered for homes that already have a gas boiler, or
    that have mains gas together with no heating system, electric heating, gas room heaters or portable heaters.
    :param phase: The phase passed on to the recommendations that get produced
    :return: None
    """
    # TODO: We could have a system flush recommendation for an existing boiler, where there is no need to replace
    #   the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this
    #   in the Costs class, stored as SYSTEM_FLUSH_COST
    self.recommendations = []

    def on_mains_gas():
        # Read lazily: the flag is only looked up for heating descriptions that depend on it
        return self.property.data["mains-gas-flag"]

    def heating_is(*descriptions):
        return self.property.main_heating["clean_description"] in list(descriptions)

    # This first iteration of the recommender only provides very basic recommendations
    electric_heating = heating_is(
        "Room heaters, electric", "Electric storage heaters", "Electric storage heaters, radiators"
    )
    unheated_off_gas = heating_is("No system present, electric heaters assumed") and not on_mains_gas()
    if electric_heating or unheated_off_gas:
        # Recommend high heat retention storage heaters
        self.recommend_hhr_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)

    # An existing gas boiler qualifies for an upgrade on its own; every other route also needs mains gas
    has_boiler = heating_is("Boiler and radiators, mains gas")
    boiler_routes = [
        has_boiler,
        heating_is("No system present, electric heaters assumed") and on_mains_gas(),
        electric_heating and on_mains_gas(),
        heating_is("Room heaters, mains gas") and on_mains_gas(),
        heating_is("Portable electric heaters assumed for most rooms") and on_mains_gas(),
    ]
    if not any(boiler_routes):
        return

    # Everything apart from an existing boiler means the home needs an overhaul of its heating system
    self.recommend_boiler_upgrades(
        phase=phase,
        system_change=not has_boiler,
        exising_room_heaters=heating_is("Room heaters, electric", "Room heaters, mains gas"),
    )
    return
@staticmethod
def check_simulation_difference(old_config, new_config):
    """
    Compare two configuration dictionaries and collect the values that change.

    Every key of new_config whose value differs from the value stored under the same key in old_config is
    reported, with "_ending" appended to the key and the new value as the value. Keys are looked up directly in
    old_config, so a key that is missing from old_config raises a KeyError.
    :param old_config: The configuration before the change
    :param new_config: The configuration after the change
    :return: Dictionary of "<key>_ending" to the new value, for every value that differs
    """
    differences = {}
    for key, new_value in new_config.items():
        if old_config[key] != new_value:
            differences[key + "_ending"] = new_value
    return differences
def combine_heating_and_controls(
        self, controls_recommendations, heating_simulation_config, costs, description, phase, heating_controls_only,
        system_change
):
    """
    Given a recommendation for heating controls, and a recommendation for the heating system, we combine the two
    into a list of "heating" recommendations.

    Only the first entry of controls_recommendations is ever used. The variants produced are:
    - heating system with controls and heating system without controls, when controls are available
    - heating system with controls only, when this is a system change and controls are available
    - heating system without controls, when no controls are available
    - no heating system variants at all, when heating_simulation_config is empty (unless it is a system change
      with controls available)
    :param controls_recommendations: The heating controls recommendations. The first entry needs to hold a
    "description", a "simulation_config" and every key that is present in costs
    :param heating_simulation_config: The simulation configuration for the heating system
    :param costs: The costs of the heating system
    :param description: The description of the recommendation
    :param phase: The phase of the recommendation
    :param heating_controls_only: If True, we will also add a recommendation for heating controls only
    :param system_change: Indicates if we are recommending a different type of heating system, compared to the
    current system. If we have a system change and we have a heat control recommendation, we only recommend
    both heating and controls together
    :return: List of recommendation dictionaries
    """
    has_controls = bool(controls_recommendations)

    # Decide which variants (with / without controls) to produce. A system change with controls available
    # takes priority over everything else, an empty heating configuration comes next
    if system_change and has_controls:
        heating_controls_switch = [True]
    elif not heating_simulation_config:
        heating_controls_switch = []
    elif has_controls:
        heating_controls_switch = [True, False]
    else:
        heating_controls_switch = [False]

    # This method produces "heating" recommendations, so that is the installed measure we check for
    already_installed = "heating" in self.property.already_installed

    output = []
    for controls_switch in heating_controls_switch:
        total_costs = costs.copy()
        recommendation_simulation_config = heating_simulation_config.copy()
        recommendation_description = description

        if controls_switch:
            controls_recommendation = controls_recommendations[0]
            # We add the costs of the heating controls, onto each key in the costs dictionary
            for key in total_costs:
                total_costs[key] += controls_recommendation[key]

            recommendation_simulation_config = {
                **recommendation_simulation_config,
                **controls_recommendation["simulation_config"]
            }

            # Make the first letter of the controls description lowercase, since it follows on from the heating
            # description. Slicing keeps this safe for an empty description
            controls_description = controls_recommendation["description"]
            controls_description = controls_description[:1].lower() + controls_description[1:]
            recommendation_description = f"{description} and {controls_description}"

        if already_installed:
            total_costs = override_costs(total_costs)
            recommendation_description = "Heating system has already been upgraded, no further action needed."

        output.append(
            {
                "phase": phase,
                "parts": [
                    # TODO
                ],
                "type": "heating",
                "description": recommendation_description,
                "starting_u_value": None,
                "new_u_value": None,
                "sap_points": None,
                "already_installed": already_installed,
                **total_costs,
                "simulation_config": recommendation_simulation_config
            }
        )

    if heating_controls_only and has_controls:
        # Also add on a recommendation for heating controls only. We work on a copy so that the recommendation
        # that was passed in keeps its original description
        heating_control_recommendation = controls_recommendations[0].copy()
        # Capitalize the first letter of the description
        controls_only_description = heating_control_recommendation["description"]
        heating_control_recommendation["description"] = (
            controls_only_description[:1].upper() + controls_only_description[1:]
        )
        output.append(
            {
                "phase": phase,
                "parts": [
                    # TODO
                ],
                "type": "heating",
                "starting_u_value": None,
                "new_u_value": None,
                "sap_points": None,
                # Keys of the controls recommendation override the defaults above, including "type"
                **heating_control_recommendation
            }
        )

    return output
def recommend_hhr_storage_heaters(self, phase, system_change, heating_controls_only):
    """
    Recommend an upgrade to high heat retention electric storage heaters, unless the property already has
    electric storage heaters together with the controls for high heat retention storage heaters.

    The recommendations built by combine_heating_and_controls are added to self.recommendations.
    :param phase: The phase of the recommendation
    :param system_change: Indicates if we are recommending a different type of heating system, compared to the
    current system
    :param heating_controls_only: Indicates if we should include a recommendation for just heating controls
    :return: None
    """
    hhr_controls_description = "Controls for high heat retention storage heaters"

    # The controls we recommend belong to the heating system we are recommending, and are only needed when
    # the property does not have them already
    controls_recommender = HeatingControlRecommender(self.property)
    if self.property.main_heating_controls["clean_description"] != hhr_controls_description:
        controls_recommender.recommend(heating_description="Electric storage heaters, radiators")

    # Storage heaters with the matching controls (compared case-insensitively) mean there is nothing to do
    has_storage_heaters = "Electric storage heaters" in self.property.main_heating["clean_description"]
    if has_storage_heaters and (
        self.property.main_heating_controls["clean_description"].lower() == hhr_controls_description.lower()
    ):
        return

    # Simulation artefacts for the heating system itself, regardless of the controls
    target_heating_config = MainHeatAttributes("Electric storage heaters, radiators").process()
    heating_simulation_config = check_simulation_difference(
        new_config=target_heating_config, old_config=self.property.main_heating
    )
    # This upgrade will only take the heating system to average energy efficiency
    heating_simulation_config["mainheat_energy_eff_ending"] = "Average"

    # An off-gas property without a heating system has 0 heated rooms, so we derive the figure from the
    # number of rooms instead: one less than the room count, unless there is only a single room
    heated_rooms = self.property.data["number-heated-rooms"]
    if heated_rooms > 0:
        number_heated_rooms = heated_rooms
    else:
        room_count = self.property.number_of_rooms
        number_heated_rooms = room_count - 1 if room_count > 1 else room_count

    storage_heater_costs = self.costs.high_heat_electric_storage_heaters(
        number_heated_rooms=number_heated_rooms
    )

    self.recommendations.extend(
        self.combine_heating_and_controls(
            controls_recommendations=controls_recommender.recommendation,
            heating_simulation_config=heating_simulation_config,
            costs=storage_heater_costs,
            description="Install high heat retention electric storage heaters",
            phase=phase,
            heating_controls_only=heating_controls_only,
            system_change=system_change
        )
    )
@staticmethod
def estimate_boiler_size(property_type, built_form, floor_area, floor_height, num_heated_rooms):
# Step 1: Base size estimation based on property type (as a starting point)
base_size = {
'Flat': 25,
'House': 30,
'Maisonette': 28,
'Bungalow': 27
}
# Step 2: Calculate the volume of the property
volume = floor_area * floor_height
# Step 3: Adjust base size for built form (to account for heat retention)
form_adjustment = {
'Mid-Terrace': 0,
'End-Terrace': 2,
'Semi-Detached': 4,
'Detached': 6
}
# Step 4: Further adjust for the total volume and number of heated rooms
volume_adjustment = (volume / 100) # Simplified adjustment factor for volume
rooms_adjustment = (num_heated_rooms - 5) * 0.5 # Assuming base case of 5 rooms
# Calculate the estimated boiler size
estimated_size = base_size[property_type] + form_adjustment[built_form] + volume_adjustment + rooms_adjustment
# Step 5: Align with available boiler sizes and ensure it does not exceed 35kW, as it's rare to need more
available_sizes = [30, 35, 40, 45, 50]
estimated_size = min(max(estimated_size, 30), 40) # Ensure within 30kW to 35kW range
# Find the closest available size (in this case, either rounding up or down to align with 30 or 35)
closest_size = min(available_sizes, key=lambda x: abs(x - estimated_size))
return closest_size
def recommend_boiler_upgrades(self, phase, system_change, exising_room_heaters):
    """
    This boiler recommendation will only recommend a like-for-like upgrade, since changing the system
    is generally more expensive.

    A boiler upgrade is only built when the main heating energy efficiency is "Very Poor", "Poor" or "Average".
    The heating controls for a gas boiler are then recommended, and:
    - for a system change, every controls recommendation is combined with the boiler upgrade, and the combined
      recommendations are added to self.recommendations. Without a boiler upgrade there is nothing to combine,
      so nothing is added
    - otherwise, the controls recommendations are given a phase and added to self.recommendations on their own
    Nothing is added when there are no heating controls recommendations.
    :param phase: The phase of the recommendation
    :param system_change: Indicates if the property would be undergoing a heating system change. This could be true
                          if the home didn't have a heating system in place, or if the home had electric heating
                          previously
    :param exising_room_heaters: Indicates if the property had room heaters previously - if so, a boiler
                                 recommendation will need to be accompanied by removal of the room heaters
    :return: None
    """
    recommendation_phase = phase

    # We now recommend boiler upgrades, if applicable. These stay empty when no upgrade is needed
    simulation_config = {}
    boiler_costs = {}
    boiler_recommendation = {}
    if self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"]:
        boiler_size = self.estimate_boiler_size(
            property_type=self.property.data["property-type"],
            built_form=self.property.data["built-form"],
            floor_area=self.property.floor_area,
            floor_height=self.property.floor_height,
            num_heated_rooms=self.property.data["number-heated-rooms"],
        )

        # We recommend a combi boiler under the following conditions
        # 1) If there are 4 or fewer rooms (we don't use heated rooms because none of the rooms could be
        #    heated if there is no existing heating system).
        # 2) There is 1 or fewer bathrooms (an unknown number of bathrooms also counts)
        # Otherwise, we recommend a gas condensing boiler, which will serve a larger property, that has multiple
        # bathrooms
        is_combi = (
            (self.property.number_of_rooms <= 4) and
            (self.property.n_bathrooms in [None, 0, 1])
        )
        if is_combi:
            description = "Upgrade to a new combi boiler"
        else:
            description = "Upgrade to a new gas condensing boiler"

        simulation_config = {"mainheat_energy_eff_ending": "Good"}
        if system_change:
            # Installation of a boiler improves the hot water system so we need to reflect this in
            # the outcome of the recommendation
            heating_ending_config = MainHeatAttributes("Boiler and radiators, mains gas").process()
            hotwater_ending_config = HotWaterAttributes("From main system").process()
            fuel_ending_config = MainFuelAttributes("mains gas (not community)").process()

            heating_simulation_config = check_simulation_difference(
                new_config=heating_ending_config, old_config=self.property.main_heating
            )
            hotwater_simulation_config = check_simulation_difference(
                new_config=hotwater_ending_config, old_config=self.property.hotwater
            )
            fuel_simulation_config = check_simulation_difference(
                new_config=fuel_ending_config, old_config=self.property.main_fuel
            )
            simulation_config = {
                **simulation_config,
                **heating_simulation_config,
                **hotwater_simulation_config,
                **fuel_simulation_config,
                "hot_water_energy_eff_ending": "Good"
            }

        boiler_costs = self.costs.boiler(
            is_combi=is_combi,
            size=f"{boiler_size}kw",
            exising_room_heaters=exising_room_heaters,
            system_change=system_change,
            n_heated_rooms=self.property.data["number-heated-rooms"],
            n_rooms=self.property.number_of_rooms
        )

        already_installed = "heating" in self.property.already_installed
        if already_installed:
            boiler_costs = override_costs(boiler_costs)
            description = "Heating system has already been upgraded, no further action needed."

        boiler_recommendation = {
            "phase": recommendation_phase,
            "parts": [
                # TODO
            ],
            "type": "heating",
            "description": description,
            "starting_u_value": None,
            "new_u_value": None,
            "sap_points": None,
            "already_installed": already_installed,
            "simulation_config": simulation_config,
            **boiler_costs
        }

    # We recommend the heating controls that belong to a gas boiler
    controls_recommender = HeatingControlRecommender(self.property)
    controls_recommender.recommend(heating_description="Boiler and radiators, mains gas")

    # We may have 2 recommendations from the heating controls, or none at all
    if not controls_recommender.recommendation:
        return

    if system_change:
        if not boiler_recommendation:
            # No boiler upgrade was built (the heating efficiency is not "Very Poor", "Poor" or "Average"), so
            # there is no heating recommendation to combine the controls with. Previously this case raised a
            # KeyError when looking up the description of the empty boiler recommendation
            return

        # We combine the heating and controls recommendations, in the case of a system change
        combined_recommendations = []
        for controls_recommendation in controls_recommender.recommendation:
            combined_recommendations.extend(
                self.combine_heating_and_controls(
                    controls_recommendations=[controls_recommendation],
                    heating_simulation_config=simulation_config,
                    costs=boiler_costs,
                    description=boiler_recommendation["description"],
                    phase=recommendation_phase,
                    heating_controls_only=False,
                    system_change=True
                )
            )
        self.recommendations.extend(combined_recommendations)
        return

    # NOTE(review): boiler_recommendation is never added to self.recommendations in this method, so without a
    # system change only the heating controls are recommended here. The phase handling below looks like it
    # expects the boiler upgrade to be part of self.recommendations - confirm whether an append is missing

    # We increment the recommendation phase, since the heating controls are separate from the boiler upgrade
    # but we'll only increment if we have a heating recommendation
    has_heating_recommendation = any(
        recommendation["type"] == "heating" for recommendation in self.recommendations
    )
    if has_heating_recommendation:
        recommendation_phase += 1

    # The heating controls recommendation is distinct from the boiler upgrade recommendation
    # We insert phase into the recommendations for heating controls
    for recommendation in controls_recommender.recommendation:
        recommendation["phase"] = recommendation_phase

    self.recommendations.extend(controls_recommender.recommendation)
    return

Some files were not shown because too many files have changed in this diff Show more