mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
commit
c23ad48e1b
129 changed files with 50560 additions and 3785 deletions
8
.gitignore
vendored
8
.gitignore
vendored
|
|
@ -241,6 +241,7 @@ fabric.properties
|
|||
# Locally stored data
|
||||
local_data/*
|
||||
/local_data/*
|
||||
etl/epc/local_data/*
|
||||
|
||||
*.DS_Store
|
||||
infrastructure/terraform/.terraform*
|
||||
|
|
@ -255,7 +256,7 @@ open_uprn/.idea/
|
|||
conservation_areas/.idea/
|
||||
model_data/.idea/
|
||||
model_data/simulation_system/.idea/
|
||||
|
||||
model_data/simulation_system/
|
||||
model_data/simulation_system/data*
|
||||
model_data/simulation_system/model_directory/
|
||||
model_data/simulation_system/predictions/
|
||||
|
|
@ -264,4 +265,7 @@ model_data/simulation_system/predictions/
|
|||
.idea/misc.iml
|
||||
|
||||
adhoc
|
||||
adhoc/*
|
||||
adhoc/*
|
||||
|
||||
etl-router-venv/
|
||||
refactor_datasets/
|
||||
2
.idea/.gitignore
generated
vendored
2
.idea/.gitignore
generated
vendored
|
|
@ -1,3 +1,5 @@
|
|||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# GitHub Copilot persisted chat sessions
|
||||
/copilot/chatSessions
|
||||
|
|
|
|||
|
|
@ -45,7 +45,9 @@ class Definitions:
|
|||
# contain a ‘null’ value. A resolution to correct these anomalies will be considered for future data releases.
|
||||
"NULL",
|
||||
# We sometimes see fields populated with just an empty string.
|
||||
""
|
||||
"",
|
||||
# An older value which rarely shows up but has been seen in the data.
|
||||
"UNKNOWN",
|
||||
}
|
||||
|
||||
DATA_ANOMALY_SUBSTRINGS = {
|
||||
|
|
|
|||
7
backend/DbClient.py
Normal file
7
backend/DbClient.py
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
class DbClient:
    # Placeholder type: no connection handling or query helpers exist yet.

    def __init__(self) -> None:
        """
        Create the client responsible for talking to the database.

        Nothing is initialised at the moment; the constructor stores no state.
        """
|
||||
105
backend/OrdnanceSurvey.py
Normal file
105
backend/OrdnanceSurvey.py
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
from functools import lru_cache
|
||||
import urllib.parse
|
||||
import requests
|
||||
from utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class OrdnanceSuveyClient:
    """
    Client for the Ordnance Survey Places "find" endpoint.

    Given an address and postcode it looks up the best matching record and
    exposes the OS-formatted address/postcode plus an EPC-compatible
    property type and built form derived from the OS classification code.
    """

    # Seconds to wait for the OS API before giving up, so a stalled
    # connection cannot hang the caller indefinitely.
    REQUEST_TIMEOUT_SECONDS = 30

    # Status reported when the request succeeded but matched no address.
    NO_RESULTS_STATUS = 204

    # OS classification code -> EPC-style property attributes. Only the
    # residential codes of interest are listed; anything else maps to {}.
    # In the OS api, "RD" is a "Dwelling" however this is not a valid
    # property type in the EPC database, so it carries no attributes.
    CLASSIFICATION_MAP = {
        'RD': {},
        'RD02': {'property_type': 'House', 'built_form': 'Detached'},
        'RD03': {'property_type': 'House', 'built_form': 'Semi-Detached'},
        'RD04': {'property_type': 'House', 'built_form': 'Mid-Terrace'},
        'RD06': {'property_type': 'Flat'},
    }

    def __init__(self, address, postcode, api_key):
        """
        This class is tasked with interaction with the ordnance survey API.
        :param address: The address for the property to search for
        :param postcode: The postcode for the property to search for
        :param api_key: The Ordnance Survey API key sent with every request
        """

        self.address = address
        self.postcode = postcode
        self.full_address = ", ".join([self.address, self.postcode])
        self.api_key = api_key

        self.results = None

        self.most_relevant_result = None
        self.property_type = None
        self.built_form = None
        # This will be postcode and address, as returned by the ordnance survey
        self.address_os = None
        self.postcode_os = None

        # Per-instance memo of the last completed lookup. This replaces a
        # module-wide lru_cache on the method, which kept every instance alive.
        self._places_api_response = None

    def set_places_address(self):
        """
        Given a response from the places api, this function will set the address and postcode of the property

        :raises ValueError: if no result has been stored yet
        """

        if self.most_relevant_result is None:
            raise ValueError("No results found - run get_places_api first")

        self.address_os = self.most_relevant_result["ADDRESS"]
        self.postcode_os = self.most_relevant_result["POSTCODE"]
        # We strip out the postcode from the address as this is already stored separately
        self.address_os = self.address_os.replace(self.postcode_os, "").strip()
        # Remove trailing comma
        self.address_os = self.address_os.rstrip(",").strip()
        # Convert to title case
        self.address_os = self.address_os.title()
        # Make sure postcode is upper case
        self.postcode_os = self.postcode_os.upper()

    def get_places_api(self):
        """
        Query the Ordnance Survey places api for ``full_address``.

        On a match, ``results``, ``most_relevant_result``, ``property_type``,
        ``built_form``, ``address_os`` and ``postcode_os`` are populated.

        :return: ``{"status": <code>}`` where the code is the HTTP status, or
            ``NO_RESULTS_STATUS`` when the request succeeded without matches
        :raises ValueError: if no API key was supplied
        """

        cached = getattr(self, "_places_api_response", None)
        if cached is not None:
            return cached

        if not self.api_key:
            raise ValueError("Ordnance Survey API key not specified")

        encoded_address_query = urllib.parse.quote(self.full_address)
        url = (f"https://api.os.uk/search/places/v1/find?query={encoded_address_query}&key="
               f"{self.api_key}")
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT_SECONDS)

        if response.status_code != 200:
            # Failed requests are not memoised, so a later call can retry.
            logger.warning(
                "Ordnance Survey places request failed with status %s", response.status_code
            )
            return {"status": response.status_code}

        data = response.json()
        # The "results" entry may be absent or empty when nothing matched.
        results = data.get("results") or []
        self.results = results

        if not results:
            logger.info("Could not find any results for the provided address and postcode")
            outcome = {"status": self.NO_RESULTS_STATUS}
        else:
            # Extract some details about the best match
            self.most_relevant_result = results[0]["DPA"]
            self.parse_classification_code(self.most_relevant_result["CLASSIFICATION_CODE"])
            self.set_places_address()
            outcome = {"status": response.status_code}

        self._places_api_response = outcome
        return outcome

    def parse_classification_code(self, classification_code: str):
        """
        This function will convert the classification code, returned by the OS places api, to a property type that is
        compatible with the EPC database.

        The various classifications can be found here:
        https://osdatahub.os.uk/docs/places/technicalSpecification

        Under LPI Output, CLASSIFICATION_CODE is described, and a link is provided to the full table of classifications
        For these purposes, we do not need the full classification as this includes non-residential properties. We only
        parse the ones of interest to us.

        Sets ``property_type`` and ``built_form``; each falls back to an empty
        string when the code is unknown or carries no such attribute.
        :param classification_code: the OS classification code, e.g. "RD02"
        :return: None
        """

        mapped = self.CLASSIFICATION_MAP.get(classification_code, {})
        self.property_type = mapped.get("property_type", "")
        self.built_form = mapped.get("built_form", "")
|
||||
1101
backend/Property.py
1101
backend/Property.py
File diff suppressed because it is too large
Load diff
744
backend/SearchEpc.py
Normal file
744
backend/SearchEpc.py
Normal file
|
|
@ -0,0 +1,744 @@
|
|||
import os
|
||||
import time
|
||||
import re
|
||||
|
||||
import usaddress
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from epc_api.client import EpcClient
|
||||
from backend.OrdnanceSurvey import OrdnanceSuveyClient
|
||||
from BaseUtility import Definitions
|
||||
from utils.logger import setup_logger
|
||||
from typing import List
|
||||
from fuzzywuzzy import process
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
# Maps each EPC field that can be estimated to the dtype it is cast to before
# estimation. SearchEpc.estimate_epc iterates this mapping, casts the column
# with ``astype`` and then picks an estimator by dtype: "Int64" -> rounded
# weighted average, 'float' -> weighted average to 2 d.p., 'str' -> weighted mode.
# Commented-out entries are fields that are deliberately not estimated here
# (address parts, identifiers and dates).
vartypes = {
    'low-energy-fixed-light-count': "Int64",
    # 'address': 'str',
    # 'uprn-source': 'str',
    'floor-height': 'float',
    'heating-cost-potential': 'float',
    'unheated-corridor-length': 'float',
    'hot-water-cost-potential': 'float',
    'construction-age-band': 'str',
    'potential-energy-rating': 'str',
    'mainheat-energy-eff': 'str',
    'windows-env-eff': 'str',
    'lighting-energy-eff': 'str',
    'environment-impact-potential': "Int64",
    'glazed-type': 'str',
    'heating-cost-current': 'float',
    # 'address3': 'str',
    'mainheatcont-description': 'str',
    'sheating-energy-eff': 'str',
    'property-type': 'str',
    'local-authority-label': 'str',
    'fixed-lighting-outlets-count': "Int64",
    'energy-tariff': 'str',
    'mechanical-ventilation': 'str',
    # NOTE(review): every other *-cost-* field is 'float'; as 'str' this one is
    # estimated as a weighted mode rather than a weighted average - confirm intended.
    'hot-water-cost-current': 'str',
    'county': 'str',
    # 'postcode': 'str',
    'solar-water-heating-flag': 'str',
    'constituency': 'str',
    'co2-emissions-potential': 'float',
    'number-heated-rooms': 'float',
    'floor-description': 'str',
    'energy-consumption-potential': 'float',
    'local-authority': 'str',
    'built-form': 'str',
    'number-open-fireplaces': "Int64",
    'windows-description': 'str',
    'glazed-area': 'str',
    # 'inspection-date': str,
    'mains-gas-flag': 'str',
    'co2-emiss-curr-per-floor-area': 'float',
    # 'address1': 'str',
    'heat-loss-corridor': 'str',
    'flat-storey-count': "Int64",
    'constituency-label': 'str',
    'roof-energy-eff': 'str',
    'total-floor-area': 'float',
    'building-reference-number': 'str',
    'environment-impact-current': 'float',
    'co2-emissions-current': 'float',
    'roof-description': 'str',
    'floor-energy-eff': 'str',
    'number-habitable-rooms': 'float',
    # 'address2': 'str',
    'hot-water-env-eff': 'str',
    'posttown': 'str',
    'mainheatc-energy-eff': 'str',
    'main-fuel': 'str',
    'lighting-env-eff': 'str',
    'windows-energy-eff': 'str',
    'floor-env-eff': 'str',
    'sheating-env-eff': 'str',
    'lighting-description': 'str',
    'roof-env-eff': 'str',
    'walls-energy-eff': 'str',
    'photo-supply': 'float',
    'lighting-cost-potential': 'float',
    'mainheat-env-eff': 'str',
    'multi-glaze-proportion': 'float',
    'main-heating-controls': 'str',
    # 'lodgement-datetime',
    'flat-top-storey': 'str',
    'current-energy-rating': 'str',
    'secondheat-description': 'str',
    'walls-env-eff': 'str',
    'transaction-type': 'str',
    # 'uprn': "Int64",
    'current-energy-efficiency': 'float',
    'energy-consumption-current': 'float',
    'mainheat-description': 'str',
    'lighting-cost-current': 'float',
    # 'lodgement-date',
    'extension-count': "Int64",
    'mainheatc-env-eff': 'str',
    # 'lmk-key': 'str',
    'wind-turbine-count': "Int64",
    'tenure': 'str',
    'floor-level': 'str',
    'potential-energy-efficiency': "Int64",
    'hot-water-energy-eff': 'str',
    'low-energy-lighting': 'float',
    'walls-description': 'str',
    'hotwater-description': 'str'
}
|
||||
|
||||
|
||||
class SearchEpc:
|
||||
"""
|
||||
Given address information about a home, this class is responsible for retrieving the EPC data associated
|
||||
to the property.
|
||||
|
||||
For a home, we might have address lines 1, 2, 3 and 4, as well as a postcode.
|
||||
|
||||
Often, simply searching the EPC database with address line 1 and postcode will be enough to find
|
||||
the property, but there are some cases where this is not true and we might need to utilise other
|
||||
combinations about the home to find the property
|
||||
"""
|
||||
|
||||
MAX_RETRIES = 5
|
||||
|
||||
SUCCESS = {
|
||||
"status": 200,
|
||||
"message": "success",
|
||||
"error": None
|
||||
}
|
||||
|
||||
NODATA = {
|
||||
"status": 201,
|
||||
"message": "No data",
|
||||
"error": None
|
||||
}
|
||||
|
||||
def __init__(
        self,
        address1: str,
        postcode: str,
        auth_token: str,
        os_api_key: str,
        full_address: str | None = None,
        max_retries: int | None = None,
        uprn: int | None = None,
        size: int | None = None,
        property_type: str | None = None,
        fast: bool = False
):
    """
    Address line 1 and postcode are mandatory fields. The other arguments are optional
    but can be used to find the epc for the home, if address1 and postcode are insufficient
    :param address1: string, property's address line 1
    :param postcode: string, property's postcode
    :param auth_token: string, token handed to the EpcClient used for EPC searches
    :param os_api_key: string, API key handed to the OrdnanceSuveyClient
    :param full_address: string, optional parameter, the full address of the property
    :param max_retries: int, optional, number of retries to make when searching the api. Falls back to
        MAX_RETRIES when not provided
    :param uprn: int, optional, the uprn of the property
    :param size: int, optional, the number of results to return. If not provided, defaults to 25 which is the api's
        default
    :param property_type: str, optional, the property type of the property, if known before hand
    :param fast: bool, optional, when True extract_epc_data returns only the newest EPC and skips
        address formatting
    """

    self.address1 = address1
    self.postcode = postcode
    self.full_address = full_address
    self.uprn = uprn
    # House number (and its numeric part) of the target home; used later to
    # weight neighbouring EPCs by how close their house number is.
    self.house_number = self.get_house_number(self.address1)
    self.numeric_house_number = self.extract_numeric_housenumber_part(self.house_number)

    self.max_retries = max_retries if max_retries is not None else self.MAX_RETRIES

    self.client = EpcClient(auth_token=auth_token)
    self.ordnance_survey_client = OrdnanceSuveyClient(
        address=self.address1, postcode=self.postcode, api_key=os_api_key
    )

    # Raw response of the most recent successful get_epc call
    self.data = None
    self.newest_epc = None
    self.older_epcs = None
    self.full_sap_epc = None

    # These are the address and postcode values, which we store in the database
    self.address_clean = None
    self.postcode_clean = None

    self.size = size if size is not None else 25

    self.property_type = property_type
    self.fast = fast
|
||||
|
||||
@classmethod
def get_house_number(cls, address: str) -> str | None:
    """
    Extract the house number from a free-text address.

    The usaddress parser is tried first; when it tags no "AddressNumber"
    token, a regular expression looks for a number following 'Flat' or
    'Apartment', or a number at the very start of the address.
    :param address: the address to parse
    :return: the house number as a string (commas removed), or None
    """

    tagged_number = next(
        (token for token, label in usaddress.parse(address) if label == "AddressNumber"),
        None,
    )

    if tagged_number is not None:
        # usaddress keeps punctuation attached to the token, so drop commas
        return tagged_number.replace(",", "")

    # usaddress isn't optimal for addresses with prefixes such as 'Flat',
    # so fall back to a custom pattern.
    fallback = re.search(r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)', address)
    if not fallback:
        return None

    # Exactly one of the two alternatives captured a value
    flat_number, leading_number = fallback.groups()
    return flat_number if flat_number is not None else leading_number
|
||||
|
||||
@staticmethod
|
||||
def extract_numeric_housenumber_part(house_number: str | None) -> int | None:
|
||||
# Regular expression to find the first occurrence of one or more digits
|
||||
|
||||
if house_number is None:
|
||||
return None
|
||||
|
||||
match = re.search(r'\d+', house_number)
|
||||
|
||||
if match:
|
||||
return int(match.group())
|
||||
else:
|
||||
return None
|
||||
|
||||
def get_epc(self, params=None, size=None):
|
||||
# Get the EPC data with retries
|
||||
size = size if size is not None else self.size
|
||||
if params is None:
|
||||
if self.uprn:
|
||||
params = {"uprn": self.uprn}
|
||||
else:
|
||||
params = {"address": self.address1, "postcode": self.postcode}
|
||||
|
||||
for retry in range(self.max_retries):
|
||||
try:
|
||||
|
||||
if "uprn" in params:
|
||||
# We use the direct call method inside, since we need to implement uprn as a valid
|
||||
# parameter for the search function
|
||||
url = os.path.join(self.client.domestic.host, "search")
|
||||
response = self.client.domestic.call(method="get", url=url, params=params)
|
||||
else:
|
||||
response = self.client.domestic.search(params=params, size=size)
|
||||
|
||||
if response:
|
||||
self.data = response
|
||||
return self.SUCCESS
|
||||
|
||||
if retry > 0:
|
||||
logger.info("Failed previous attempt but retry successful")
|
||||
# If we got nothing, final try
|
||||
if not response:
|
||||
return {
|
||||
"status": 204,
|
||||
"message": "no data",
|
||||
"error": None
|
||||
}
|
||||
|
||||
return {
|
||||
"status": 200,
|
||||
"message": "success",
|
||||
"error": None
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
if retry < self.max_retries - 1:
|
||||
# If not the last retry, wait for 3 seconds before retrying
|
||||
time.sleep(3)
|
||||
else:
|
||||
# If it's the last retry, we continue
|
||||
return {
|
||||
"status": 500,
|
||||
"message": "Could not retrieve EPC data",
|
||||
"error": str(e)
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def filter_rows(rows, property_type=None, address=None):
|
||||
"""
|
||||
This method should not be used when property_type and address are both not None
|
||||
:param rows:
|
||||
:param property_type:
|
||||
:param address:
|
||||
:return:
|
||||
"""
|
||||
# Given the results from the EPC api, attempts to reduce the number of rows
|
||||
uprns = {r["uprn"] for r in rows}
|
||||
|
||||
if (property_type is None) and (address is None):
|
||||
return rows
|
||||
|
||||
if len(uprns) == 1:
|
||||
return rows
|
||||
|
||||
if property_type is not None:
|
||||
# We can do a filter on the property type
|
||||
rows_filtered = [r for r in rows if r["property-type"] == property_type]
|
||||
|
||||
if rows_filtered:
|
||||
return rows_filtered
|
||||
|
||||
return rows
|
||||
|
||||
if address is not None:
|
||||
# We can do a filter on the property type
|
||||
best_match = process.extractOne(address, [r["address"] for r in rows], score_cutoff=0)
|
||||
rows_filtered = [r for r in rows if r["address"] == best_match[0]]
|
||||
|
||||
if rows_filtered:
|
||||
return rows_filtered
|
||||
|
||||
return rows
|
||||
|
||||
@staticmethod
|
||||
def format_address(newest_epc):
|
||||
"""
|
||||
Format address and postcode for storage in the database
|
||||
"""
|
||||
postcode = newest_epc["postcode"]
|
||||
address = newest_epc["address"]
|
||||
|
||||
# Format them
|
||||
address = address.replace(postcode, "").strip()
|
||||
address = address.rstrip(",").strip()
|
||||
address = address.title()
|
||||
|
||||
postcode = postcode.upper()
|
||||
|
||||
return address, postcode
|
||||
|
||||
def extract_epc_data(self, address=None):
    """
    Given a successful search, this method will format the data and return it.

    The rows held in ``self.data`` are narrowed by property type and then by
    address, after which the newest EPC, the remaining EPCs, any "new dwelling"
    EPC, the cleaned address/postcode and the uprn are returned.
    :param address: optional address used to narrow rows that span several uprns
    :return: tuple of (newest_epc, older_epcs, full_sap_epc, address, postcode, uprn).
        In fast mode only newest_epc is populated and the rest are empty placeholders
    :raises ValueError: if no search has been run, or if the rows still span
        several uprns and several addresses after filtering
    """

    if self.data is None:
        raise ValueError("data is missing, run search first")

    rows = self.data["rows"]

    # We perform some checks on the rows
    # Firstly, we should only have 1 uprn so if we have multiple, we'll need to filter down the
    # property further. filter_rows is called once per criterion because it must not be given
    # property_type and address at the same time.

    rows = self.filter_rows(rows, property_type=self.property_type, address=None)
    rows = self.filter_rows(rows, property_type=None, address=address)

    # We now check for a full sap epc (the first row lodged as a "new dwelling"):
    full_sap_epc = [r for r in rows if r["transaction-type"] == "new dwelling"]
    full_sap_epc = full_sap_epc[0] if full_sap_epc else {}

    # Finally, we identify the newest epc and the rest, and then return
    newest_epc, older_epcs = self.filter_newest_epc(list_of_epcs=rows)

    # Get the uprn for this home; falsy uprn values are ignored
    uprns = {r["uprn"] for r in rows if r["uprn"]}
    # We can sometimes have no uprn for a property
    if (len(uprns) == 0) and len(rows) > 0:
        logger.warning("Found data but missing uprn")
    elif len(uprns) != 1:
        # There is a possibility that we have multiple UPRNs for a single property, which is an error
        # NOTE(review): an empty ``rows`` list also lands here and raises the
        # "Multiple UPRNs" error below - confirm that is the intended outcome.
        addresses = {r["address"] for r in rows}
        if len(addresses) == 1:
            # Take the uprn from the most recent
            uprns = {newest_epc["uprn"]}
        else:
            raise ValueError("Multiple UPRNs found - investigate me")

    uprn = uprns.pop() if uprns else None

    if self.fast:
        # NOTE(review): the uprn computed above is discarded in fast mode - confirm intended
        return newest_epc, [], {}, "", "", None

    # Retrieve postcode and address
    address_epc, postcode_epc = self.format_address(newest_epc=newest_epc)

    return newest_epc, older_epcs, full_sap_epc, address_epc, postcode_epc, uprn
|
||||
|
||||
@staticmethod
|
||||
def filter_newest_epc(list_of_epcs: List):
|
||||
newest_response = [
|
||||
r for r in list_of_epcs if
|
||||
r["lodgement-datetime"] == max([x["lodgement-datetime"] for x in list_of_epcs])
|
||||
]
|
||||
|
||||
if not newest_response:
|
||||
return {}, []
|
||||
|
||||
if len(newest_response) != 1:
|
||||
# It is possible (but rare, and likely an error on EPC lodgement) that we have multiple EPCs that
|
||||
# were lodged at the exact same time. In this case, we will take the first one
|
||||
newest_response = [newest_response[0]]
|
||||
|
||||
older_epcs = [epc for epc in list_of_epcs if epc["lmk-key"] != newest_response[0]["lmk-key"]]
|
||||
|
||||
return newest_response[0], older_epcs
|
||||
|
||||
@staticmethod
|
||||
def _get_epc_mode(col: str, epc_data: pd.DataFrame):
|
||||
"""
|
||||
Simple method to extract the mode value from the EPC data
|
||||
:param col: name of the column to take the mode of
|
||||
:param epc_data: pandas dataframe of epc data
|
||||
"""
|
||||
|
||||
mode_value = epc_data[[col]].mode(dropna=True)
|
||||
if len(mode_value) != 1:
|
||||
raise NotImplementedError("TODO: Handle multiple modes")
|
||||
mode_value = mode_value.iloc[0][col]
|
||||
|
||||
return mode_value
|
||||
|
||||
def fetch_nearby_epcs(
        self, initial_postcode: str,
        lmks_to_drop: list[str] | None = None,
        built_form: str = "",
        property_type: str = ""
):
    """
    Fetches and processes EPC data for a given initial postcode, applying successive trimming
    to the postcode and filtering the data until a non-empty result set is found.

    The function queries the EPC API with the provided postcode, and if no data is found or
    if the data doesn't meet certain criteria, it progressively shortens the postcode by
    removing the last character and retries the query. This process continues until a valid
    set of EPC data is obtained or the postcode is exhausted.

    Additional filtering is applied to the obtained EPC data based on 'lmk-key', 'built-form',
    and 'property-type'. The data is also processed to extract and numerically interpret house
    numbers, calculate house number distances, and apply weights based on these distances.

    :param initial_postcode: The initial full postcode for the EPC data query.
    :param lmks_to_drop: List of 'lmk-key' values to be excluded from the EPC data.
    :param built_form: The 'built-form' value to be used for filtering the EPC data. When empty
        or null, the built form is estimated from the fetched data instead.
    :param property_type: The 'property-type' value to be used for filtering the EPC data. When
        empty, the property type is estimated from the fetched data instead.
    :return: pandas DataFrame of the matching EPC rows, including a "weight" column
    :raises Exception: if no usable data is found before the postcode is trimmed to nothing
    """

    # Translates the property type used by callers into the value sent to the
    # api as "property-type".
    # NOTE(review): a non-empty property_type missing from this map raises KeyError below.
    property_type_api_map = {
        "Bungalow": "bungalow",
        "Flat": "flat",
        "House": "house",
        "Maisonette": "maisonette",
        "Park home": "park home",
    }

    postcode = initial_postcode
    while postcode:
        # Fetch data from EPC API
        params = {"postcode": postcode}
        if property_type:
            params["property-type"] = property_type_api_map[property_type]

        # We cap the request at 100 homes of the relevant type, so not to pull in too many irrelevant homes
        epc_response = self.get_epc(params=params, size=100)

        if epc_response["status"] == 200:
            epc_data = pd.DataFrame(self.data["rows"])

            if lmks_to_drop is not None:
                epc_data = epc_data[~epc_data["lmk-key"].isin(lmks_to_drop)]

            if not epc_data.empty:
                # Further processing of the EPC data
                epc_data['lodgement-datetime'] = pd.to_datetime(epc_data['lodgement-datetime'], errors='coerce')
                # Keep only the first row per uprn after sorting newest first
                epc_data = epc_data.sort_values("lodgement-datetime", ascending=False).groupby("uprn").head(1)
                epc_data["house_number"] = epc_data["address"].apply(lambda add1: self.get_house_number(add1))
                epc_data["numeric_house_number"] = epc_data["house_number"].apply(
                    lambda house_num: self.extract_numeric_housenumber_part(house_num)
                )

                if self.numeric_house_number is None:
                    # If we don't have a house number, we treat all weights as equal
                    epc_data["weight"] = 1
                else:
                    epc_data["house_number_distance"] = abs(
                        epc_data["numeric_house_number"] - self.numeric_house_number
                    )
                    # A previous version weighted by 1 / (distance + 1) and filled missing weights
                    # with the mean and then with 1. It was replaced by the square-root form below;
                    # the + 1 is kept so a distance of 0 (e.g. 7a vs 7b, or 9A vs 9) stays finite.
                    # TODO: Testing
                    # If the postcode is different from the initial postcode, it doesn't make sense to have
                    # any weightings
                    if all(pd.isnull(epc_data["house_number_distance"])) or (postcode != initial_postcode):
                        epc_data["weight"] = 1
                    else:
                        epc_data["weight"] = 1 / np.sqrt(epc_data["house_number_distance"] + 1)
                        # Homes without a house number get the average weight
                        epc_data["weight"] = epc_data["weight"].fillna(epc_data["weight"].mean())

                # Use the caller's property type when given, otherwise the weighted mode of the data
                estimation_property_type = self._estimate_str(
                    key="property-type", estimation_data=epc_data
                ) if property_type == "" else property_type

                # Most heavily weighted built form among homes of that property type
                epc_built_form = self._estimate_str(
                    key="built-form",
                    estimation_data=epc_data[epc_data["property-type"] == estimation_property_type]
                )

                # NOTE(review): confirm "End-Terraced"/"Mid-Terraced" match the exact built-form
                # spellings returned by the EPC api.
                if built_form == "Semi-Detached" and epc_built_form in ["End-Terraced", "Mid-Terraced"]:
                    estimation_built_form = "End-Terraced"
                elif (built_form == "") or (pd.isnull(built_form)):
                    estimation_built_form = epc_built_form
                else:
                    estimation_built_form = built_form

                # We handle some edge cases experienced with maisonettes - if built form is detached, just filter
                # on maisonette
                # We also add some additional logic for Park homes, because they are far less common than other
                # property types

                is_maisonette_with_bad_built_form = (estimation_property_type == "Maisonette") & (
                    estimation_built_form in ["Detached", "Semi-Detached"]
                )

                is_park_home_without_built_form = (estimation_property_type == "Park home") & (
                    sum(epc_data["built-form"] == estimation_built_form) == 0
                )

                has_missing_built_form = not estimation_built_form

                if is_maisonette_with_bad_built_form or is_park_home_without_built_form or has_missing_built_form:
                    # Built form is unusable here, so filter on property type alone
                    epc_data = epc_data[epc_data["property-type"] == estimation_property_type]
                else:
                    epc_data = epc_data[
                        (epc_data["built-form"] == estimation_built_form) & (
                            epc_data["property-type"] == estimation_property_type)
                    ]

                if not epc_data.empty:
                    return epc_data  # Return the filtered data if it's not empty

        # Shorten the postcode by one character for the next iteration
        postcode = postcode[:-1].rstrip()

    # If loop finishes without a valid response, raise an exception
    raise Exception("Unable to find postcode data after trimming - investigate me")
|
||||
|
||||
def estimate_epc(self, property_type, built_form, lmks_to_drop=None):
    """
    For a property that does not have an EPC, we retrieve the EPC data for the closest properties
    and estimate the EPC for the property in question.

    Every field listed in ``vartypes`` is estimated from the nearby EPCs using the estimator that
    matches its dtype; fields with no usable values are set to None.

    Note - do we have postcodes with just a single address? We would need to use a different approach
    to find the closest homes
    :param property_type: This is the property type of the property we are estimating, that can be retrieved from
        the ordnance survey api
    :param built_form: This is the built form of the property we are estimating, that can be retrieved from
        the ordnance survey api
    :param lmks_to_drop: This is a list of LMK keys that should be dropped from the estimation process. This
        is used as an override for testing, to drop EPCs for the property we are testing
    :return: dict of estimated EPC fields, plus "lodgement-datetime", "lodgement-date", "postcode",
        "uprn", "address" and an "estimated" flag set to True
    """

    # From the ordnance survey data, we want to determine the property type and then use only similar property
    # types for the estimation process
    epc_data = self.fetch_nearby_epcs(
        initial_postcode=self.postcode,
        lmks_to_drop=lmks_to_drop,
        built_form=built_form,
        property_type=property_type
    )

    # If we have a missing lodgement date, we fill it with inspection-date
    # NOTE(review): "inspection-date" is not converted to datetime in the code visible here - confirm
    # its dtype is compatible with the lodgement-datetime column.
    epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["inspection-date"])
    # If we still have missing dates, we set it to the mean of the non NA dates
    epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["lodgement-datetime"].mean())

    # For each attribute, we need to determine the datatype and use an appropriate method
    # to estimate.
    estimated_epc = {}
    for key, vartype in vartypes.items():
        # Normalise nulls and empty strings to None so they can be dropped together
        epc_data[key] = np.where(pd.isnull(epc_data[key]), None, epc_data[key])
        epc_data[key] = np.where(epc_data[key] == "", None, epc_data[key])
        estimation_data = epc_data[[key, "weight", "lodgement-datetime"]].copy()
        estimation_data = estimation_data[~pd.isnull(estimation_data[key])]
        # Drop values flagged as known data anomalies
        estimation_data = estimation_data[~estimation_data[key].isin(Definitions.DATA_ANOMALY_MATCHES)]
        if vartype == "Int64":
            # We have some edge cases where we get the error "invalid literal for int() with base 10: '1.0'"
            # so we go through float first
            estimation_data[key] = estimation_data[key].astype(float).astype(vartype)
        else:
            estimation_data[key] = estimation_data[key].astype(vartype)

        if estimation_data.shape[0] == 0:
            # No usable value for this field among the nearby homes
            estimated_epc[key] = None
            continue

        if vartype == "Int64":
            estimated_value = self._estimate_int(estimation_data, key)
        elif vartype == "float":
            estimated_value = self._estimate_float(estimation_data, key)
        elif vartype == "str":
            estimated_value = self._estimate_str(estimation_data, key)
        else:
            raise NotImplementedError("estimation method not implemented for type")

        estimated_epc[key] = estimated_value

    # Insert an estimated lodgement datetime, with a weighted average
    estimated_epc["lodgement-datetime"] = self.calculate_weighted_lodgement_datetime(epc_data=epc_data)
    # Extract lodgement date
    # It is possible that there is still no lodgement date, so we need to handle this
    if pd.isnull(estimated_epc["lodgement-datetime"]):
        estimated_epc["lodgement-date"] = None
    else:
        estimated_epc["lodgement-date"] = estimated_epc["lodgement-datetime"].strftime("%Y-%m-%d")

    estimated_epc["postcode"] = self.postcode
    estimated_epc["uprn"] = self.uprn
    estimated_epc["address"] = self.full_address
    # Indicate that this epc was estimated
    estimated_epc["estimated"] = True

    return estimated_epc
|
||||
|
||||
@staticmethod
|
||||
def calculate_weighted_lodgement_datetime(epc_data):
|
||||
numeric_dates = pd.to_datetime(epc_data['lodgement-datetime']).view('int64')
|
||||
|
||||
# Calculate the weighted sum of dates
|
||||
weighted_sum = (numeric_dates * epc_data['weight']).sum()
|
||||
|
||||
# Calculate the sum of weights
|
||||
total_weights = epc_data['weight'].sum()
|
||||
|
||||
# Calculate the weighted mean in numeric format
|
||||
weighted_mean_numeric = weighted_sum / total_weights
|
||||
|
||||
# Convert the numeric weighted mean back to datetime
|
||||
weighted_mean_datetime = pd.to_datetime(weighted_mean_numeric)
|
||||
|
||||
return weighted_mean_datetime
|
||||
|
||||
@staticmethod
|
||||
def _estimate_int(estimation_data, key):
|
||||
return round(np.average(a=estimation_data[key], weights=estimation_data["weight"]))
|
||||
|
||||
@staticmethod
|
||||
def _estimate_float(estimation_data, key):
|
||||
return round(np.average(a=estimation_data[key], weights=estimation_data["weight"]), 2)
|
||||
|
||||
@staticmethod
|
||||
def _estimate_str(estimation_data, key):
|
||||
agg = estimation_data.groupby(key)["weight"].sum().reset_index()
|
||||
agg = agg[agg["weight"] == agg["weight"].max()]
|
||||
if agg.shape[0] != 1:
|
||||
# If we have multiple modes, we take the more recent data on average
|
||||
recent_grouped = estimation_data[
|
||||
estimation_data[key].isin(agg[key].values)
|
||||
].groupby(key)["lodgement-datetime"].mean()
|
||||
|
||||
newest_group = recent_grouped.idxmax()
|
||||
return newest_group
|
||||
|
||||
return agg[key].values[0]
|
||||
|
||||
def find_property(self, skip_os=False):
    """
    This method will attempt to identify a property. It will, at first, use the EPC api to try and
    find the EPC for the property and the associated UPRN. If this fails, it will use the Ordnance Survey API to
    find the UPRN of the address.

    Because no result may have been provided by the EPC api because of formatting issues with the address,
    if the ordnance survey api is used and the uprn retrieved, the EPC api is queried again with the UPRN, just
    as a final check to see if there is any EPC data.

    If there is no EPC data, the epc data will be estimated based on the surrounding properties

    On success the results are stored on the instance (newest_epc, older_epcs, full_sap_epc, address_clean,
    postcode_clean and, when EPC data is found, uprn); nothing is returned.

    :param skip_os: When True and the first EPC lookup does not return a 200, the Ordnance Survey places API is
        not called. The EPC is instead estimated from the property type and built form already held by the
        Ordnance Survey client, if a property type is available.
    :raises Exception: If the Ordnance Survey places API does not return a 200 status.
    """

    # Step 1: use the epc api to find the property and uprn
    response = self.get_epc()

    if response["status"] == 200:
        (
            self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn
        ) = self.extract_epc_data(address=self.full_address)
        return

    # Step 2: If we don't have an EPC, we use the ordnance survey api to find the uprn
    # NOTE(review): the nesting of this branch was reconstructed from a listing that had lost its indentation.
    # Confirm that the address/postcode assignments belong inside the property_type check and that the
    # `return` belongs to `if skip_os` (i.e. nothing is set when no property type is available).
    if skip_os:
        if self.ordnance_survey_client.property_type is not None:
            # We can try and estimate
            estimated_epc = self.estimate_epc(
                property_type=self.ordnance_survey_client.property_type,
                built_form=self.ordnance_survey_client.built_form
            )
            self.newest_epc = estimated_epc
            self.older_epcs = []
            self.full_sap_epc = {}

            # Finally, set a standardised address 1 and postcode
            self.address_clean = self.ordnance_survey_client.address_os
            self.postcode_clean = self.ordnance_survey_client.postcode_os
        return

    os_response = self.ordnance_survey_client.get_places_api()

    if os_response["status"] != 200:
        # Investigate this if it happens
        raise Exception("Unable to find property - investigate me")

    # Step 3: Now that we have a uprn, do another check against the epc api, this time searching with the uprn
    self.uprn = self.ordnance_survey_client.most_relevant_result["UPRN"]
    response = self.get_epc()
    if response["status"] == 200:
        (
            self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn
        ) = self.extract_epc_data()
        return

    # Step 4: If we still don't have an EPC, we estimate the EPC data
    # The Ordnance Survey address replaces the user-supplied one before estimating
    self.full_address = self.ordnance_survey_client.most_relevant_result["ADDRESS"]
    estimated_epc = self.estimate_epc(
        property_type=self.ordnance_survey_client.property_type,
        built_form=self.ordnance_survey_client.built_form
    )
    self.newest_epc = estimated_epc
    self.older_epcs = []
    self.full_sap_epc = {}

    # Finally, set a standardised address 1 and postcode
    self.address_clean = self.ordnance_survey_client.address_os
    self.postcode_clean = self.ordnance_survey_client.postcode_os
    return
|
||||
|
|
@ -8,9 +8,12 @@ class Settings(BaseSettings):
|
|||
SECRET_KEY: str
|
||||
ENVIRONMENT: str
|
||||
DATA_BUCKET: str
|
||||
PREDICTIONS_BUCKET: str
|
||||
SAP_PREDICTIONS_BUCKET: str
|
||||
CARBON_PREDICTIONS_BUCKET: str
|
||||
HEAT_PREDICTIONS_BUCKET: str
|
||||
PLAN_TRIGGER_BUCKET: str
|
||||
EPC_AUTH_TOKEN: str
|
||||
ORDNANCE_SURVEY_API_KEY: str
|
||||
DB_HOST: str
|
||||
DB_PASSWORD: str
|
||||
DB_USERNAME: str
|
||||
|
|
|
|||
50
backend/app/db/functions/non_intrusive_surveys.py
Normal file
50
backend/app/db/functions/non_intrusive_surveys.py
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
from sqlalchemy.orm import Session
|
||||
from backend.app.db.models.non_intrusive_surveys import NonIntrusiveSurvey, NonIntrusiveSurveyNotes
|
||||
|
||||
|
||||
def upload_non_intrusive_survey_notes(session: Session, non_invasive_notes, batch_size=500):
    """
    Store non-intrusive surveys and their notes in the database, one batch at a time.

    Every dictionary yields one survey row (built from 'uprn', 'survey_date' and 'surveyor') plus one note row
    for each remaining key-value pair, with the key stored as the note title. Each batch is committed on its own.

    :param session: SQLAlchemy Session object through which all database transactions are handled.
    :param non_invasive_notes: List of dictionaries where each dictionary contains survey details including 'uprn',
    'survey_date', 'surveyor', and other notes as key-value pairs.
    :param batch_size: The size of each batch to be processed (default is 500).
    :return: None
    """
    survey_fields = ['uprn', 'survey_date', 'surveyor']
    record_count = len(non_invasive_notes)

    for offset in range(0, record_count, batch_size):
        chunk = non_invasive_notes[offset:min(offset + batch_size, record_count)]

        survey_rows = [
            NonIntrusiveSurvey(
                uprn=record['uprn'],
                survey_date=record['survey_date'],
                surveyor=record['surveyor']
            )
            for record in chunk
        ]
        session.add_all(survey_rows)
        # Flushing assigns the survey ids that the note rows reference
        session.flush()

        note_rows = [
            NonIntrusiveSurveyNotes(survey_id=survey_row.id, title=title, note=text)
            for record, survey_row in zip(chunk, survey_rows)
            for title, text in record.items()
            if title not in survey_fields
        ]
        session.bulk_save_objects(note_rows)
        session.commit()
|
||||
|
|
@ -3,15 +3,17 @@ from backend.app.db.models.recommendations import Plan, PlanRecommendations, Rec
|
|||
from backend.app.db.models.portfolio import Portfolio
|
||||
|
||||
|
||||
def aggregate_portfolio_recommendations(session, portfolio_id: int):
|
||||
def aggregate_portfolio_recommendations(
|
||||
session, portfolio_id: int, total_valuation_increase: float, labour_days: float
|
||||
):
|
||||
# Aggregate multiple fields
|
||||
aggregates = (
|
||||
session.query(
|
||||
func.sum(Recommendation.estimated_cost).label("cost"),
|
||||
func.sum(Recommendation.total_work_hours).label("total_work_hours"),
|
||||
# For future usage we will aggregate multiple fields in this step
|
||||
# func.sum(Recommendation.heat_demand).label("total_heat_demand"),
|
||||
# func.sum(Recommendation.energy_savings).label("total_energy_savings")
|
||||
func.sum(Recommendation.adjusted_heat_demand).label("energy_savings"),
|
||||
func.sum(Recommendation.co2_equivalent_savings).label("co2_equivalent_savings"),
|
||||
func.sum(Recommendation.energy_cost_savings).label("energy_cost_savings"),
|
||||
)
|
||||
.join(PlanRecommendations, PlanRecommendations.recommendation_id == Recommendation.id)
|
||||
.join(Plan, Plan.id == PlanRecommendations.plan_id)
|
||||
|
|
@ -22,8 +24,9 @@ def aggregate_portfolio_recommendations(session, portfolio_id: int):
|
|||
aggregates_dict = {
|
||||
"cost": aggregates.cost or 0,
|
||||
"total_work_hours": aggregates.total_work_hours or 0,
|
||||
# "total_heat_demand": aggregates.total_heat_demand or 0,
|
||||
# "total_energy_savings": aggregates.total_energy_savings or 0
|
||||
"energy_savings": aggregates.energy_savings or 0,
|
||||
"co2_equivalent_savings": aggregates.co2_equivalent_savings or 0,
|
||||
"energy_cost_savings": aggregates.energy_cost_savings or 0,
|
||||
}
|
||||
|
||||
# Get the portfolio and update the fields
|
||||
|
|
@ -32,6 +35,10 @@ def aggregate_portfolio_recommendations(session, portfolio_id: int):
|
|||
for key, value in aggregates_dict.items():
|
||||
setattr(portfolio, key, value)
|
||||
|
||||
# Insert total valuation increase and labour days
|
||||
portfolio.property_valuation_increase = total_valuation_increase
|
||||
portfolio.labour_days = labour_days
|
||||
|
||||
# Merge the updated portfolio back into the session
|
||||
session.merge(portfolio)
|
||||
session.flush()
|
||||
|
|
|
|||
|
|
@ -3,13 +3,15 @@
|
|||
###
|
||||
import datetime
|
||||
import pytz
|
||||
from sqlalchemy.orm import Session
|
||||
from backend.app.db.models.portfolio import (
|
||||
PropertyModel, PropertyCreationStatus, PortfolioStatus, PropertyTargetsModel, PropertyDetailsEpcModel
|
||||
PropertyModel, PropertyCreationStatus, PortfolioStatus, PropertyTargetsModel, PropertyDetailsEpcModel,
|
||||
PropertyDetailsSpatial
|
||||
)
|
||||
from sqlalchemy.orm.exc import NoResultFound
|
||||
|
||||
|
||||
def create_property(session, portfolio_id: int, address: str, postcode: str) -> (int, bool):
|
||||
def create_property(session: Session, portfolio_id: int, address: str, postcode: str, uprn: str) -> (int, bool):
|
||||
"""
|
||||
This function will create a record for the property in the database if it does not exist.
|
||||
If it does exist, it will just update the updated_at field.
|
||||
|
|
@ -23,7 +25,7 @@ def create_property(session, portfolio_id: int, address: str, postcode: str) ->
|
|||
try:
|
||||
# Attempt to fetch the existing property
|
||||
existing_property = session.query(PropertyModel).filter_by(
|
||||
address=address, postcode=postcode, portfolio_id=portfolio_id
|
||||
uprn=uprn, portfolio_id=portfolio_id
|
||||
).one()
|
||||
|
||||
# Update the 'updated_at' field
|
||||
|
|
@ -41,6 +43,7 @@ def create_property(session, portfolio_id: int, address: str, postcode: str) ->
|
|||
address=address,
|
||||
postcode=postcode,
|
||||
portfolio_id=portfolio_id,
|
||||
uprn=uprn,
|
||||
creation_status=PropertyCreationStatus.LOADING,
|
||||
status=PortfolioStatus.ASSESSMENT.value,
|
||||
has_pre_condition_report=False,
|
||||
|
|
@ -55,7 +58,9 @@ def create_property(session, portfolio_id: int, address: str, postcode: str) ->
|
|||
return new_property.id, True
|
||||
|
||||
|
||||
def create_property_targets(session, property_id: int, portfolio_id: int, epc_target=None, heat_demand_target=None):
|
||||
def create_property_targets(
|
||||
session: Session, property_id: int, portfolio_id: int, epc_target=None, heat_demand_target=None
|
||||
):
|
||||
"""
|
||||
This function will create a record for the property targets in the database if it does not exist.
|
||||
:param session: The database session
|
||||
|
|
@ -78,7 +83,9 @@ def create_property_targets(session, property_id: int, portfolio_id: int, epc_ta
|
|||
return True
|
||||
|
||||
|
||||
def update_property_data(session, property_id: int, portfolio_id: int, property_data: dict):
|
||||
def update_property_data(
|
||||
session: Session, property_id: int, portfolio_id: int, property_data: dict
|
||||
):
|
||||
now = datetime.datetime.now(pytz.utc)
|
||||
|
||||
try:
|
||||
|
|
@ -103,7 +110,9 @@ def update_property_data(session, property_id: int, portfolio_id: int, property_
|
|||
return True
|
||||
|
||||
|
||||
def create_property_details_epc(session, property_details_epc: dict):
|
||||
def create_property_details_epc(
|
||||
session: Session, property_details_epc: dict
|
||||
):
|
||||
"""
|
||||
This function will create or update a record for the property details EPC in the database.
|
||||
:param session: The database session
|
||||
|
|
@ -128,3 +137,36 @@ def create_property_details_epc(session, property_details_epc: dict):
|
|||
session.flush()
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def update_or_create_property_spatial_details(session: Session, uprn: int, property_details_spatial: dict):
    """
    Update an existing property details record or create a new one based on the UPRN.

    The `uprn` argument is authoritative: if `property_details_spatial` also carries a 'uprn' key it is
    overridden, so the create path can no longer fail with a duplicate keyword argument.

    :param session: The SQLAlchemy session for database interaction.
    :param uprn: The unique property reference number (UPRN) of the property.
    :param property_details_spatial: A dictionary containing the spatial property details to store or update.
    :return: True if the operation is successful, otherwise raises an exception.
    :raises sqlalchemy.orm.exc.MultipleResultsFound: If more than one record exists for the UPRN.
    """
    # Copy so the caller's dictionary is left untouched, and let the explicit uprn win
    details = {**property_details_spatial, "uprn": uprn}

    try:
        # Attempt to fetch the existing property details
        existing_property_details = session.query(PropertyDetailsSpatial).filter_by(
            uprn=uprn
        ).one()
    except NoResultFound:
        # Create a new record if not found
        session.add(PropertyDetailsSpatial(**details))
    else:
        # Update the fields with the data in property_details
        for key, value in details.items():
            setattr(existing_property_details, key, value)

        # Merge the updated property details back into the session
        session.merge(existing_property_details)

    session.flush()

    return True
|
||||
|
|
|
|||
|
|
@ -80,7 +80,13 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
|
|||
"starting_u_value": rec.get("starting_u_value"),
|
||||
"new_u_value": rec.get("new_u_value"),
|
||||
"sap_points": rec["sap_points"],
|
||||
"heat_demand": rec["heat_demand"],
|
||||
"adjusted_heat_demand": rec["adjusted_heat_demand"],
|
||||
"co2_equivalent_savings": rec["co2_equivalent_savings"],
|
||||
"total_work_hours": rec["labour_hours"],
|
||||
"energy_cost_savings": rec["energy_cost_savings"],
|
||||
"labour_days": rec["labour_days"],
|
||||
"already_installed": rec["already_installed"],
|
||||
}
|
||||
for rec in recommendations_to_upload
|
||||
]
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ class MaterialType(enum.Enum):
|
|||
exposed_floor_insulation = "exposed_floor_insulation"
|
||||
flat_roof_insulation = "flat_roof_insulation"
|
||||
room_roof_insulation = "room_roof_insulation"
|
||||
windows_glazing = "windows_glazing"
|
||||
|
||||
iwi_wall_demolition = "iwi_wall_demolition"
|
||||
iwi_vapour_barrier = "iwi_vapour_barrier"
|
||||
|
|
@ -32,6 +33,10 @@ class MaterialType(enum.Enum):
|
|||
ewi_wall_demolition = "ewi_wall_demolition"
|
||||
ewi_wall_preparation = "ewi_wall_preparation"
|
||||
ewi_wall_redecoration = "ewi_wall_redecoration"
|
||||
low_energy_lighting_installation = "low_energy_lighting_installation"
|
||||
flat_roof_preparation = "flat_roof_preparation"
|
||||
flat_roof_vapour_barrier = "flat_roof_vapour_barrier"
|
||||
flat_roof_waterproofing = "flat_roof_waterproofing"
|
||||
|
||||
|
||||
class DepthUnit(enum.Enum):
|
||||
|
|
@ -42,6 +47,7 @@ class CostUnit(enum.Enum):
|
|||
gbp_sq_meter = "gbp_sq_meter"
|
||||
gbp_per_unit = "gbp_per_unit"
|
||||
gbp_per_m2 = "gbp_per_m2"
|
||||
gbp_per_m = "gbp_per_m"
|
||||
|
||||
|
||||
class RValueUnit(enum.Enum):
|
||||
|
|
|
|||
22
backend/app/db/models/non_intrusive_surveys.py
Normal file
22
backend/app/db/models/non_intrusive_surveys.py
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
from sqlalchemy import Column, BigInteger, String, TIMESTAMP, ForeignKey, Integer
|
||||
from sqlalchemy.orm import declarative_base
|
||||
|
||||
Base = declarative_base()
|
||||
|
||||
|
||||
class NonIntrusiveSurvey(Base):
    """
    A non-intrusive survey of a property, identified by UPRN, survey date and surveyor.
    """
    __tablename__ = 'non_intrusive_survey'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # UPRNs can be up to 12 digits long, which does not fit in a 32-bit Integer column.
    # An already-created table needs a matching migration of this column.
    uprn = Column(BigInteger, nullable=False)
    survey_date = Column(TIMESTAMP, nullable=False)
    surveyor = Column(String, nullable=False)
|
||||
|
||||
|
||||
class NonIntrusiveSurveyNotes(Base):
    """
    A single titled note attached to a non-intrusive survey.
    """
    __tablename__ = 'non_intrusive_survey_notes'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # References the survey this note belongs to
    survey_id = Column(BigInteger, ForeignKey('non_intrusive_survey.id'), nullable=False)
    title = Column(String, nullable=False)
    # Not nullable: a note without a value cannot be stored
    note = Column(String, nullable=False)
|
||||
|
|
@ -42,6 +42,7 @@ class Portfolio(Base):
|
|||
property_valuation_increase = Column(Float) # Unit is always £ so we don't need to store the unit for the moment
|
||||
rental_yield_increase = Column(Float) # Unit is always £ so we don't need to store the unit for the moment
|
||||
total_work_hours = Column(Float)
|
||||
labour_days = Column(Float)
|
||||
created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
|
||||
updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
|
||||
|
||||
|
|
@ -85,6 +86,7 @@ class PropertyModel(Base):
|
|||
tenure = Column(Text)
|
||||
current_epc_rating = Column(Enum(Epc))
|
||||
current_sap_points = Column(Float)
|
||||
current_valuation = Column(Float)
|
||||
|
||||
|
||||
class FeatureRating(enum.Enum):
|
||||
|
|
@ -151,6 +153,21 @@ class PropertyDetailsEpcModel(Base):
|
|||
energy_tariff = Column(Text)
|
||||
primary_energy_consumption = Column(Float)
|
||||
co2_emissions = Column(Float)
|
||||
adjusted_energy_consumption = Column(Float)
|
||||
estimated = Column(Boolean, default=False)
|
||||
|
||||
|
||||
class PropertyDetailsSpatial(Base):
    """
    Spatial details stored against a property's UPRN: coordinates plus conservation, listed and heritage flags.
    """
    __tablename__ = "property_details_spatial"
    id = Column(Integer, primary_key=True, autoincrement=True)
    # NOTE(review): UPRNs can be up to 12 digits, which may exceed a 32-bit Integer column - confirm the
    # database column type. There is also no unique constraint here, although lookups by uprn expect one row.
    uprn = Column(Integer, nullable=False)
    # presumably projected easting/northing - TODO confirm the coordinate reference system
    x_coordinate = Column(Float)
    y_coordinate = Column(Float)
    latitude = Column(Float)
    longitude = Column(Float)
    conservation_status = Column(Boolean)
    is_listed_building = Column(Boolean)
    is_heritage_building = Column(Boolean)
|
||||
|
||||
|
||||
class PropertyDetailsMeter(Base):
|
||||
|
|
|
|||
|
|
@ -22,12 +22,15 @@ class Recommendation(Base):
|
|||
new_u_value = Column(Float)
|
||||
sap_points = Column(Float)
|
||||
heat_demand = Column(Float)
|
||||
adjusted_heat_demand = Column(Float)
|
||||
co2_equivalent_savings = Column(Float)
|
||||
energy_savings = Column(Float)
|
||||
energy_cost_savings = Column(Float)
|
||||
property_valuation_increase = Column(Float)
|
||||
rental_yield_increase = Column(Float)
|
||||
total_work_hours = Column(Float)
|
||||
labour_days = Column(Float)
|
||||
already_installed = Column(Boolean, nullable=False, default=False)
|
||||
|
||||
|
||||
class RecommendationMaterials(Base):
|
||||
|
|
@ -51,6 +54,9 @@ class Plan(Base):
|
|||
property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False)
|
||||
created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
|
||||
is_default = Column(Boolean, nullable=False)
|
||||
valuation_increase_lower_bound = Column(Float)
|
||||
valuation_increase_upper_bound = Column(Float)
|
||||
valuation_increase_average = Column(Float)
|
||||
|
||||
|
||||
class PlanRecommendations(Base):
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
from datetime import datetime
|
||||
|
||||
from tqdm import tqdm
|
||||
import pandas as pd
|
||||
from epc_api.client import EpcClient
|
||||
from etl.epc.Record import EPCRecord
|
||||
from backend.SearchEpc import SearchEpc
|
||||
from fastapi import APIRouter, Depends
|
||||
from sqlalchemy.exc import IntegrityError, OperationalError
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
|
@ -12,7 +14,8 @@ from backend.app.db.connection import db_engine
|
|||
from backend.app.db.functions.materials_functions import get_materials
|
||||
from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations
|
||||
from backend.app.db.functions.property_functions import (
|
||||
create_property, create_property_details_epc, create_property_targets, update_property_data
|
||||
create_property, create_property_details_epc, create_property_targets, update_property_data,
|
||||
update_or_create_property_spatial_details
|
||||
)
|
||||
from backend.app.db.functions.recommendations_functions import (
|
||||
create_plan, create_plan_recommendations, upload_recommendations
|
||||
|
|
@ -20,29 +23,39 @@ from backend.app.db.functions.recommendations_functions import (
|
|||
from backend.app.db.models.portfolio import rating_lookup
|
||||
from backend.app.dependencies import validate_token
|
||||
from backend.app.plan.schemas import PlanTriggerRequest
|
||||
from backend.app.plan.utils import (
|
||||
create_recommendation_scoring_data, get_cleaned, insert_temp_recommendation_id
|
||||
)
|
||||
from backend.app.utils import epc_to_sap_lower_bound, read_csv_from_s3, read_parquet_from_s3
|
||||
from backend.app.plan.utils import get_cleaned
|
||||
from backend.app.utils import epc_to_sap_lower_bound, sap_to_epc
|
||||
|
||||
from backend.ml_models.sap_change_model.api import SAPChangeModelAPI
|
||||
from backend.ml_models.api import ModelApi
|
||||
from backend.Property import Property
|
||||
from etl.epc.DataProcessor import DataProcessor
|
||||
from etl.epc.settings import COLUMNS_TO_MERGE_ON
|
||||
from recommendations.FloorRecommendations import FloorRecommendations
|
||||
from recommendations.RoofRecommendations import RoofRecommendations
|
||||
from recommendations.VentilationRecommendations import VentilationRecommendations
|
||||
from recommendations.FireplaceRecommendations import FireplaceRecommendations
|
||||
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
|
||||
|
||||
from recommendations.optimiser.CostOptimiser import CostOptimiser
|
||||
from recommendations.optimiser.GainOptimiser import GainOptimiser
|
||||
from recommendations.optimiser.optimiser_functions import prepare_input_measures
|
||||
from recommendations.WallRecommendations import WallRecommendations
|
||||
from recommendations.Recommendations import Recommendations
|
||||
from utils.logger import setup_logger
|
||||
from utils.s3 import read_dataframe_from_s3_parquet
|
||||
from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3
|
||||
from backend.ml_models.Valuation import PropertyValuation
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
BATCH_SIZE = 5
|
||||
SCORING_BATCH_SIZE = 400
|
||||
|
||||
|
||||
def patch_epc(patch, epc_records):
    """
    Overwrite values in the original EPC with customer-supplied values.

    Only variables that already exist in epc_records["original_epc"] are patched; any other entries in the
    patch are ignored. The records are modified in place.

    :param patch: Mapping of EPC variable name to the value that should replace it.
    :param epc_records: Dictionary of EPC records containing an "original_epc" entry.
    :return: The same epc_records object, with the patch applied.
    """
    overrides = {
        variable: value
        for variable, value in patch.items()
        if variable in epc_records["original_epc"]
    }

    for variable, value in overrides.items():
        epc_records["original_epc"][variable] = value

    return epc_records
|
||||
|
||||
|
||||
router = APIRouter(
|
||||
prefix="/plan",
|
||||
|
|
@ -58,31 +71,52 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
session = sessionmaker(bind=db_engine)()
|
||||
created_at = datetime.now().isoformat()
|
||||
|
||||
# TODO: We should store the trigger file path in the database with the plan so we can track the file that
|
||||
# triggered the plan
|
||||
|
||||
# TODO: if the measure is already installed, it should actually be the very first phase
|
||||
|
||||
try:
|
||||
session.begin()
|
||||
logger.info("Getting the inputs")
|
||||
epc_client = EpcClient(auth_token=get_settings().EPC_AUTH_TOKEN)
|
||||
plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
|
||||
uprn_filenames = read_dataframe_from_s3_parquet(
|
||||
bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
|
||||
)
|
||||
cleaning_data = read_parquet_from_s3(
|
||||
# If we have patches or overrides, we should read them in here
|
||||
patches = []
|
||||
if body.patches_file_path:
|
||||
patches = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.patches_file_path)
|
||||
|
||||
already_installed = []
|
||||
if body.already_installed_file_path:
|
||||
already_installed = read_csv_from_s3(
|
||||
bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.already_installed_file_path
|
||||
)
|
||||
|
||||
cleaning_data = read_dataframe_from_s3_parquet(
|
||||
bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
|
||||
)
|
||||
|
||||
input_properties = []
|
||||
for config in plan_input:
|
||||
for config in tqdm(plan_input):
|
||||
# We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
|
||||
# TODO: implement validation. We should also standardise postcode and address in some fashion as
|
||||
# a postcode of abcdef would be considered different to ABCDEF
|
||||
uprn = config.get("uprn", None)
|
||||
if uprn:
|
||||
uprn = int(float(uprn))
|
||||
|
||||
epc_searcher = SearchEpc(
|
||||
address1=config["address"],
|
||||
postcode=config["postcode"],
|
||||
uprn=uprn,
|
||||
auth_token=get_settings().EPC_AUTH_TOKEN,
|
||||
os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY
|
||||
)
|
||||
epc_searcher.find_property()
|
||||
# Create a record in db
|
||||
property_id, is_new = create_property(
|
||||
session, portfolio_id=body.portfolio_id, address=config['address'], postcode=config['postcode']
|
||||
session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
|
||||
)
|
||||
# If a new record was not created, we don't produce recommendations
|
||||
if not is_new:
|
||||
continue
|
||||
# TODO: Need to add heat demand target
|
||||
|
||||
create_property_targets(
|
||||
session,
|
||||
property_id=property_id,
|
||||
|
|
@ -91,24 +125,41 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
heat_demand_target=None
|
||||
)
|
||||
|
||||
epc_records = {
|
||||
'original_epc': epc_searcher.newest_epc.copy(),
|
||||
'full_sap_epc': epc_searcher.full_sap_epc.copy(),
|
||||
'old_data': epc_searcher.older_epcs.copy(),
|
||||
}
|
||||
|
||||
patch = next((
|
||||
x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
|
||||
), {})
|
||||
epc_records = patch_epc(patch, epc_records)
|
||||
|
||||
prepared_epc = EPCRecord(
|
||||
epc_records=epc_records,
|
||||
run_mode="newdata",
|
||||
cleaning_data=cleaning_data
|
||||
)
|
||||
|
||||
property_already_installed = next((
|
||||
x for x in already_installed if
|
||||
(x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
|
||||
), {})
|
||||
input_properties.append(
|
||||
Property(
|
||||
postcode=config['postcode'],
|
||||
address1=config['address'],
|
||||
epc_client=epc_client,
|
||||
id=property_id
|
||||
id=property_id,
|
||||
address=epc_searcher.address_clean,
|
||||
postcode=epc_searcher.postcode_clean,
|
||||
epc_record=prepared_epc,
|
||||
already_installed=property_already_installed,
|
||||
**Property.extract_kwargs(config)
|
||||
)
|
||||
)
|
||||
|
||||
if not input_properties:
|
||||
return Response(status_code=204)
|
||||
|
||||
logger.info("Getting EPC, and spatial data")
|
||||
for p in input_properties:
|
||||
p.search_address_epc()
|
||||
p.set_year_built()
|
||||
p.get_spatial_data(uprn_filenames)
|
||||
|
||||
# The materials data could be cached or local so we don't need to make
|
||||
# consistent requests to the backend for
|
||||
# the same data
|
||||
|
|
@ -116,173 +167,112 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
materials = get_materials(session)
|
||||
cleaned = get_cleaned()
|
||||
|
||||
uprn_filenames = read_dataframe_from_s3_parquet(
|
||||
bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
|
||||
)
|
||||
photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET)
|
||||
|
||||
logger.info("Getting spatial data")
|
||||
for p in input_properties:
|
||||
p.get_spatial_data(uprn_filenames)
|
||||
|
||||
logger.info("Getting components and epc recommendations")
|
||||
|
||||
# TODO: Move this to a class. We probably want a Recommender class which injects the optimisers
|
||||
# in as a dependency and then the optimisers can take the input measures in as part of the setup() method
|
||||
|
||||
recommendations = {}
|
||||
recommendations_scoring_data = []
|
||||
|
||||
for p in input_properties:
|
||||
representative_recommendations = {}
|
||||
for p in tqdm(input_properties):
|
||||
|
||||
# Property recommendations
|
||||
p.get_components(cleaned)
|
||||
p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
|
||||
|
||||
property_recommendations = []
|
||||
|
||||
# Floor recommendations
|
||||
floor_recommender = FloorRecommendations(property_instance=p, materials=materials)
|
||||
floor_recommender.recommend()
|
||||
|
||||
if floor_recommender.recommendations:
|
||||
property_recommendations.append(floor_recommender.recommendations)
|
||||
|
||||
# Wall recommendations
|
||||
|
||||
wall_recomender = WallRecommendations(property_instance=p, materials=materials)
|
||||
wall_recomender.recommend()
|
||||
|
||||
if wall_recomender.recommendations:
|
||||
property_recommendations.append(wall_recomender.recommendations)
|
||||
|
||||
# Roof recommendations
|
||||
roof_recommender = RoofRecommendations(property_instance=p, materials=materials)
|
||||
roof_recommender.recommend()
|
||||
|
||||
if roof_recommender.recommendations:
|
||||
property_recommendations.append(roof_recommender.recommendations)
|
||||
|
||||
# Ventilation recommendations
|
||||
ventilation_recomender = VentilationRecommendations(
|
||||
property_instance=p,
|
||||
materials=[part for part in materials if part["type"] == "mechanical_ventilation"]
|
||||
)
|
||||
ventilation_recomender.recommend()
|
||||
|
||||
if ventilation_recomender.recommendation:
|
||||
property_recommendations.append(ventilation_recomender.recommendation)
|
||||
|
||||
# Fireplace sealing recommendations
|
||||
fireplace_recommender = FireplaceRecommendations(property_instance=p)
|
||||
fireplace_recommender.recommend()
|
||||
|
||||
if fireplace_recommender.recommendation:
|
||||
property_recommendations.append(fireplace_recommender.recommendation)
|
||||
|
||||
# We insert temporary ids into the recommendations which is important for the optimiser later
|
||||
property_recommendations = insert_temp_recommendation_id(property_recommendations)
|
||||
recommender = Recommendations(property_instance=p, materials=materials, exclusions=body.exclusions)
|
||||
property_recommendations, property_representative_recommendations = recommender.recommend()
|
||||
|
||||
if not property_recommendations:
|
||||
continue
|
||||
|
||||
recommendations[p.id] = property_recommendations
|
||||
representative_recommendations[p.id] = property_representative_recommendations
|
||||
|
||||
# Finally, we'll prepare data for predicting the impact on SAP
|
||||
data_processor = DataProcessor(None, newdata=True)
|
||||
data_processor.insert_data(pd.DataFrame([p.get_model_data()]))
|
||||
# TODO: Temp
|
||||
if data_processor.data["UPRN"].values[0] == "":
|
||||
data_processor.data["UPRN"] = 0
|
||||
p.create_base_difference_epc_record(cleaned_lookup=cleaned)
|
||||
p.adjust_difference_record_with_recommendations(
|
||||
property_recommendations, property_representative_recommendations
|
||||
)
|
||||
|
||||
data_processor.pre_process()
|
||||
recommendations_scoring_data.extend(p.recommendations_scoring_data)
|
||||
|
||||
starting_epc_data = data_processor.get_component_features(suffix="_STARTING")
|
||||
ending_epc_data = data_processor.get_component_features(suffix="_ENDING")
|
||||
fixed_data = data_processor.get_fixed_features()
|
||||
|
||||
# We update the ending record with the recommended updates and we set lodgement date to today
|
||||
ending_epc_data["DAYS_TO_ENDING"] = data_processor.calculate_days_to(created_at)
|
||||
|
||||
for recommendations_by_type in property_recommendations:
|
||||
for i, rec in enumerate(recommendations_by_type):
|
||||
scoring_dict = create_recommendation_scoring_data(
|
||||
property=p,
|
||||
recommendation=rec,
|
||||
starting_epc_data=starting_epc_data,
|
||||
ending_epc_data=ending_epc_data,
|
||||
fixed_data=fixed_data,
|
||||
)
|
||||
|
||||
recommendations_scoring_data.append(scoring_dict)
|
||||
|
||||
# cleanup
|
||||
del data_processor
|
||||
# TODO: Make sure that number_habitable_rooms has been dropped
|
||||
|
||||
logger.info("Preparing data for scoring in sap change api")
|
||||
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
|
||||
|
||||
# Perform the same cleaning as in the model - first clean number of room variables though
|
||||
recommendations_scoring_data = DataProcessor.apply_averages_cleaning(
|
||||
data_to_clean=recommendations_scoring_data,
|
||||
cleaning_data=cleaning_data,
|
||||
cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
|
||||
colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
|
||||
recommendations_scoring_data = recommendations_scoring_data.drop(
|
||||
columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
|
||||
"carbon_ending"]
|
||||
)
|
||||
|
||||
recommendations_scoring_data = DataProcessor.apply_averages_cleaning(
|
||||
data_to_clean=recommendations_scoring_data,
|
||||
cleaning_data=cleaning_data,
|
||||
cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
|
||||
).drop(columns=["LOCAL_AUTHORITY"])
|
||||
model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
|
||||
|
||||
recommendations_scoring_data = DataProcessor.clean_missings_after_description_process(
|
||||
recommendations_scoring_data,
|
||||
ignore_cols=[c for c in recommendations_scoring_data.columns if ("thermal_transmittance" in c) or (
|
||||
"insulation_thickness" in c) or ("ENERGY_EFF" in c)]
|
||||
)
|
||||
|
||||
recommendations_scoring_data = DataProcessor.clean_efficiency_variables(recommendations_scoring_data)
|
||||
|
||||
sap_change_model_api = SAPChangeModelAPI(portfolio_id=body.portfolio_id, timestamp=created_at)
|
||||
file_location = sap_change_model_api.upload_scoring_data(
|
||||
df=recommendations_scoring_data, bucket=get_settings().DATA_BUCKET
|
||||
)
|
||||
response = sap_change_model_api.predict(
|
||||
file_location="s3://{DATA_BUCKET}/".format(DATA_BUCKET=get_settings().DATA_BUCKET) + file_location,
|
||||
)
|
||||
|
||||
# Retrieve the predictions
|
||||
predictions = pd.DataFrame(
|
||||
read_parquet_from_s3(
|
||||
bucket_name=get_settings().PREDICTIONS_BUCKET,
|
||||
file_key=response["storage_filepath"].split(get_settings().PREDICTIONS_BUCKET + "/")[1]
|
||||
all_predictions = {
|
||||
"sap_change_predictions": pd.DataFrame(),
|
||||
"heat_demand_predictions": pd.DataFrame(),
|
||||
"carbon_change_predictions": pd.DataFrame()
|
||||
}
|
||||
to_loop_over = range(0, recommendations_scoring_data.shape[0], SCORING_BATCH_SIZE)
|
||||
for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
|
||||
predictions_dict = model_api.predict_all(
|
||||
df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE],
|
||||
bucket=get_settings().DATA_BUCKET,
|
||||
prediction_buckets={
|
||||
"sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
|
||||
"heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
|
||||
"carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
predictions["predictions"] = predictions["predictions"].astype(float).round(1)
|
||||
predictions[['property_id', 'recommendation_id']] = predictions['id'].str.split('+', expand=True)
|
||||
# Append the predictions to the predictions dictionary
|
||||
for key, scored in predictions_dict.items():
|
||||
all_predictions[key] = pd.concat([all_predictions[key], scored])
|
||||
|
||||
# Insert the predictions into the recommendations and run the optimiser
|
||||
# TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a
|
||||
# possibility with heating system
|
||||
# TODO: After optimising, if there are any cheap, quick win measures (e.g. insulate water tank with hot water
|
||||
# cylinder jacket), we should add these to the recommendations as default
|
||||
logger.info("Optimising recommendations")
|
||||
for property_id in recommendations.keys():
|
||||
|
||||
property = [p for p in input_properties if p.id == property_id][0]
|
||||
property_predictions = predictions[predictions["property_id"] == str(property_id)]
|
||||
property_instance = [p for p in input_properties if p.id == property_id][0]
|
||||
|
||||
for recommendations_by_type in recommendations[property_id]:
|
||||
for rec in recommendations_by_type:
|
||||
new_sap = property_predictions[property_predictions["recommendation_id"] == str(
|
||||
rec["recommendation_id"]
|
||||
)]["predictions"].values[0]
|
||||
recommendations_with_impact, current_adjusted_energy, expected_adjusted_energy = (
|
||||
Recommendations.calculate_recommendation_impact(
|
||||
property_instance=property_instance,
|
||||
all_predictions=all_predictions,
|
||||
recommendations=recommendations
|
||||
)
|
||||
)
|
||||
|
||||
rec["sap_points"] = new_sap - float(property.data["current-energy-efficiency"])
|
||||
# Store the resulting adjusted energy in the property instance
|
||||
property_instance.set_adjusted_energy(
|
||||
current_adjusted_energy=current_adjusted_energy,
|
||||
expected_adjusted_energy=expected_adjusted_energy
|
||||
)
|
||||
|
||||
if rec["sap_points"] is None:
|
||||
raise ValueError("Sap points missing")
|
||||
input_measures = prepare_input_measures(recommendations_with_impact, body.goal)
|
||||
|
||||
input_measures = prepare_input_measures(recommendations[property_id], body.goal)
|
||||
current_sap_points = int(property_instance.data["current-energy-efficiency"])
|
||||
target_sap_points = epc_to_sap_lower_bound(body.goal_value)
|
||||
sap_gain = CostOptimiser.calculate_sap_gain_with_slack(target_sap_points - current_sap_points)
|
||||
|
||||
if body.budget:
|
||||
optimiser = GainOptimiser(input_measures, max_cost=body.budget)
|
||||
optimiser = GainOptimiser(
|
||||
input_measures, max_cost=body.budget, max_gain=sap_gain if sap_gain > 0 else 0
|
||||
)
|
||||
else:
|
||||
# The minimum gain is the minimum number of SAP points required to get to the target SAP band
|
||||
current_sap_points = int(property.data["current-energy-efficiency"])
|
||||
target_sap_points = epc_to_sap_lower_bound(body.goal_value)
|
||||
|
||||
# If the gain is negative, the optimiser will return an empty solution
|
||||
optimiser = CostOptimiser(
|
||||
input_measures, min_gain=target_sap_points - current_sap_points
|
||||
input_measures,
|
||||
min_gain=sap_gain
|
||||
)
|
||||
|
||||
optimiser.setup()
|
||||
|
|
@ -291,13 +281,26 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
|
||||
selected_recommendations = {r["id"] for r in solution}
|
||||
|
||||
# If wall insulation is selected, we also include mechanical ventilation as a best practice measure
|
||||
if any(x in [r["type"] for r in solution] for x in [
|
||||
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"
|
||||
]):
|
||||
ventilation_rec = next(
|
||||
(r[0] for r in recommendations_with_impact if r[0]["type"] == "mechanical_ventilation"),
|
||||
None
|
||||
)
|
||||
|
||||
# If a matching recommendation was found, add its ID to the selected recommendations
|
||||
if ventilation_rec:
|
||||
selected_recommendations.add(ventilation_rec["recommendation_id"])
|
||||
|
||||
# We'll use the set of selected recommendations to filter the recommendations to upload
|
||||
final_recommendations = [
|
||||
[
|
||||
{**rec, "default": True if rec["recommendation_id"] in selected_recommendations else False}
|
||||
for rec in recommendations_by_type
|
||||
]
|
||||
for recommendations_by_type in recommendations[property_id]
|
||||
for recommendations_by_type in recommendations_with_impact
|
||||
]
|
||||
|
||||
# We'll also unlist the recommendations so they're a bit easier to handle from here onwards
|
||||
|
|
@ -311,6 +314,7 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
# 3) the recommendations
|
||||
|
||||
logger.info("Uploading recommendations to the database")
|
||||
property_valuation_increases = []
|
||||
session.commit()
|
||||
for i in range(0, len(input_properties), BATCH_SIZE):
|
||||
try:
|
||||
|
|
@ -318,30 +322,43 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
batch_properties = input_properties[i:i + BATCH_SIZE]
|
||||
|
||||
for p in batch_properties:
|
||||
recommendations_to_upload = recommendations.get(p.id, [])
|
||||
default_recommendations = [r for r in recommendations_to_upload if r["default"]]
|
||||
total_sap_points = sum([r["sap_points"] for r in default_recommendations])
|
||||
new_sap_points = float(p.data["current-energy-efficiency"]) + total_sap_points
|
||||
new_epc = sap_to_epc(new_sap_points)
|
||||
|
||||
valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc)
|
||||
|
||||
# Your existing operations
|
||||
property_details_epc = p.get_property_details_epc(
|
||||
portfolio_id=body.portfolio_id, rating_lookup=rating_lookup
|
||||
portfolio_id=body.portfolio_id, rating_lookup=rating_lookup,
|
||||
)
|
||||
create_property_details_epc(session, property_details_epc)
|
||||
|
||||
# TODO: TEMP
|
||||
if p.data["uprn"] == "":
|
||||
print("Get rid of me!")
|
||||
p.data["uprn"] = 0
|
||||
update_or_create_property_spatial_details(session, p.uprn, p.spatial)
|
||||
|
||||
property_data = p.get_full_property_data()
|
||||
property_data = p.get_full_property_data(current_valuation=valuations["current_value"])
|
||||
update_property_data(
|
||||
session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data
|
||||
)
|
||||
|
||||
recommendations_to_upload = recommendations.get(p.id, [])
|
||||
if not recommendations_to_upload:
|
||||
continue
|
||||
|
||||
new_plan_id = create_plan(session, {
|
||||
"portfolio_id": body.portfolio_id,
|
||||
"property_id": p.id,
|
||||
"is_default": True
|
||||
"is_default": True,
|
||||
"valuation_increase_lower_bound": (
|
||||
valuations["lower_bound_increased_value"] - valuations["current_value"]
|
||||
),
|
||||
"valuation_increase_upper_bound": (
|
||||
valuations["upper_bound_increased_value"] - valuations["current_value"]
|
||||
),
|
||||
"valuation_increase_average": (
|
||||
valuations["average_increased_value"] - valuations["current_value"]
|
||||
),
|
||||
})
|
||||
|
||||
uploaded_recommendation_ids = upload_recommendations(session, recommendations_to_upload, p.id)
|
||||
|
|
@ -350,6 +367,10 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
session, plan_id=new_plan_id, recommendation_ids=uploaded_recommendation_ids
|
||||
)
|
||||
|
||||
property_valuation_increases.append(
|
||||
valuations["average_increased_value"] - valuations["current_value"]
|
||||
)
|
||||
|
||||
# Commit the session after each batch
|
||||
session.commit()
|
||||
|
||||
|
|
@ -365,7 +386,18 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
# way to do this, but it's the simplest and will be a process that we can re-use since when we change a
|
||||
# recommendation from being default to not default, we'll need to re-run this process to re-calculate the
|
||||
# portfolio level impact
|
||||
aggregate_portfolio_recommendations(session, portfolio_id=body.portfolio_id)
|
||||
|
||||
total_valuation_increase = sum(property_valuation_increases)
|
||||
labour_days = round(max(
|
||||
[sum(r["labour_days"] for r in rec_group if r["default"]) for p_id, rec_group in recommendations.items()]
|
||||
))
|
||||
|
||||
aggregate_portfolio_recommendations(
|
||||
session,
|
||||
portfolio_id=body.portfolio_id,
|
||||
total_valuation_increase=total_valuation_increase,
|
||||
labour_days=labour_days
|
||||
)
|
||||
|
||||
# Commit final changes
|
||||
session.commit()
|
||||
|
|
|
|||
|
|
@ -1,10 +1,53 @@
|
|||
from pydantic import BaseModel
|
||||
from pydantic import BaseModel, conlist, validator
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class PlanTriggerRequest(BaseModel):
    """
    Request body for the plan trigger endpoint (``trigger_plan``).

    The three validators below reject any ``exclusions`` item, ``goal`` or ``housing_type`` that is
    not in the matching allow-list defined on this class.
    """

    # Optional spend limit
    budget: Optional[float] = None
    # Must be one of _allowed_goals
    goal: str
    # Must be one of _allowed_housing_types
    housing_type: str
    # Target for the goal - presumably an EPC band letter; confirm against the caller
    goal_value: str
    portfolio_id: int
    trigger_file_path: str
    already_installed_file_path: Optional[str] = None
    patches_file_path: Optional[str] = None
    # When supplied, the list must hold at least one item and every item must be in _allowed_exclusions
    exclusions: Optional[conlist(str, min_items=1)] = None

    # Allow-list for the items of `exclusions`
    _allowed_exclusions = {
        "wall_insulation",
        "ventilation",
        "roof_insulation",
        "floor_insulation",
        "windows",
        "fireplace",
        "heating",
        "hot_water",
        "lighting",
        "solar_pv"
    }

    # Allow-list for `goal`
    _allowed_goals = {"Increase EPC"}

    # Allow-list for `housing_type`
    _allowed_housing_types = {"Social", "Private"}

    @validator('exclusions', each_item=True)
    def check_exclusions(cls, v):
        """Accept an exclusion only if it is one of the pre-defined possibilities."""
        if v in cls._allowed_exclusions:
            return v
        raise ValueError(f"{v} is not an allowed exclusion")

    @validator('goal')
    def check_goal(cls, v):
        """Accept the goal only if it is one of the pre-defined possibilities."""
        if v in cls._allowed_goals:
            return v
        raise ValueError(f"{v} is not a valid goal")

    @validator('housing_type')
    def check_housing_type(cls, v):
        """Accept the housing type only if it is one of the pre-defined possibilities."""
        if v in cls._allowed_housing_types:
            return v
        raise ValueError(f"{v} is not a valid housing type")
|
||||
|
|
|
|||
|
|
@ -8,25 +8,6 @@ from backend.app.config import get_settings
|
|||
import msgpack
|
||||
|
||||
|
||||
def insert_temp_recommendation_id(property_recommendations):
    """
    Stamp every recommendation with a temporary, sequential "recommendation_id".

    The ids are needed to tell default recommendations apart from non-default ones once the
    optimiser has been run. Numbering starts at 0 and runs across all groups in order. The
    recommendation dicts are modified in place.

    :param property_recommendations: nested list of recommendations, grouped by type
    :return: the same nested list, where each recommendation now carries an integer "recommendation_id"
    """
    flattened = (item for group in property_recommendations for item in group)

    for temp_id, item in enumerate(flattened):
        item["recommendation_id"] = temp_id

    return property_recommendations
|
||||
|
||||
|
||||
def get_cleaned():
|
||||
"""
|
||||
This function will retrieve the cleaned dataset from s3 which has the cleaned
|
||||
|
|
@ -44,145 +25,3 @@ def get_cleaned():
|
|||
cleaned = msgpack.unpackb(cleaned, raw=False)
|
||||
|
||||
return cleaned
|
||||
|
||||
|
||||
def create_recommendation_scoring_data(
        property: Property,
        recommendation: dict,
        starting_epc_data: pd.DataFrame,
        ending_epc_data: pd.DataFrame,
        fixed_data: pd.DataFrame,
):
    """
    Assemble the scoring row for one property/recommendation pair, to be passed to the sap model api.

    The row is seeded from the first record of each of the three dataframes. Missing u-values and
    insulation thicknesses are then filled in, and the "_ENDING" features are overwritten to reflect
    the recommended measure.

    :param property: the property being scored
    :param recommendation: recommendation dict; "recommendation_id" and "type" are always read,
        "new_u_value" and "parts" are read for the insulation types
    :param starting_epc_data: dataframe whose first record is merged into the row
    :param ending_epc_data: dataframe whose first record is merged into the row
    :param fixed_data: dataframe whose first record is merged into the row
    :return: dict of features for this property/recommendation pair
    :raises NotImplementedError: for an unsupported recommendation type, or a floor insulation
        recommendation with more than one part
    :raises ValueError: if a roof insulation recommendation does not have exactly one part
    """

    def fallback_wall_u_value():
        # u-value derived from the property's existing walls
        return get_wall_u_value(
            clean_description=property.walls["clean_description"],
            age_band=property.age_band,
            is_granite_or_whinstone=property.walls["is_granite_or_whinstone"],
            is_sandstone_or_limestone=property.walls["is_sandstone_or_limestone"]
        )

    def fallback_floor_u_value():
        # u-value derived from the property's existing floor
        return get_floor_u_value(
            floor_type=property.floor_type,
            area=property.floor_area,
            perimeter=property.perimeter,
            wall_type=property.wall_type,
            insulation_thickness=property.floor["insulation_thickness"],
            age_band=property.age_band,
        )

    def fallback_roof_u_value():
        # u-value derived from the property's existing roof
        return get_roof_u_value(
            insulation_thickness=property.roof["insulation_thickness"],
            has_dwelling_above=property.roof["has_dwelling_above"],
            is_loft=property.roof["is_loft"],
            is_roof_room=property.roof["is_roof_room"],
            is_thatched=property.roof["is_thatched"],
            age_band=property.age_band,
            is_flat=property.roof["is_flat"],
            is_pitched=property.roof["is_pitched"],
            is_at_rafters=property.roof["is_at_rafters"],
        )

    row = {
        "UPRN": property.data["uprn"],
        "id": str(property.id) + "+" + str(recommendation["recommendation_id"]),
        "LOCAL_AUTHORITY": property.data["local-authority"],
        **starting_epc_data.to_dict("records")[0],
        **ending_epc_data.to_dict("records")[0],
        **fixed_data.to_dict("records")[0]
    }

    # Fill the starting u-values we don't have (walls, then floor, then roof)
    for key, fallback in (
            ("walls_thermal_transmittance", fallback_wall_u_value),
            ("floor_thermal_transmittance", fallback_floor_u_value),
            ("roof_thermal_transmittance", fallback_roof_u_value),
    ):
        if row[key] is None:
            row[key] = fallback()

    # A missing starting thickness is recorded as "none"
    for key in ("walls_insulation_thickness", "floor_insulation_thickness", "roof_insulation_thickness"):
        if row[key] is None:
            row[key] = "none"

    measure = recommendation["type"]

    # Walls
    if measure != "wall_insulation":
        if row["walls_thermal_transmittance_ENDING"] is None:
            row["walls_thermal_transmittance_ENDING"] = fallback_wall_u_value()

        if row["walls_insulation_thickness_ENDING"] is None:
            row["walls_insulation_thickness_ENDING"] = "none"
    else:
        # The upgrade changes the wall u-value and marks the walls as insulated
        row.update({
            "walls_thermal_transmittance_ENDING": recommendation["new_u_value"],
            "walls_insulation_thickness_ENDING": "above average",
            "WALLS_ENERGY_EFF_ENDING": "Good",
        })

    # Floor
    if measure != "floor_insulation":
        if row["floor_thermal_transmittance_ENDING"] is None:
            row["floor_thermal_transmittance_ENDING"] = fallback_floor_u_value()

        if row["floor_insulation_thickness_ENDING"] is None:
            row["floor_insulation_thickness_ENDING"] = "none"
    else:
        if len(recommendation["parts"]) > 1:
            raise NotImplementedError("Have more than 1 floor insulation part - handle this case")

        row.update({
            "floor_thermal_transmittance_ENDING": recommendation["new_u_value"],
            # We don't really see above average for this in the training data
            "floor_insulation_thickness_ENDING": "average",
            "FLOOR_ENERGY_EFF_ENDING": "Good",
        })

    # Roof
    if measure != "roof_insulation":
        # This fill is not based on recommended upgrades
        if row["roof_thermal_transmittance_ENDING"] is None:
            row["roof_thermal_transmittance_ENDING"] = fallback_roof_u_value()

        if row["roof_insulation_thickness_ENDING"] is None:
            row["roof_insulation_thickness_ENDING"] = "none"
    else:
        row["roof_thermal_transmittance_ENDING"] = recommendation["new_u_value"]

        roof_parts = recommendation["parts"]
        if len(roof_parts) != 1:
            raise ValueError("More than one part for roof insulation - investiage me")

        # The ending thickness is the part's depth, as a whole number in string form
        row["roof_insulation_thickness_ENDING"] = str(int(roof_parts[0]["depth"]))
        row["ROOF_ENERGY_EFF_ENDING"] = "Very Good"

    if measure == "mechanical_ventilation":
        row["MECHANICAL_VENTILATION_ENDING"] = 'mechanical, extract only'

    if measure == "sealing_open_fireplace":
        row["NUMBER_OPEN_FIREPLACES_ENDING"] = 0

    supported_measures = [
        "wall_insulation", "floor_insulation", "roof_insulation", "mechanical_ventilation", "sealing_open_fireplace",
    ]
    if measure not in supported_measures:
        raise NotImplementedError("Implement me")

    return row
|
||||
|
|
|
|||
|
|
@ -1,10 +1,7 @@
|
|||
import boto3
|
||||
import csv
|
||||
from io import StringIO
|
||||
import string
|
||||
import secrets
|
||||
import logging
|
||||
import pandas as pd
|
||||
from io import BytesIO
|
||||
|
||||
|
||||
|
|
@ -42,25 +39,6 @@ def setup_logger(log_file=None, level=logging.INFO, overwrite_handler=False):
|
|||
return logger
|
||||
|
||||
|
||||
def read_csv_from_s3(bucket_name, filepath):
    """
    Download a CSV object from S3 and parse it into a list of row dictionaries.

    :param bucket_name: name of the S3 bucket holding the object
    :param filepath: key of the CSV object within the bucket
    :return: list with one dict per CSV row, keyed by the header row
    """
    response = boto3.client('s3').get_object(Bucket=bucket_name, Key=filepath)

    # The body is read in full and decoded as UTF-8 before being handed to the csv reader
    text = response['Body'].read().decode('utf-8')

    return list(csv.DictReader(StringIO(text)))
|
||||
|
||||
|
||||
def generate_api_key():
|
||||
# Define the characters that will be used to generate the api key
|
||||
characters = string.ascii_letters + string.digits
|
||||
|
|
@ -69,15 +47,15 @@ def generate_api_key():
|
|||
return api_key
|
||||
|
||||
|
||||
def sap_to_epc(sap_points: int):
|
||||
def sap_to_epc(sap_points: int | float):
|
||||
"""
|
||||
Simple utility function to convert SAP points to EPC rating.
|
||||
:param sapPoints: numerical value of SAP points, typically between 0 and 100
|
||||
:param sap_points: numerical value of SAP points, typically between 0 and 100
|
||||
:return:
|
||||
"""
|
||||
|
||||
if sap_points <= 0 or sap_points > 100:
|
||||
raise ValueError("SAP points should be between 1 and 100.")
|
||||
if sap_points <= 0:
|
||||
raise ValueError("SAP points should be above 0.")
|
||||
|
||||
if sap_points >= 92:
|
||||
return "A"
|
||||
|
|
@ -121,19 +99,6 @@ def epc_to_sap_lower_bound(epc: str):
|
|||
raise ValueError("EPC rating should be between A and G")
|
||||
|
||||
|
||||
def read_parquet_from_s3(bucket_name, file_key):
    """
    Download a Parquet object from S3 and load it into a pandas DataFrame.

    :param bucket_name: name of the S3 bucket holding the object
    :param file_key: key of the Parquet object within the bucket
    :return: DataFrame read from the object's bytes
    """
    response = boto3.client('s3').get_object(Bucket=bucket_name, Key=file_key)

    # The whole object is read into memory and parsed from an in-memory buffer
    parquet_bytes = response["Body"].read()

    return pd.read_parquet(BytesIO(parquet_bytes))
|
||||
|
||||
|
||||
def save_dataframe_to_s3_parquet(df, bucket_name, file_key):
|
||||
"""
|
||||
Save a pandas DataFrame to S3 as a Parquet file.
|
||||
|
|
|
|||
117
backend/ml_models/AnnualBillSavings.py
Normal file
117
backend/ml_models/AnnualBillSavings.py
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
class AnnualBillSavings:
|
||||
"""
|
||||
This is a simple class which will estimate the annual bill savings, based on the kwh savings.
|
||||
This class uses data from Ofgem, including their price caps, to provide us with an estimate for
|
||||
1KWH of energy.
|
||||
"""
|
||||
|
||||
# These gas an electricity consumption figures are based off of figures presented by Ofgem
|
||||
# https://www.ofgem.gov.uk/information-consumers/energy-advice-households/average-gas-and-electricity-use-explained
|
||||
AVERAGE_ELECTRICITY_CONSUMPTION = 2700
|
||||
AVERAGE_GAS_CONSUMPTION = 11500
|
||||
|
||||
# Latest price cap figures from Ofgem are for April 2024
|
||||
# https://www.ofgem.gov.uk/publications/new-energy-price-cap-level-april-june-2024-starts-today
|
||||
ELECTRICITY_PRICE_CAP = 0.245
|
||||
GAS_PRICE_CAP = 0.0604
|
||||
|
||||
# This is a weighted mean of the price caps, using the consumption figures above as weights
|
||||
PRICE_FACTOR = 0.09549999999999999
|
||||
|
||||
# Daily standard charge, based on average across England, Scotland and Wales, and includes VAT
|
||||
DAILY_STANDARD_CHARGE_GAS = 0.3143
|
||||
DAILY_STANDARD_CHARGE_ELECTRICITY = 0.601
|
||||
|
||||
EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"]
|
||||
|
||||
@classmethod
|
||||
def estimate(cls, kwh: float):
|
||||
"""
|
||||
Estimate the annual bill savings based on the kwh savings
|
||||
:param kwh: The kwh savings
|
||||
:return: An estimate for annual bill savings
|
||||
"""
|
||||
return cls.PRICE_FACTOR * kwh
|
||||
|
||||
@classmethod
|
||||
def estimate_electric(cls, kwh: float):
|
||||
"""
|
||||
Estimate the annual bill savings based on the kwh savings
|
||||
:param kwh: The kwh savings
|
||||
:return: An estimate for annual bill savings
|
||||
"""
|
||||
return cls.ELECTRICITY_PRICE_CAP * kwh
|
||||
|
||||
@classmethod
|
||||
def calculate_annual_bill(cls, kwh):
|
||||
"""
|
||||
This method will estimate the total annual bill for a property
|
||||
It assumed gas & electricity are used
|
||||
:param kwh: The total kwh consumption
|
||||
:return: An estimate for annual bill
|
||||
"""
|
||||
|
||||
return cls.PRICE_FACTOR * kwh + (cls.DAILY_STANDARD_CHARGE_GAS + cls.DAILY_STANDARD_CHARGE_ELECTRICITY * 365)
|
||||
|
||||
@classmethod
|
||||
def adjust_energy_to_metered(cls, epc_energy_consumption, current_epc_rating):
|
||||
"""
|
||||
The over-prediction of energy use by EPCs in Great Britain: A comparison
|
||||
of EPC-modelled and metered primary energy use intensity
|
||||
|
||||
Which can be found here: https://www.sciencedirect.com/science/article/pii/S0378778823002542
|
||||
We implement the results on page 10
|
||||
|
||||
:return:
|
||||
"""
|
||||
|
||||
gradients = {
|
||||
"A": -0.1,
|
||||
"B": -0.1,
|
||||
"C": -0.43,
|
||||
"D": -0.52,
|
||||
"E": -0.7,
|
||||
"F": -0.76,
|
||||
"G": -0.76
|
||||
}
|
||||
|
||||
intercepts = {
|
||||
"A": 28,
|
||||
"B": 28,
|
||||
"C": 97,
|
||||
"D": 119,
|
||||
"E": 160,
|
||||
"F": 157,
|
||||
"G": 157
|
||||
}
|
||||
|
||||
gradient = gradients[current_epc_rating]
|
||||
intercept = intercepts[current_epc_rating]
|
||||
|
||||
# This should be negative
|
||||
consumption_difference = gradient * epc_energy_consumption + intercept
|
||||
|
||||
adjusted_consumption = (epc_energy_consumption + consumption_difference)
|
||||
if adjusted_consumption < 0:
|
||||
raise ValueError("consumption_difference should be negative")
|
||||
|
||||
return adjusted_consumption
|
||||
|
||||
@classmethod
def adjust_expected_band(cls, expected_epc_rating, current_epc_rating):
    """
    Because of the differing gradients and intercepts when adjusting, it's possible for
    expected_adjusted_energy to be bigger than current_adjusted_energy. In that case the expected
    band is pulled back by one position in ``cls.EPC_BANDS``, but never to a position before the
    current band. This function performs that EPC band adjustment.

    :param expected_epc_rating: The expected EPC rating
    :param current_epc_rating: The current EPC rating
    :return: The band one position before the expected band in ``cls.EPC_BANDS``, or the current
        band if that position would fall before the current band's position
    """
    bands = cls.EPC_BANDS

    # Position one step back from the expected band
    stepped_back_index = bands.index(expected_epc_rating) - 1
    current_index = bands.index(current_epc_rating)

    if stepped_back_index >= current_index:
        return bands[stepped_back_index]

    return current_epc_rating
|
||||
173
backend/ml_models/Valuation.py
Normal file
173
backend/ml_models/Valuation.py
Normal file
|
|
@ -0,0 +1,173 @@
|
|||
import numpy as np
|
||||
|
||||
|
||||
class PropertyValuation:
    """
    This is a placeholder class for the property valuation model.

    Current values come from a hand-maintained UPRN lookup, and the uplift from improving the EPC
    band is estimated from a handful of published sources.
    """

    # Hand-collected current valuations, keyed by UPRN
    UPRN_VALUE_LOOKUP = {
        15038202: 202000,
        37024763: 213000,
        100070478545: 212000,
        100070297696: 662000,  # Based on Zoopla's estimation of nearby house, 8 bloomfield road
        100070476394: 222000,  # Based on Zoopla's estimation of next door, 20 Parkside
        100071264896: 128000,
        # Based on next door neighbour: https://themovemarket.com/tools/propertyprices/flat-2-queens-wood-house-219
        # -brandwood-road-birmingham-b14-6pu
        100070533688: 218000,  # Based on Zoopla's estimation of 95 Tenby Road, which is also mid terrace
        100070505235: 344000,  # Based on Zoopla's estimation of 131 School road, which is also semi-detached
        100070513306: 182000,  # Based on Zoopla's estimation of 61 Simmons Drive
        100071306896: 77000,  # Based on Flat 2 of 44 Wedgewood Road on Zoopla
        100021192109: 650000,  # Based on Zoopla
        766249482: 358000,  # Based on Zoopla estimate for 19 Spring Lane, 3 bedroom semi-detached
        100120703802: 277000,  # Based on Zoopla
        10014469685: 286000,  # Based on Zoopla
        10001328782: 196000,  # Based on Zoopla
        # Urban Splash - valuations from The Move Market
        10023345430: 74_000,
        10023345435: 99_000,
        10023345436: 62_000,
        10023345441: 62_000,
        10094183503: 2_988_000,
        10094183499: 123_000,
        10070056824: 70_000,
        # NOTE(review): this key has 12 digits while the neighbouring Urban Splash UPRNs have 11
        # (e.g. 10070056243) - possibly a typo for 10070056242; confirm against the source data.
        110070056242: 100_000,
        10070056243: 130_000,
        10070056817: 130_000,
        10094183501: 185_000,
        10070056250: 71_000,
        10094183500: 185_000,
        10070056843: 67_000,
        10070056844: 67_000,
        10070056241: 76_000,
        10070056834: 63_000,
        10023345439: 62_000,
        10070056815: 101_000,
        10070056816: 101_000,
        10094183498: 101_000,
        10070056840: 673_000,
        10070056848: 76_000,
        10070056849: 76_000,
        10070056829: 76_000,
        10070056920: 76_000,
        10023345463: 76_000,
        # IMMO Dudley Pilot - search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/
        90070461: 172_000,  # Based on Zoopla
        90022227: 181_000,  # Based on Zoopla
        90106884: 180_000,  # Based on Zoopla
        90051858: 201_000,  # Based on Zoopla
        90060989: 172_000,  # Based on Zoopla
        90048026: 196_000,  # Based on Zoopla
        90077535: 192_000,  # Based on Zoopla
        90093693: 279_000,  # Based on Zoopla
        90055152: 149_000,  # Based on Zoopla
        90028499: 238_000,  # Based on Zoopla
    }

    # We base our valuation uplifts on a number of sources
    # https://www.moneysupermarket.com/gas-and-electricity/value-of-efficiency/
    MSM_MAPPING = [
        {"start": "G", "end": "F", "increase_percentage": 0.06},
        {"start": "F", "end": "E", "increase_percentage": 0.01},
        {"start": "E", "end": "D", "increase_percentage": 0.01},
        {"start": "D", "end": "C", "increase_percentage": 0.02},
        {"start": "C", "end": "B", "increase_percentage": 0.04},
        {"start": "B", "end": "A", "increase_percentage": 0.0},
    ]

    # https://www.lloydsbankinggroup.com/media/press-releases/2021/halifax/homebuyers-pay-a-green-premium-of-40000
    # -for-the-most-energy-efficient-properties.html
    LLOYDS_MAPPING = [
        {"start": "G", "end": "F", "increase_percentage": 0.038},
        {"start": "F", "end": "E", "increase_percentage": 0.029},
        {"start": "E", "end": "D", "increase_percentage": 0.024},
        {"start": "D", "end": "C", "increase_percentage": 0.02},
        {"start": "C", "end": "B", "increase_percentage": 0.02},
        {"start": "B", "end": "A", "increase_percentage": 0.018},
    ]

    # Direct (not band-by-band) uplifts, matched on the exact current -> target pair
    KNIGHT_FRANK_MAPPING = [
        {"start": "D", "end": "C", "increase_percentage": 0.03},
        {"start": "D", "end": "B", "increase_percentage": 0.088},
        {"start": "D", "end": "A", "increase_percentage": 0.088},
    ]

    NATIONWIDE_MAPPING = [
        # {"start": "G", "end": "D", "increase_percentage": 0.035},
        # {"start": "F", "end": "D", "increase_percentage": 0.035},
        # {"start": "D", "end": "B", "increase_percentage": 0.017},
        # {"start": "D", "end": "A", "increase_percentage": 0.017},
    ]

    # Ordered from the worst band to the best band
    EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"]

    @staticmethod
    def _matching_increases(mapping, start, end):
        """Return the uplift of every entry in ``mapping`` for the ``start`` -> ``end`` transition."""
        return [
            entry["increase_percentage"]
            for entry in mapping
            if entry["start"] == start and entry["end"] == end
        ]

    @classmethod
    def get_increase(cls, epc_band_range):
        """
        Compound the single-band uplifts across a run of EPC bands.

        :param epc_band_range: Sequence of bands to step through, e.g. ["D", "C", "B"]. Each
            neighbouring pair must have an entry in both MSM_MAPPING and LLOYDS_MAPPING.
        :return: Tuple of (msm_increase, lloyds_increase), each the compounded fractional uplift.
            Both are 0 when the range contains fewer than two bands.
        :raises IndexError: If a neighbouring pair has no entry in one of the mappings
        """
        msm_factors = []
        lloyds_factors = []
        for start_band, end_band in zip(epc_band_range, epc_band_range[1:]):
            msm_factors.append(1 + cls._matching_increases(cls.MSM_MAPPING, start_band, end_band)[0])
            lloyds_factors.append(1 + cls._matching_increases(cls.LLOYDS_MAPPING, start_band, end_band)[0])

        # We now aggregate the increases. They should be compound increases so we multiply them together
        msm_increase = np.prod(msm_factors) - 1
        lloyds_increase = np.prod(lloyds_factors) - 1

        return msm_increase, lloyds_increase

    @classmethod
    def estimate(cls, property_instance, target_epc):
        """
        Estimate the current value of a property and its value after reaching ``target_epc``.

        :param property_instance: Object exposing ``uprn`` and a ``data`` mapping that contains
            "current-energy-rating"
        :param target_epc: The EPC band the property is expected to reach
        :return: Dictionary with the current value, the lower bound, upper bound and average
            increased values, and the average increase. Every entry is 0 when there is no
            valuation for the property's UPRN.
        :raises ValueError: If the current or target band is not in EPC_BANDS
        """
        value = cls.UPRN_VALUE_LOOKUP.get(property_instance.uprn)

        if not value:
            return {
                "current_value": 0,
                "lower_bound_increased_value": 0,
                "upper_bound_increased_value": 0,
                "average_increased_value": 0,
                "average_increase": 0
            }

        current_epc = property_instance.data["current-energy-rating"]
        # We get the spectrum of ratings between the current and target EPC. The slice is empty or a
        # single band when the target is not better than the current band, which gives a 0 uplift.
        first = cls.EPC_BANDS.index(current_epc)
        last = cls.EPC_BANDS.index(target_epc)
        epc_band_range = cls.EPC_BANDS[first: last + 1]

        msm_increase, lloyds_increase = cls.get_increase(epc_band_range)
        all_increases = [msm_increase, lloyds_increase]

        # We now use the knight frank and nationwide data to get further valuation evidence, if we have it
        for mapping in (cls.KNIGHT_FRANK_MAPPING, cls.NATIONWIDE_MAPPING):
            direct_matches = cls._matching_increases(mapping, current_epc, target_epc)
            if direct_matches:
                all_increases.append(direct_matches[0])

        max_increase = max(all_increases)
        min_increase = min(all_increases)
        avg_increase = np.mean(all_increases)

        return {
            "current_value": value,
            "lower_bound_increased_value": value * (1 + min_increase),
            "upper_bound_increased_value": value * (1 + max_increase),
            "average_increased_value": value * (1 + avg_increase),
            "average_increase": value * (1 + avg_increase) - value
        }
|
||||
144
backend/ml_models/api.py
Normal file
144
backend/ml_models/api.py
Normal file
|
|
@ -0,0 +1,144 @@
|
|||
import pandas as pd
|
||||
import requests
|
||||
from requests.exceptions import RequestException
|
||||
from utils.logger import setup_logger
|
||||
from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class ModelApi:
    """
    Client for the model prediction APIs: SAP change, heat demand change and carbon change.

    Scoring data is uploaded to s3, a model endpoint is asked to score it, and the predictions the
    endpoint stored are read back from s3.
    """

    MODEL_PREFIXES = [
        "sap_change_predictions",
        "heat_demand_predictions",
        "carbon_change_predictions"
    ]

    # Maps each model prefix to the path segment of its endpoint under base_url
    MODEL_URLS = {
        "sap_change_predictions": "sapmodel",
        "heat_demand_predictions": "heatmodel",
        "carbon_change_predictions": "carbonmodel"
    }

    def __init__(
        self,
        portfolio_id,
        timestamp,
        base_url="https://api.dev.hestia.homes",
    ):
        """
        This class handles the communication with the Model APIs. These models include SAP change,
        heat demand change and carbon change

        :param portfolio_id: The portfolio ID to be passed in the request payload.
        :param timestamp: The creation timestamp to be passed in the request payload.
        :param base_url: Base URL that the model endpoints are served from
        """
        self.base_url = base_url
        self.portfolio_id = portfolio_id
        self.timestamp = timestamp

    def upload_scoring_data(self, df: pd.DataFrame, bucket: str, model_prefix: str) -> str:
        """
        The model api needs scoring data that is sitting in s3 to use as a dataset to score on.
        This method allows the user to upload a table as a parquet file. This method will return the
        file location, which can be used as the file location in the predict() method

        :param df: Pandas dataframe with scoring data to be uploaded to s3
        :param bucket: Name of the bucket in s3 to upload to
        :param model_prefix: The model prefix to be used in the file location
        :return: The key of the uploaded parquet file within the bucket
        :raises ValueError: If model_prefix is not one of MODEL_PREFIXES
        """

        if model_prefix not in self.MODEL_PREFIXES:
            raise ValueError(f"Model prefix specified is not in {self.MODEL_PREFIXES}")

        # Store parquet file in s3 for scoring
        file_location = f"{model_prefix}/{self.portfolio_id}/{self.timestamp}.parquet"

        logger.info("Storing scoring data to s3")
        save_dataframe_to_s3_parquet(
            df=df,
            bucket_name=bucket,
            file_key=file_location
        )

        return file_location

    def predict(self, file_location, model_prefix: str):
        """Makes a POST request to the model API selected by model_prefix.

        Args:
            file_location (str): The file location to be passed in the request payload.
            model_prefix (str): The model prefix to be used in the request URL.

        Returns:
            dict: The API response as a dictionary if the request was successful, None otherwise.
        """
        logger.info(f"Making request to {model_prefix} change api")
        url = f"{self.base_url}/{self.MODEL_URLS[model_prefix]}/predict"
        payload = {
            "file_location": file_location,
            "property_id": "",  # This should get removed
            "portfolio_id": self.portfolio_id,
            "created_at": self.timestamp
        }

        try:
            response = requests.post(url, json=payload, headers={"Content-Type": "application/json"}, timeout=120)

            # Check if the response status code is 2xx (success)
            response.raise_for_status()

            # Return the JSON response as a Python dictionary
            return response.json()
        except RequestException as e:
            logger.error(f"An error occurred: {e}")
            # Failures are reported to the caller as None rather than raised
            return None

    @staticmethod
    def _format_predictions(predictions_df):
        """Round the predictions and split the id into property_id, recommendation_id and phase, in place."""
        predictions_df['predictions'] = predictions_df["predictions"].astype(float).round(1)
        predictions_df[['property_id', 'recommendation_id']] = predictions_df['id'].str.split('+', expand=True)
        # To grab the phase, we pull the integer after "phase=" in the recommendation_id. All
        # consecutive digits are captured so that phases of 10 and above are not truncated.
        phase = predictions_df['recommendation_id'].str.extract(r'phase=(\d+)', expand=False)
        # Convert back to int
        predictions_df['phase'] = phase.astype(int)

        return predictions_df

    def predict_all(self, df, bucket, prediction_buckets) -> dict:
        """
        For each model prefix, this method will upload the scoring data to s3 and then make a request
        to the model api to generate predictions. The predictions will be stored in the predictions
        bucket. This method will then fetch the stored predictions and format them, returning all of
        the predictions as a dictionary of pandas dataframes

        :param df: Pandas dataframe with scoring data to be uploaded to s3
        :param bucket: Name of the bucket in s3 to upload to
        :param prediction_buckets: Dictionary containing the prediction buckets for each model prefix
        :return: Dictionary mapping each model prefix to its formatted predictions dataframe
        :raises RuntimeError: If the request to one of the model apis failed
        """

        predictions = {}
        for model_prefix in self.MODEL_PREFIXES:
            logger.info(f"Scoring for model prefix: {model_prefix}")
            file_location = self.upload_scoring_data(df, bucket, model_prefix)
            response = self.predict(f"s3://{bucket}/{file_location}", model_prefix)

            # predict() returns None when the request failed; fail with a clear message instead of
            # an opaque TypeError when subscripting the response below
            if response is None:
                raise RuntimeError(f"Prediction request for {model_prefix} failed, see the logged error")

            predictions_bucket = prediction_buckets[model_prefix]

            # Retrieve the predictions. The key is whatever follows "<bucket>/" in the stored path.
            predictions_df = pd.DataFrame(
                read_dataframe_from_s3_parquet(
                    bucket_name=predictions_bucket,
                    file_key=response["storage_filepath"].split(predictions_bucket + "/")[1]
                )
            )

            predictions[model_prefix] = self._format_predictions(predictions_df)

        return predictions
|
||||
|
|
@ -1,83 +0,0 @@
|
|||
import pandas as pd
|
||||
import requests
|
||||
from requests.exceptions import RequestException
|
||||
from utils.logger import setup_logger
|
||||
from utils.s3 import save_dataframe_to_s3_parquet
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class SAPChangeModelAPI:
    """Client for the SAP change model API: uploads scoring data to s3 and requests predictions."""

    def __init__(
        self,
        portfolio_id,
        timestamp,
        base_url="https://api.dev.hestia.homes",
    ):
        """
        :param portfolio_id: The portfolio ID to be passed in the request payload.
        :param timestamp: The creation timestamp to be passed in the request payload.
        :param base_url: Base URL that the SAP model endpoint is served from
        """
        self.portfolio_id = portfolio_id
        self.timestamp = timestamp
        self.base_url = base_url

    def upload_scoring_data(self, df: pd.DataFrame, bucket: str) -> str:
        """
        Upload the scoring table to s3 as a parquet file, because the sap model api scores against
        a dataset that is sitting in s3. The returned location can be used as the file location in
        the predict() method.

        :param df: Pandas dataframe with scoring data to be uploaded to s3
        :param bucket: Name of the bucket in s3 to upload to
        :return: The key of the uploaded parquet file within the bucket
        """
        # Key under which the parquet file is stored for scoring
        s3_key = "sap_change_predictions/{portfolio_id}/{timestamp}.parquet".format(
            portfolio_id=self.portfolio_id,
            timestamp=self.timestamp
        )

        logger.info("Storing scoring data to s3")
        save_dataframe_to_s3_parquet(df=df, bucket_name=bucket, file_key=s3_key)

        return s3_key

    def predict(self, file_location):
        """Makes a POST request to the SAP Change Model API with the provided parameters.

        Args:
            file_location (str): The file location to be passed in the request payload.

        Returns:
            dict: The API response as a dictionary if the request was successful, None otherwise.
        """
        logger.info("Making request to sap change api")
        endpoint = f"{self.base_url}/sapmodel/predict"
        json_headers = {"Content-Type": "application/json"}
        body = {
            "file_location": file_location,
            "property_id": "",  # This should get removed
            "portfolio_id": self.portfolio_id,
            "created_at": self.timestamp
        }

        try:
            reply = requests.post(endpoint, json=body, headers=json_headers, timeout=120)
            # Anything other than a 2xx status is turned into an exception here
            reply.raise_for_status()
            return reply.json()
        except RequestException as e:
            # Failures are logged and reported to the caller as None
            logger.error(f"An error occurred: {e}")
            return None
|
||||
|
|
@ -35,4 +35,5 @@ mip==1.15.0
|
|||
boto3==1.28.3
|
||||
pandas==1.5.3
|
||||
pyarrow==12.0.1
|
||||
textblob
|
||||
textblob
|
||||
usaddress==0.5.10
|
||||
|
|
@ -1,21 +1,24 @@
|
|||
import pandas as pd
|
||||
import pytest
|
||||
from unittest.mock import Mock
|
||||
from epc_api.client import EpcClient
|
||||
from backend.Property import Property
|
||||
from etl.epc_clean.EpcClean import EpcClean
|
||||
from etl.epc.Record import EPCRecord
|
||||
|
||||
# Define some test data
|
||||
mock_epc_response = {
|
||||
"rows": [
|
||||
{
|
||||
"tenure": "rental (social)",
|
||||
"lmk-key": 1,
|
||||
"uprn": 1,
|
||||
"number-habitable-rooms": 5,
|
||||
"property-type": "House",
|
||||
"built-form": "Detached",
|
||||
"inspection-date": "2023-06-01",
|
||||
'lodgement-datetime': '2023-06-01 20:29:01',
|
||||
"some-other-key": "some-value",
|
||||
"roof-description": "Roof Description",
|
||||
"roof-description": "pitched, no insulation",
|
||||
"walls-description": "Walls Description",
|
||||
"windows-description": "Windows Description",
|
||||
"mainheat-description": "Main Heating Description",
|
||||
|
|
@ -35,13 +38,15 @@ mock_epc_response = {
|
|||
"floor-height": 2.5,
|
||||
"total-floor-area": 100,
|
||||
"construction-age-band": "England and Wales: 1967-1975",
|
||||
"floor-description": "Floor Description"
|
||||
"floor-description": "Floor Description",
|
||||
"floor-level": "Ground"
|
||||
},
|
||||
{
|
||||
"lmk-key": 2,
|
||||
"uprn": 2,
|
||||
"number-habitable-rooms": 5,
|
||||
"property-type": "House",
|
||||
"built-form": "Detached",
|
||||
"inspection-date": "2023-05-01",
|
||||
'lodgement-datetime': '2023-05-01 20:29:01',
|
||||
"some-other-key": "some-other-value",
|
||||
|
|
@ -65,7 +70,8 @@ mock_epc_response = {
|
|||
"floor-height": 2.5,
|
||||
"total-floor-area": 100,
|
||||
"construction-age-band": "England and Wales: 1967-1975",
|
||||
"floor-description": "Floor Description"
|
||||
"floor-description": "Floor Description",
|
||||
"floor-level": "Ground"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -97,7 +103,8 @@ mock_epc_response_dupe = {
|
|||
"floor-height": 2.5,
|
||||
"total-floor-area": 100,
|
||||
"construction-age-band": "England and Wales: 1967-1975",
|
||||
"floor-description": "Floor Description"
|
||||
"floor-description": "Floor Description",
|
||||
"floor-level": "Ground"
|
||||
},
|
||||
{
|
||||
"lmk-key": 2,
|
||||
|
|
@ -125,7 +132,8 @@ mock_epc_response_dupe = {
|
|||
"floor-height": 2.5,
|
||||
"total-floor-area": 100,
|
||||
"construction-age-band": "England and Wales: 1967-1975",
|
||||
"floor-description": "Floor Description"
|
||||
"floor-description": "Floor Description",
|
||||
"floor-level": "Ground"
|
||||
},
|
||||
{
|
||||
"lmk-key": 3,
|
||||
|
|
@ -153,36 +161,71 @@ mock_epc_response_dupe = {
|
|||
"floor-height": 2.5,
|
||||
"total-floor-area": 100,
|
||||
"construction-age-band": "England and Wales: 1967-1975",
|
||||
"floor-description": "Floor Description"
|
||||
"floor-description": "Floor Description",
|
||||
"floor-level": "Ground"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
class TestProperty:
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def property_instance(self, mock_epc_client, mock_cleaner):
|
||||
property_instance = Property(1, "AB12CD", "Test Address", epc_client=mock_epc_client)
|
||||
def mock_photo_supply_lookup(self):
    """Return a one-row photo supply lookup table matching the first mocked EPC row."""
    lookup_row = {
        "tenure": "rental (social)",
        "built_form": "Detached",
        "property_type": "House",
        "construction_age_band": "England and Wales: 1967-1975",
        "is_flat": False,
        "is_pitched": True,
        "is_roof_room": False,
        "floor_area_decile": 2,
        "photo_supply_median": 40,
    }
    return pd.DataFrame([lookup_row])
|
||||
|
||||
@pytest.fixture(autouse=True)
def mock_floor_area_decile_thresholds(self):
    """Return a single-column frame of floor area decile thresholds."""
    thresholds = [0, 10, 30, 50]
    return pd.DataFrame({"floor_area_decile_thresholds": thresholds})
|
||||
|
||||
@pytest.fixture(autouse=True)
def property_instance(self, mock_cleaner):
    """Build a Property from the first mocked EPC row with its size attributes pre-set."""
    record = EPCRecord()
    record.prepared_epc = mock_epc_response["rows"][0]

    prop = Property(id=1, postcode="AB12CD", address="Test Address", epc_record=record)

    # Size attributes the component tests rely on, assigned in this order
    size_attributes = {
        "number_of_floors": 2,
        "number_of_rooms": 5,
        "floor_area": 100,
        "floor_height": 2.5,
    }
    for attribute, attribute_value in size_attributes.items():
        setattr(prop, attribute, attribute_value)

    return prop
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def property_instance_dupe_data(self, mock_epc_client_dupe_data):
|
||||
property_instance_dupe_data = Property(2, "AB12CD", "Test Address", epc_client=mock_epc_client_dupe_data)
|
||||
def property_instance_dupe_data(self):
|
||||
epc_record = EPCRecord()
|
||||
epc_record.prepared_epc = mock_epc_response_dupe["rows"][0]
|
||||
property_instance_dupe_data = Property(id=2, postcode="AB12CD", address="Test Address", epc_record=epc_record)
|
||||
return property_instance_dupe_data
|
||||
|
||||
@pytest.fixture
def mock_epc_client(self):
    """Return an EpcClient mock whose domestic search yields a copy of mock_epc_response."""
    token = "mocked_auth_token"
    client = Mock(spec=EpcClient(auth_token=token))
    client.auth_token = token
    client.domestic.search.return_value = mock_epc_response.copy()
    return client
|
||||
|
||||
@pytest.fixture
def mock_epc_client_dupe_data(self):
    """Return an EpcClient mock whose domestic search yields a copy of mock_epc_response_dupe."""
    token = "mocked_auth_token"
    client = Mock(spec=EpcClient(auth_token=token))
    client.auth_token = token
    client.domestic.search.return_value = mock_epc_response_dupe.copy()
    return client
|
||||
# @pytest.fixture
|
||||
# def mock_epc_client(self):
|
||||
# mock_epc_client = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
|
||||
# mock_epc_client.domestic.search.return_value = mock_epc_response.copy()
|
||||
# mock_epc_client.auth_token = "mocked_auth_token"
|
||||
# return mock_epc_client
|
||||
#
|
||||
# @pytest.fixture
|
||||
# def mock_epc_client_dupe_data(self):
|
||||
# mock_epc_client_dupe_data = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
|
||||
# mock_epc_client_dupe_data.domestic.search.return_value = mock_epc_response_dupe.copy()
|
||||
# mock_epc_client_dupe_data.auth_token = "mocked_auth_token"
|
||||
# return mock_epc_client_dupe_data
|
||||
|
||||
@pytest.fixture
|
||||
def mock_cleaner(self):
|
||||
|
|
@ -221,7 +264,11 @@ class TestProperty:
|
|||
}
|
||||
|
||||
mock_cleaner.cleaned = {
|
||||
"roof-description": [{"original_description": "Roof Description"}],
|
||||
"roof-description": [
|
||||
{"original_description": "Roof Description"},
|
||||
{"original_description": "pitched, no insulation", "is_pitched": True, "is_flat": False,
|
||||
"is_roof_room": False}
|
||||
],
|
||||
"walls-description": [walls_data],
|
||||
"windows-description": [{"original_description": "Windows Description"}],
|
||||
"mainheat-description": [{"original_description": "Main Heating Description"}],
|
||||
|
|
@ -232,37 +279,34 @@ class TestProperty:
|
|||
}
|
||||
return mock_cleaner
|
||||
|
||||
def test_init(self, mock_epc_client):
|
||||
inst1 = Property(0, "AB12CD", "Test Address", epc_client=mock_epc_client)
|
||||
# Should be mocked auth token
|
||||
assert inst1.epc_client.auth_token == "mocked_auth_token"
|
||||
def test_init(self):
|
||||
epc_record = EPCRecord()
|
||||
epc_record.prepared_epc = {"uprn": 1}
|
||||
inst1 = Property(0, postcode="AB12CD", address="Test Address", epc_record=epc_record)
|
||||
|
||||
inst2 = Property(3, "AB12CD", "Test Address", epc_client=mock_epc_client)
|
||||
assert inst2.epc_client.auth_token
|
||||
assert inst1.data is not None
|
||||
|
||||
inst3 = Property(4, "AB12CD", "Test Address", data={"some": "data"}, epc_client=mock_epc_client)
|
||||
assert inst3.data == {"some": "data"}
|
||||
inst2 = Property(3, "AB12CD", "Test Address", epc_record=epc_record)
|
||||
assert inst2.id == 3
|
||||
|
||||
data = inst3.search_address_epc()
|
||||
assert data is None
|
||||
inst3 = Property(4, "AB12CD", "Test Address", epc_record=epc_record)
|
||||
assert inst3.data == {"uprn": 1}
|
||||
|
||||
def test_search_address_epc(self, property_instance):
    """After searching, the property's data should equal the first mocked EPC row."""
    property_instance.search_address_epc()

    found = property_instance.data
    first_mocked_row = mock_epc_response["rows"][0]
    assert found == first_mocked_row
|
||||
|
||||
def test_search_address_epc_multiple_results(self, property_instance_dupe_data, mock_epc_client_dupe_data):
    """Searching must raise with the expected message when the lookup is ambiguous."""
    expected_message = "More than one result found for this address - investigate me"
    with pytest.raises(Exception, match=expected_message):
        property_instance_dupe_data.search_address_epc()
|
||||
|
||||
def test_get_components(self, property_instance, mock_cleaner, mock_epc_client):
|
||||
property_instance.search_address_epc()
|
||||
property_instance.get_components(mock_cleaner.cleaned)
|
||||
def test_get_components(
|
||||
self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
|
||||
):
|
||||
property_instance.get_components(
|
||||
mock_cleaner.cleaned,
|
||||
photo_supply_lookup=mock_photo_supply_lookup,
|
||||
floor_area_decile_thresholds=mock_floor_area_decile_thresholds
|
||||
)
|
||||
|
||||
# Verify that the components are set correctly
|
||||
assert property_instance.roof == {"original_description": "Roof Description"}
|
||||
assert property_instance.roof == {
|
||||
'original_description': 'pitched, no insulation', 'is_pitched': True,
|
||||
'is_flat': False, 'is_roof_room': False
|
||||
}
|
||||
|
||||
assert property_instance.walls == {
|
||||
"original_description": "Walls Description",
|
||||
"is_cavity_wall": True,
|
||||
|
|
@ -286,24 +330,15 @@ class TestProperty:
|
|||
|
||||
# Verify that ValueError is raised when EpcClean doesn't contain cleaned data
|
||||
with pytest.raises(ValueError, match="Cleaner does not contain cleaned data"):
|
||||
property_instance.get_components(mock_cleaner.cleaned)
|
||||
property_instance.get_components(mock_cleaner.cleaned, pd.DataFrame(), pd.DataFrame())
|
||||
|
||||
def test_get_components_no_data(self, property_instance, mock_cleaner):
|
||||
def test_get_components_no_attributes(
|
||||
self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
|
||||
):
|
||||
# Modify the mock cleaner to have no attributes for a specific description
|
||||
mock_cleaner.cleaned = {
|
||||
"roof-description": []
|
||||
}
|
||||
|
||||
# Verify that ValueError is raised when no attributes are found
|
||||
with pytest.raises(ValueError, match="Property does not contain data"):
|
||||
property_instance.get_components(mock_cleaner.cleaned)
|
||||
|
||||
def test_get_components_no_attributes(self, property_instance, mock_cleaner):
|
||||
# Modify the mock cleaner to have no attributes for a specific description
|
||||
mock_cleaner.cleaned = {
|
||||
"roof-description": []
|
||||
}
|
||||
property_instance.search_address_epc()
|
||||
property_instance.data["roof-description"] = "Pitched, no insulation"
|
||||
property_instance.walls = {
|
||||
"original_description": "Walls Description",
|
||||
|
|
@ -324,14 +359,17 @@ class TestProperty:
|
|||
}
|
||||
|
||||
# Assert backup cleaning has been applied
|
||||
property_instance.get_components(mock_cleaner.cleaned)
|
||||
property_instance.get_components(
|
||||
mock_cleaner.cleaned, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
|
||||
)
|
||||
|
||||
assert property_instance.roof["clean_description"] == "Pitched, no insulation"
|
||||
assert property_instance.roof["is_pitched"]
|
||||
|
||||
def test_get_components_multiple_attributes(self, property_instance, mock_cleaner):
|
||||
def test_get_components_multiple_attributes(
|
||||
self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
|
||||
):
|
||||
# This shouldn't happen - it would mean a cleaning error
|
||||
property_instance.search_address_epc()
|
||||
property_instance.data["roof-description"] = "Roof Description"
|
||||
cleaned = {
|
||||
"roof-description": [
|
||||
|
|
@ -342,4 +380,102 @@ class TestProperty:
|
|||
|
||||
# Verify that ValueError is raised when multiple attributes are found
|
||||
with pytest.raises(ValueError, match="Either No attributes or multiple found for roof-description"):
|
||||
property_instance.get_components(cleaned)
|
||||
property_instance.get_components(cleaned, mock_photo_supply_lookup, mock_floor_area_decile_thresholds)
|
||||
|
||||
def test_set_spatial(self):
    """set_spatial should populate the conservation, listed, heritage and restriction flags."""
    record = EPCRecord()
    record.prepared_epc = mock_epc_response["rows"][0]

    # Both scenarios share the same coordinates and differ only in the status flags
    coordinates = {
        'X_COORDINATE': 411143.0, 'Y_COORDINATE': 281701.0,
        'LATITUDE': 52.4331896, 'LONGITUDE': -1.8375238,
    }

    # Conservation area and heritage building: measures are restricted
    restricted = Property(1, postcode="AB12CD", address="Test Address", epc_record=record)
    restricted_spatial = pd.DataFrame([{
        **coordinates,
        'conservation_status': True, 'is_listed_building': False, 'is_heritage_building': True
    }])
    restricted.set_spatial(restricted_spatial)

    assert restricted.in_conservation_area
    assert not restricted.is_listed
    assert restricted.is_heritage
    assert restricted.restricted_measures

    # No conservation status and no heritage or listing flags: nothing is restricted
    unrestricted = Property(1, "AB12CD", "Test Address", epc_record=record)
    unrestricted_spatial = pd.DataFrame([{
        **coordinates,
        'conservation_status': None, 'is_listed_building': False, 'is_heritage_building': False
    }])
    unrestricted.set_spatial(unrestricted_spatial)

    assert unrestricted.in_conservation_area is None
    assert not unrestricted.is_listed
    assert not unrestricted.is_heritage
    assert not unrestricted.restricted_measures
|
||||
|
||||
def test_set_floor_level(self):
    """
    Check the floor_level that set_floor_level() produces for three flats and one house, given
    the EPC 'floor-level' label and the cleaned floor component of each property.
    """

    def build_floor(**overrides):
        # A fresh dict per call, so no two properties share the same floor component
        floor = {
            'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation',
            'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_assumed': False,
            'is_to_unheated_space': False, 'is_to_external_air': False, 'is_suspended': False,
            'is_solid': False, 'another_property_below': True, 'insulation_thickness': 'none',
            'floor_thermal_transmittance': None, 'floor_insulation_thickness': 'none'
        }
        floor.update(overrides)
        return floor

    # (EPC floor-level label, property type, floor component, expected floor_level)
    scenarios = [
        # A flat which looks like it's on the first floor, but it's actually on the ground floor,
        # so floor_level should be set to 0
        (
            '01', 'Flat',
            build_floor(
                original_description='Solid, no insulation (assumed)', is_assumed=True,
                is_solid=True, another_property_below=False,
            ),
            0,
        ),
        # Labelled as being on the ground floor but actually has another property below,
        # so floor_level should be set to 1
        ('Ground', 'Flat', build_floor(), 1),
        # Correctly labelled as being on the 2nd floor
        ('02', 'Flat', build_floor(), 2),
        # Example of a house
        ('', 'House', build_floor(another_property_below=False), None),
    ]

    for level_label, property_type, floor, expected_level in scenarios:
        record = EPCRecord()
        record.prepared_epc = {'floor-level': level_label, 'property-type': property_type}
        home = Property(1, postcode="AB12CD", address="Test Address", epc_record=record)
        home.floor = floor

        home.set_floor_level()

        if expected_level is None:
            assert home.floor_level is None
        else:
            assert home.floor_level == expected_level
|
||||
|
|
|
|||
|
|
@ -1,989 +0,0 @@
|
|||
from backend.Property import Property
|
||||
from etl.epc.DataProcessor import DataProcessor
|
||||
from backend.app.plan.utils import create_recommendation_scoring_data, get_cleaned
|
||||
from etl.epc.settings import COLUMNS_TO_MERGE_ON
|
||||
from epc_api.client import EpcClient
|
||||
import pandas as pd
|
||||
import pytest
|
||||
import msgpack
|
||||
|
||||
from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
# Handy code for selecting testing data
|
||||
# import pickle
|
||||
#
|
||||
# with open("sap_dataset.pickle", "rb") as f:
|
||||
# sap_change_dataset = pickle.load(f)
|
||||
#
|
||||
# search_from = sap_change_dataset[
|
||||
# (sap_change_dataset["walls_thermal_transmittance_ENDING"] == sap_change_dataset["walls_thermal_transmittance"]) &
|
||||
# sap_change_dataset["is_to_unheated_space"]
|
||||
# ]
|
||||
# search_from = search_from[
|
||||
# (search_from["roof_thermal_transmittance_ENDING"] == search_from["roof_thermal_transmittance"]) &
|
||||
# (search_from["floor_thermal_transmittance_ENDING"] != search_from["floor_thermal_transmittance"]) &
|
||||
# (search_from["MECHANICAL_VENTILATION_ENDING"] == search_from["MECHANICAL_VENTILATION_STARTING"]) &
|
||||
# (search_from["SECONDHEAT_DESCRIPTION_ENDING"] == search_from["SECONDHEAT_DESCRIPTION_STARTING"]) &
|
||||
# (search_from["GLAZED_TYPE_ENDING"] == search_from["GLAZED_TYPE_STARTING"])
|
||||
# ]
|
||||
#
|
||||
# # Find a record where the only difference is cavity wall getting filled
|
||||
# ending_cols = [c for c in search_from.columns if "_ENDING" in c]
|
||||
#
|
||||
# ignore = [
|
||||
# "SAP_ENDING", "HEAT_DEMAND_ENDING", "CARBON_ENDING", "TRANSACTION_TYPE_ENDING", "FLOOR_HEIGHT_ENDING",
|
||||
# "DAYS_TO_ENDING", "TOTAL_FLOOR_AREA_ENDING"
|
||||
# ]
|
||||
#
|
||||
# ending_cols = [c for c in ending_cols if c not in ignore]
|
||||
#
|
||||
# for _, row in tqdm(search_from.iterrows(), total=search_from.shape[0]):
|
||||
#
|
||||
# same = True
|
||||
# starting_cols = []
|
||||
# for c in ending_cols:
|
||||
#
|
||||
# starting_col = c.replace("_ENDING", "")
|
||||
# if starting_col not in search_from.columns:
|
||||
# starting_col = c.replace("_ENDING", "_STARTING")
|
||||
# if starting_col not in search_from.columns:
|
||||
# raise Exception("something went wrong")
|
||||
#
|
||||
# starting_cols.append(starting_col)
|
||||
#
|
||||
# # We want them to be different
|
||||
# if c == "floor_thermal_transmittance_ENDING":
|
||||
# if (row[c] == row[starting_col]) | (row[starting_col] != "natural"):
|
||||
# same = False
|
||||
# break
|
||||
# else:
|
||||
# continue
|
||||
#
|
||||
# # We now check if the starting and ending values are the same
|
||||
# if row[c] != row[starting_col]:
|
||||
# same = False
|
||||
# break
|
||||
#
|
||||
# if same:
|
||||
# raise Exception("We found one!")
|
||||
#
|
||||
# fixed_cols = [c for c in search_from.columns if c not in starting_cols + ending_cols]
|
||||
#
|
||||
# import pandas as pd
|
||||
#
|
||||
# start = row[["SAP_STARTING"] + starting_cols]
|
||||
# start.index = [c.replace("_STARTING", "") for c in start.index]
|
||||
# end = row[["SAP_ENDING"] + ending_cols]
|
||||
# end.index = [c.replace("_ENDING", "") for c in end.index]
|
||||
# start["type"] = "starting"
|
||||
# end["type"] = "ending"
|
||||
#
|
||||
# compare = pd.concat([start, end], axis=1)
|
||||
#
|
||||
# ending_lmk = "1252008839062019090910572351658131"
|
||||
# starting_lmk = "1252008819542014122308482236142128"
|
||||
#
|
||||
# client = EpcClient(auth_token=EPC_AUTH_TOKEN)
|
||||
# result = client.domestic.search(params={"address": "Flat 14 Charles House, Freemens Way", "postcode": "CT14 9DL"})
|
||||
# starting_epc = [x for x in result["rows"] if x["lmk-key"] == starting_lmk][0]
|
||||
# ending_epc = [x for x in result["rows"] if x["lmk-key"] == ending_lmk][0]
|
||||
|
||||
|
||||
# with open(
|
||||
# os.path.abspath(os.path.dirname(__file__)) + "/backend/tests/test_data/cleaned.pickle", "rb"
|
||||
# ) as f:
|
||||
# cleaned = pickle.load(f)
|
||||
|
||||
# with open(
|
||||
# os.path.abspath(os.path.dirname(__file__)) + "/backend/tests/test_data/cleaning_data.pickle", "rb"
|
||||
# ) as f:
|
||||
# cleaning_data = pickle.load(f)
|
||||
|
||||
# TODO: Add tests for floors - suspended, solid, and floors to an unheated space
|
||||
|
||||
|
||||
class TestSapModelPrep:
|
||||
|
||||
@pytest.fixture
def cleaning_data(self):
    """
    Load the cleaning dataset parquet file from the retrofit-data-dev S3 bucket as a DataFrame.
    """
    cleaning_dataset = read_dataframe_from_s3_parquet(
        bucket_name="retrofit-data-dev",
        file_key="sap_change_model/cleaning_dataset.parquet",
    )
    return cleaning_dataset
|
||||
|
||||
@pytest.fixture
def cleaned(self):
    """
    Fetch the cleaned EPC data from the retrofit-data-dev S3 bucket and decode it.
    """
    # Despite the .bson file name, the payload is decoded with msgpack
    packed = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name="retrofit-data-dev"
    )
    return msgpack.unpackb(packed, raw=False)
|
||||
|
||||
def test_fill_cavity_wall(self, cleaned, cleaning_data):
    """
    We ensure that the process that prepares the data in the engine code results in the same data as
    the model is trained on.

    A starting EPC is pushed through Property -> DataProcessor -> create_recommendation_scoring_data
    with a wall insulation recommendation, then cleaned, and every resulting column is compared
    against a record taken from the training data.

    :param cleaned: fixture holding the cleaned EPC descriptions passed to Property.get_components
    :param cleaning_data: fixture holding the dataset passed to DataProcessor.apply_averages_cleaning
    """

    # This is an actual starting EPC
    starting_epc = {
        'low-energy-fixed-light-count': '', 'address': '26, Vicarage Lane, Eaton',
        'uprn-source': 'Address Matched', 'floor-height': '2.39', 'heating-cost-potential': '942',
        'unheated-corridor-length': '', 'hot-water-cost-potential': '97',
        'construction-age-band': 'England and Wales: 1967-1975', 'potential-energy-rating': 'D',
        'mainheat-energy-eff': 'Average', 'windows-env-eff': 'Good', 'lighting-energy-eff': 'Average',
        'environment-impact-potential': '53',
        'glazed-type': 'double glazing installed during or after 2002', 'heating-cost-current': '1475',
        'address3': '', 'mainheatcont-description': 'Programmer, room thermostat and TRVs',
        'sheating-energy-eff': 'N/A', 'property-type': 'House', 'local-authority-label': 'Melton',
        'fixed-lighting-outlets-count': '', 'energy-tariff': 'Single',
        'mechanical-ventilation': 'natural', 'hot-water-cost-current': '96', 'county': 'Leicestershire',
        'postcode': 'NG32 1SP', 'solar-water-heating-flag': 'Y', 'constituency': 'E14000909',
        'co2-emissions-potential': '5.7', 'number-heated-rooms': '7',
        'floor-description': 'Suspended, no insulation (assumed)',
        'energy-consumption-potential': '177', 'local-authority': 'E07000133', 'built-form': 'Detached',
        'number-open-fireplaces': '1', 'windows-description': 'Fully double glazed',
        'glazed-area': 'Normal', 'inspection-date': '2016-09-22', 'mains-gas-flag': 'N',
        'co2-emiss-curr-per-floor-area': '87', 'address1': '26, Vicarage Lane',
        'heat-loss-corridor': 'NO DATA!', 'flat-storey-count': '',
        'constituency-label': 'Rutland and Melton', 'roof-energy-eff': 'Very Poor',
        'total-floor-area': '116.0', 'building-reference-number': '4940047478',
        'environment-impact-current': '29', 'co2-emissions-current': '10.0',
        'roof-description': 'Pitched, limited insulation (assumed)', 'floor-energy-eff': 'NO DATA!',
        'number-habitable-rooms': '7', 'address2': 'Eaton', 'hot-water-env-eff': 'Good',
        'posttown': 'GRANTHAM', 'mainheatc-energy-eff': 'Good', 'main-fuel': 'oil (not community)',
        'lighting-env-eff': 'Average', 'windows-energy-eff': 'Good', 'floor-env-eff': 'N/A',
        'sheating-env-eff': 'N/A',
        'lighting-description': 'Low energy lighting in 31% of fixed outlets',
        'roof-env-eff': 'Very Poor', 'walls-energy-eff': 'Poor', 'photo-supply': '',
        'lighting-cost-potential': '69', 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100',
        'main-heating-controls': '2106', 'lodgement-datetime': '2016-09-23 20:29:01',
        'flat-top-storey': '', 'current-energy-rating': 'F',
        'secondheat-description': 'Room heaters, dual fuel (mineral and wood)', 'walls-env-eff': 'Poor',
        'transaction-type': 'marketed sale', 'uprn': '100030534042', 'current-energy-efficiency': '34',
        'energy-consumption-current': '343', 'mainheat-description': 'Boiler and radiators, oil',
        'lighting-cost-current': '117', 'lodgement-date': '2016-09-23', 'extension-count': '2',
        'mainheatc-env-eff': 'Good', 'lmk-key': '1481856849902016092320290148762028',
        'wind-turbine-count': '0', 'tenure': 'owner-occupied', 'floor-level': 'NODATA!',
        'potential-energy-efficiency': '64', 'hot-water-energy-eff': 'Good',
        'low-energy-lighting': '31',
        'walls-description': 'Cavity wall, as built, no insulation (assumed)',
        'hotwater-description': 'From main system, plus solar'
    }

    # This is the training data as we prepare it in the engine
    # This is an actual record from the training data
    row = {
        'UPRN': '100030534042', 'RDSAP_CHANGE': 12, 'HEAT_DEMAND_CHANGE': -72,
        'CARBON_CHANGE': -2.0999999999999996, 'SAP_STARTING': 34, 'SAP_ENDING': 46, 'HEAT_DEMAND_STARTING': 343,
        'HEAT_DEMAND_ENDING': 271, 'CARBON_STARTING': 10.0, 'CARBON_ENDING': 7.9, 'PROPERTY_TYPE': 'House',
        'BUILT_FORM': 'Detached', 'CONSTITUENCY': 'E14000909', 'NUMBER_HABITABLE_ROOMS': 7.0,
        'NUMBER_HEATED_ROOMS': 7.0, 'FIXED_LIGHTING_OUTLETS_COUNT': 21.0,
        'CONSTRUCTION_AGE_BAND': 'England and Wales: 1967-1975', 'TRANSACTION_TYPE_STARTING': 'marketed sale',
        'MECHANICAL_VENTILATION_STARTING': 'natural',
        'SECONDHEAT_DESCRIPTION_STARTING': 'Room heaters, dual fuel (mineral and wood)',
        'ENERGY_TARIFF_STARTING': 'Single', 'SOLAR_WATER_HEATING_FLAG_STARTING': 'Y',
        'PHOTO_SUPPLY_STARTING': 0.0, 'GLAZED_TYPE_STARTING': 'double glazing installed during or after 2002',
        'MULTI_GLAZE_PROPORTION_STARTING': 100.0, 'LOW_ENERGY_LIGHTING_STARTING': 31.0,
        'NUMBER_OPEN_FIREPLACES_STARTING': 1.0, 'EXTENSION_COUNT_STARTING': 2.0,
        'TOTAL_FLOOR_AREA_STARTING': 116.0, 'FLOOR_HEIGHT_STARTING': 2.39,
        'TRANSACTION_TYPE_ENDING': 'marketed sale', 'MECHANICAL_VENTILATION_ENDING': 'natural',
        'SECONDHEAT_DESCRIPTION_ENDING': 'Room heaters, dual fuel (mineral and wood)',
        'ENERGY_TARIFF_ENDING': 'Single', 'SOLAR_WATER_HEATING_FLAG_ENDING': 'Y', 'PHOTO_SUPPLY_ENDING': 0.0,
        'GLAZED_TYPE_ENDING': 'double glazing installed during or after 2002',
        'MULTI_GLAZE_PROPORTION_ENDING': 100.0, 'LOW_ENERGY_LIGHTING_ENDING': 31.0,
        'NUMBER_OPEN_FIREPLACES_ENDING': 1.0, 'EXTENSION_COUNT_ENDING': 2.0, 'TOTAL_FLOOR_AREA_ENDING': 116.0,
        'FLOOR_HEIGHT_ENDING': 2.41, 'DAYS_TO_STARTING': 784, 'DAYS_TO_ENDING': 867,
        'walls_thermal_transmittance': 1.5, 'is_cavity_wall': True, 'is_filled_cavity': False,
        'is_solid_brick': False, 'is_system_built': False, 'is_timber_frame': False,
        'is_granite_or_whinstone': False, 'is_as_built': True, 'is_cob': False,
        'is_sandstone_or_limestone': False, 'is_park_home': False, 'walls_insulation_thickness': 'none',
        'external_insulation': False, 'internal_insulation': False, 'walls_thermal_transmittance_ENDING': 0.7,
        'is_park_home_ENDING': False, 'walls_insulation_thickness_ENDING': 'average',
        'external_insulation_ENDING': False, 'internal_insulation_ENDING': False,
        'floor_thermal_transmittance': 0.64, 'is_to_unheated_space': False, 'is_to_external_air': False,
        'is_suspended': True, 'is_solid': False, 'another_property_below': False,
        'floor_insulation_thickness': 'none', 'floor_thermal_transmittance_ENDING': 0.64,
        'floor_insulation_thickness_ENDING': 'none', 'roof_thermal_transmittance': 1.5, 'is_pitched': True,
        'is_roof_room': False, 'is_loft': False, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False,
        'has_dwelling_above': False, 'roof_insulation_thickness': 'below average',
        'roof_thermal_transmittance_ENDING': 1.5, 'roof_insulation_thickness_ENDING': 'below average',
        'heater_type': 'Unknown', 'system_type': 'from main system', 'thermostat_characteristics': 'Unknown',
        'heating_scope': 'Unknown', 'energy_recovery': 'Unknown', 'hotwater_tariff_type': 'Unknown',
        'extra_features': 'plus solar', 'chp_systems': 'Unknown', 'distribution_system': 'Unknown',
        'no_system_present': 'Unknown', 'appliance': 'Unknown', 'heater_type_ENDING': 'Unknown',
        'system_type_ENDING': 'from main system', 'thermostat_characteristics_ENDING': 'Unknown',
        'heating_scope_ENDING': 'Unknown', 'energy_recovery_ENDING': 'Unknown',
        'hotwater_tariff_type_ENDING': 'Unknown', 'extra_features_ENDING': 'plus solar',
        'chp_systems_ENDING': 'Unknown', 'distribution_system_ENDING': 'Unknown',
        'no_system_present_ENDING': 'Unknown', 'appliance_ENDING': 'Unknown', 'has_radiators': True,
        'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False,
        'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': True,
        'has_air_source_heat_pump': False, 'has_room_heaters': False, 'has_electric_storage_heaters': False,
        'has_warm_air': False, 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False,
        'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False,
        'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False,
        'has_electric_heat_pump': False, 'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False,
        'has_exhaust_source_heat_pump': False, 'has_community_heat_pump': False, 'has_electric': False,
        'has_mains_gas': False, 'has_wood_logs': False, 'has_coal': False, 'has_oil': True,
        'has_wood_pellets': False, 'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False,
        'has_smokeless_fuel': False, 'has_lpg': False, 'has_b30k': False, 'has_electricaire': False,
        'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False, 'has_radiators_ENDING': True,
        'has_fan_coil_units_ENDING': False, 'has_pipes_in_screed_above_insulation_ENDING': False,
        'has_pipes_in_insulated_timber_floor_ENDING': False, 'has_pipes_in_concrete_slab_ENDING': False,
        'has_boiler_ENDING': True, 'has_air_source_heat_pump_ENDING': False, 'has_room_heaters_ENDING': False,
        'has_electric_storage_heaters_ENDING': False, 'has_warm_air_ENDING': False,
        'has_electric_underfloor_heating_ENDING': False, 'has_electric_ceiling_heating_ENDING': False,
        'has_community_scheme_ENDING': False, 'has_ground_source_heat_pump_ENDING': False,
        'has_no_system_present_ENDING': False, 'has_portable_electric_heaters_ENDING': False,
        'has_water_source_heat_pump_ENDING': False, 'has_electric_heat_pump_ENDING': False,
        'has_micro-cogeneration_ENDING': False, 'has_solar_assisted_heat_pump_ENDING': False,
        'has_exhaust_source_heat_pump_ENDING': False, 'has_community_heat_pump_ENDING': False,
        'has_electric_ENDING': False, 'has_mains_gas_ENDING': False, 'has_wood_logs_ENDING': False,
        'has_coal_ENDING': False, 'has_oil_ENDING': True, 'has_wood_pellets_ENDING': False,
        'has_anthracite_ENDING': False, 'has_dual_fuel_mineral_and_wood_ENDING': False,
        'has_smokeless_fuel_ENDING': False, 'has_lpg_ENDING': False, 'has_b30k_ENDING': False,
        'has_electricaire_ENDING': False, 'has_assumed_for_most_rooms_ENDING': False,
        'has_underfloor_heating_ENDING': False, 'thermostatic_control': 'room thermostat',
        'charging_system': 'Unknown', 'switch_system': 'programmer', 'no_control': 'Unknown',
        'dhw_control': 'Unknown', 'community_heating': 'Unknown', 'multiple_room_thermostats': False,
        'auxiliary_systems': 'Unknown', 'trvs': 'trvs', 'rate_control': 'Unknown',
        'thermostatic_control_ENDING': 'room thermostat', 'charging_system_ENDING': 'Unknown',
        'switch_system_ENDING': 'programmer', 'no_control_ENDING': 'Unknown', 'dhw_control_ENDING': 'Unknown',
        'community_heating_ENDING': 'Unknown', 'multiple_room_thermostats_ENDING': False,
        'auxiliary_systems_ENDING': 'Unknown', 'trvs_ENDING': 'trvs', 'rate_control_ENDING': 'Unknown',
        'glazing_type': 'double', 'glazing_type_ENDING': 'double', 'fuel_type': 'oil',
        'main-fuel_tariff_type': 'Unknown', 'is_community': False,
        'no_individual_heating_or_community_network': False, 'complex_fuel_type': 'Unknown',
        'fuel_type_ENDING': 'oil', 'main-fuel_tariff_type_ENDING': 'Unknown', 'is_community_ENDING': False,
        'no_individual_heating_or_community_network_ENDING': False, 'complex_fuel_type_ENDING': 'Unknown',
        'estimated_perimeter_STARTING': 44.77882152472145, 'estimated_perimeter_ENDING': 44.77882152472145,
        'HOT_WATER_ENERGY_EFF_STARTING': "Good",
        "FLOOR_ENERGY_EFF_STARTING": "Unknown",
        "WINDOWS_ENERGY_EFF_STARTING": "Good",
        "WALLS_ENERGY_EFF_STARTING": "Poor",
        "SHEATING_ENERGY_EFF_STARTING": "Unknown",
        "ROOF_ENERGY_EFF_STARTING": "Very Poor",
        "MAINHEAT_ENERGY_EFF_STARTING": "Average",
        "MAINHEATC_ENERGY_EFF_STARTING": "Good",
        "LIGHTING_ENERGY_EFF_STARTING": "Average",
        "POTENTIAL_ENERGY_EFFICIENCY": 64,
        "ENVIRONMENT_IMPACT_POTENTIAL": 53,
        "ENERGY_CONSUMPTION_POTENTIAL": 177.0,
        "CO2_EMISSIONS_POTENTIAL": 5.7,
        "HOT_WATER_ENERGY_EFF_ENDING": "Good",
        "FLOOR_ENERGY_EFF_ENDING": "Unknown",
        "WINDOWS_ENERGY_EFF_ENDING": "Good",
        "WALLS_ENERGY_EFF_ENDING": "Good",
        "SHEATING_ENERGY_EFF_ENDING": "Unknown",
        "ROOF_ENERGY_EFF_ENDING": "Very Poor",
        "MAINHEAT_ENERGY_EFF_ENDING": "Average",
        "MAINHEATC_ENERGY_EFF_ENDING": "Good",
        "LIGHTING_ENERGY_EFF_ENDING": "Average",
    }

    # Build the property from the starting EPC; the EPC client gets a dummy token
    home = Property(
        id=0,
        postcode=starting_epc["postcode"],
        address1=starting_epc["address1"],
        epc_client=EpcClient(auth_token="notoken"),
        data=starting_epc
    )
    home.get_components(cleaned)

    data_processor = DataProcessor(None, newdata=True)
    data_processor.insert_data(pd.DataFrame([home.get_model_data()]))

    data_processor.pre_process()

    starting_epc_data = data_processor.get_component_features(suffix="_STARTING")
    ending_epc_data = data_processor.get_component_features(suffix="_ENDING")
    fixed_data = data_processor.get_fixed_features()

    ending_lodgement_date = '2016-12-15'

    ending_epc_data["DAYS_TO_ENDING"] = data_processor.calculate_days_to(ending_lodgement_date)

    # The recommendation's new U-value matches walls_thermal_transmittance_ENDING in the training row
    recommendation = {
        "recommendation_id": 0,
        "new_u_value": 0.7,
        "type": "wall_insulation"
    }

    test_record = create_recommendation_scoring_data(
        property=home,
        recommendation=recommendation,
        starting_epc_data=starting_epc_data,
        ending_epc_data=ending_epc_data,
        fixed_data=fixed_data,
    )
    test_record = pd.DataFrame([test_record])

    # Test the final cleaning:
    test_record = DataProcessor.apply_averages_cleaning(
        data_to_clean=test_record,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"]
    ).drop(columns=["LOCAL_AUTHORITY"])

    test_record = DataProcessor.clean_missings_after_description_process(
        test_record, [
            c for c in test_record.columns if
            ("thermal_transmittance" in c) or ("insulation_thickness" in c)
        ]
    )

    # Test that the data has been set up correctly

    # Things to fix:
    # [] Filled cavity should have an average insulation thickness in the cleaned data

    # These columns are not compared against the training row
    not_compared = ("id", "SAP_ENDING", "HEAT_DEMAND_ENDING", "CARBON_ENDING")

    for c in test_record.columns:
        if c in not_compared:
            continue

        actual = test_record[c].values[0]

        if c == "FLOOR_HEIGHT_ENDING":
            # The training row has 2.41 while the starting EPC has 2.39, so allow a 0.02 gap.
            # abs() makes the tolerance two-sided: without it any larger engine value would pass.
            assert abs(row[c] - actual) <= 0.020001, f"{c}: expected ~{row[c]!r}, got {actual!r}"
            continue

        if c == "walls_insulation_thickness_ENDING":
            # Known discrepancy (see "Things to fix" above): training data says "average",
            # the engine currently produces "above average"
            assert row[c] == "average"
            assert actual == "above average", f"{c}: got {actual!r}"
            continue

        assert actual == row[c], f"{c}: expected {row[c]!r}, got {actual!r}"
|
||||
|
||||
def test_solid_wall_insulation(self, cleaned, cleaning_data):
    """
    Score a wall insulation recommendation (new u-value 0.21) against the EPC of a
    solid brick flat in Newham and compare every generated column with ``row2``.

    The raw EPC record is wrapped in a Property, pushed through DataProcessor
    pre-processing, turned into a scoring record, run through the final cleaning
    steps and then checked column by column.

    :param cleaned: passed straight to ``Property.get_components``
        (presumably a pytest fixture holding cleaned lookup data - confirm against the fixture)
    :param cleaning_data: passed to ``DataProcessor.apply_averages_cleaning``
        (presumably a pytest fixture - confirm against the fixture)
    """

    # Raw EPC record used as the starting point for the property.
    starting_epc2 = {
        'low-energy-fixed-light-count': '2', 'address': 'FLAT 12, WAREHOUSE W, 3 WESTERN GATEWAY',
        'uprn-source': 'Energy Assessor', 'floor-height': '3.64', 'heating-cost-potential': '465',
        'unheated-corridor-length': '', 'hot-water-cost-potential': '185',
        'construction-age-band': 'England and Wales: 1900-1929', 'potential-energy-rating': 'C',
        'mainheat-energy-eff': 'Very Poor', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Poor',
        'environment-impact-potential': '51', 'glazed-type': 'double glazing installed during or after 2002',
        'heating-cost-current': '1223', 'address3': '3 WESTERN GATEWAY',
        'mainheatcont-description': 'Programmer and appliance thermostats', 'sheating-energy-eff': 'N/A',
        'property-type': 'Flat', 'local-authority-label': 'Newham', 'fixed-lighting-outlets-count': '12',
        'energy-tariff': 'off-peak 7 hour', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '342',
        'county': '', 'postcode': 'E16 1BD', 'solar-water-heating-flag': 'N', 'constituency': 'E14001032',
        'co2-emissions-potential': '3.6', 'number-heated-rooms': '2', 'floor-description': '(other premises below)',
        'energy-consumption-potential': '307', 'local-authority': 'E09000025', 'built-form': 'Mid-Terrace',
        'number-open-fireplaces': '0', 'windows-description': 'Partial double glazing', 'glazed-area': 'Normal',
        'inspection-date': '2020-10-14', 'mains-gas-flag': 'N', 'co2-emiss-curr-per-floor-area': '66',
        'address1': 'FLAT 12', 'heat-loss-corridor': 'heated corridor', 'flat-storey-count': '',
        'constituency-label': 'West Ham', 'roof-energy-eff': 'N/A', 'total-floor-area': '70.0',
        'building-reference-number': '10000539740', 'environment-impact-current': '42',
        'co2-emissions-current': '4.6', 'roof-description': '(another dwelling above)', 'floor-energy-eff': 'N/A',
        'number-habitable-rooms': '2', 'address2': 'WAREHOUSE W', 'hot-water-env-eff': 'Poor', 'posttown': 'LONDON',
        'mainheatc-energy-eff': 'Good', 'main-fuel': 'electricity (not community)', 'lighting-env-eff': 'Poor',
        'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
        'lighting-description': 'Low energy lighting in 17% of fixed outlets', 'roof-env-eff': 'N/A',
        'walls-energy-eff': 'Very Poor', 'photo-supply': '0.0', 'lighting-cost-potential': '67',
        'mainheat-env-eff': 'Poor', 'multi-glaze-proportion': '61', 'main-heating-controls': '',
        'lodgement-datetime': '2020-10-14 00:00:00', 'flat-top-storey': 'N', 'current-energy-rating': 'F',
        'secondheat-description': 'None', 'walls-env-eff': 'Very Poor', 'transaction-type': 'marketed sale',
        'uprn': '10012839482', 'current-energy-efficiency': '33', 'energy-consumption-current': '393',
        'mainheat-description': 'Room heaters, electric', 'lighting-cost-current': '110',
        'lodgement-date': '2020-10-14', 'extension-count': '0', 'mainheatc-env-eff': 'Good',
        'lmk-key': 'b0d82f468273bec55ec5676a809b8e36b55db940ffa92f482a482f6aaa38eb1d', 'wind-turbine-count': '0',
        'tenure': 'Owner-occupied', 'floor-level': '01', 'potential-energy-efficiency': '71',
        'hot-water-energy-eff': 'Very Poor', 'low-energy-lighting': '17',
        'walls-description': 'Solid brick, as built, no insulation (assumed)',
        'hotwater-description': 'Electric immersion, standard tariff'
    }

    # Expected scoring record; the generated record is compared against this column by column.
    row2 = {
        'UPRN': '10012839482', 'RDSAP_CHANGE': 8, 'HEAT_DEMAND_CHANGE': -59,
        'CARBON_CHANGE': -0.5999999999999996, 'SAP_STARTING': 33, 'SAP_ENDING': 41, 'HEAT_DEMAND_STARTING': 393,
        'HEAT_DEMAND_ENDING': 334, 'CARBON_STARTING': 4.6, 'CARBON_ENDING': 4.0, 'PROPERTY_TYPE': 'Flat',
        'BUILT_FORM': 'Mid-Terrace', 'CONSTITUENCY': 'E14001032', 'NUMBER_HABITABLE_ROOMS': 2.0,
        'NUMBER_HEATED_ROOMS': 2.0, 'FIXED_LIGHTING_OUTLETS_COUNT': 12.0,
        'CONSTRUCTION_AGE_BAND': 'England and Wales: 1996-2002', 'TRANSACTION_TYPE_STARTING': 'marketed sale',
        'MECHANICAL_VENTILATION_STARTING': 'natural', 'SECONDHEAT_DESCRIPTION_STARTING': 'None',
        'ENERGY_TARIFF_STARTING': 'off-peak 7 hour', 'SOLAR_WATER_HEATING_FLAG_STARTING': 'N',
        'PHOTO_SUPPLY_STARTING': 0.0, 'GLAZED_TYPE_STARTING': 'double glazing installed during or after 2002',
        'MULTI_GLAZE_PROPORTION_STARTING': 61.0, 'LOW_ENERGY_LIGHTING_STARTING': 17.0,
        'NUMBER_OPEN_FIREPLACES_STARTING': 0.0, 'EXTENSION_COUNT_STARTING': 0.0,
        'TOTAL_FLOOR_AREA_STARTING': 70.0, 'FLOOR_HEIGHT_STARTING': 3.64,
        'TRANSACTION_TYPE_ENDING': 'marketed sale', 'MECHANICAL_VENTILATION_ENDING': 'natural',
        'SECONDHEAT_DESCRIPTION_ENDING': 'None', 'ENERGY_TARIFF_ENDING': 'off-peak 7 hour',
        'SOLAR_WATER_HEATING_FLAG_ENDING': 'N', 'PHOTO_SUPPLY_ENDING': 0.0,
        'GLAZED_TYPE_ENDING': 'double glazing installed during or after 2002',
        'MULTI_GLAZE_PROPORTION_ENDING': 61.0, 'LOW_ENERGY_LIGHTING_ENDING': 17.0,
        'NUMBER_OPEN_FIREPLACES_ENDING': 0.0, 'EXTENSION_COUNT_ENDING': 0.0, 'TOTAL_FLOOR_AREA_ENDING': 70.0,
        'FLOOR_HEIGHT_ENDING': 3.64, 'DAYS_TO_STARTING': 2266, 'DAYS_TO_ENDING': 2307,
        'walls_thermal_transmittance': 1.7, 'is_cavity_wall': False, 'is_filled_cavity': False,
        'is_solid_brick': True, 'is_system_built': False, 'is_timber_frame': False,
        'is_granite_or_whinstone': False, 'is_as_built': True, 'is_cob': False,
        'is_sandstone_or_limestone': False, 'is_park_home': False, 'walls_insulation_thickness': 'none',
        'external_insulation': False, 'internal_insulation': False, 'walls_thermal_transmittance_ENDING': 0.21,
        'is_park_home_ENDING': False, 'walls_insulation_thickness_ENDING': 'average',
        'external_insulation_ENDING': False, 'internal_insulation_ENDING': False,
        'floor_thermal_transmittance': 0.0, 'is_to_unheated_space': False, 'is_to_external_air': False,
        'is_suspended': False, 'is_solid': False, 'another_property_below': True,
        'floor_insulation_thickness': 'none', 'floor_thermal_transmittance_ENDING': 0.0,
        'floor_insulation_thickness_ENDING': 'none', 'roof_thermal_transmittance': 0.0, 'is_pitched': False,
        'is_roof_room': False, 'is_loft': False, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False,
        'has_dwelling_above': True, 'roof_insulation_thickness': 'none',
        'roof_thermal_transmittance_ENDING': 0.0, 'roof_insulation_thickness_ENDING': 'none',
        'heater_type': 'electric immersion', 'system_type': 'Unknown', 'thermostat_characteristics': 'Unknown',
        'heating_scope': 'Unknown', 'energy_recovery': 'Unknown', 'hotwater_tariff_type': 'standard tariff',
        'extra_features': 'Unknown', 'chp_systems': 'Unknown', 'distribution_system': 'Unknown',
        'no_system_present': 'Unknown', 'appliance': 'Unknown', 'heater_type_ENDING': 'electric immersion',
        'system_type_ENDING': 'Unknown', 'thermostat_characteristics_ENDING': 'Unknown',
        'heating_scope_ENDING': 'Unknown', 'energy_recovery_ENDING': 'Unknown',
        'hotwater_tariff_type_ENDING': 'standard tariff', 'extra_features_ENDING': 'Unknown',
        'chp_systems_ENDING': 'Unknown', 'distribution_system_ENDING': 'Unknown',
        'no_system_present_ENDING': 'Unknown', 'appliance_ENDING': 'Unknown', 'has_radiators': False,
        'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False,
        'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': False,
        'has_air_source_heat_pump': False, 'has_room_heaters': True, 'has_electric_storage_heaters': False,
        'has_warm_air': False, 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False,
        'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False,
        'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False,
        'has_electric_heat_pump': False, 'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False,
        'has_exhaust_source_heat_pump': False, 'has_community_heat_pump': False, 'has_electric': True,
        'has_mains_gas': False, 'has_wood_logs': False, 'has_coal': False, 'has_oil': False,
        'has_wood_pellets': False, 'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False,
        'has_smokeless_fuel': False, 'has_lpg': False, 'has_b30k': False, 'has_electricaire': False,
        'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False, 'has_radiators_ENDING': False,
        'has_fan_coil_units_ENDING': False, 'has_pipes_in_screed_above_insulation_ENDING': False,
        'has_pipes_in_insulated_timber_floor_ENDING': False, 'has_pipes_in_concrete_slab_ENDING': False,
        'has_boiler_ENDING': False, 'has_air_source_heat_pump_ENDING': False, 'has_room_heaters_ENDING': True,
        'has_electric_storage_heaters_ENDING': False, 'has_warm_air_ENDING': False,
        'has_electric_underfloor_heating_ENDING': False, 'has_electric_ceiling_heating_ENDING': False,
        'has_community_scheme_ENDING': False, 'has_ground_source_heat_pump_ENDING': False,
        'has_no_system_present_ENDING': False, 'has_portable_electric_heaters_ENDING': False,
        'has_water_source_heat_pump_ENDING': False, 'has_electric_heat_pump_ENDING': False,
        'has_micro-cogeneration_ENDING': False, 'has_solar_assisted_heat_pump_ENDING': False,
        'has_exhaust_source_heat_pump_ENDING': False, 'has_community_heat_pump_ENDING': False,
        'has_electric_ENDING': True, 'has_mains_gas_ENDING': False, 'has_wood_logs_ENDING': False,
        'has_coal_ENDING': False, 'has_oil_ENDING': False, 'has_wood_pellets_ENDING': False,
        'has_anthracite_ENDING': False, 'has_dual_fuel_mineral_and_wood_ENDING': False,
        'has_smokeless_fuel_ENDING': False, 'has_lpg_ENDING': False, 'has_b30k_ENDING': False,
        'has_electricaire_ENDING': False, 'has_assumed_for_most_rooms_ENDING': False,
        'has_underfloor_heating_ENDING': False, 'thermostatic_control': 'appliance thermostats',
        'charging_system': 'Unknown', 'switch_system': 'programmer', 'no_control': 'Unknown',
        'dhw_control': 'Unknown', 'community_heating': 'Unknown', 'multiple_room_thermostats': False,
        'auxiliary_systems': 'Unknown', 'trvs': 'Unknown', 'rate_control': 'Unknown',
        'thermostatic_control_ENDING': 'appliance thermostats', 'charging_system_ENDING': 'Unknown',
        'switch_system_ENDING': 'programmer', 'no_control_ENDING': 'Unknown', 'dhw_control_ENDING': 'Unknown',
        'community_heating_ENDING': 'Unknown', 'multiple_room_thermostats_ENDING': False,
        'auxiliary_systems_ENDING': 'Unknown', 'trvs_ENDING': 'Unknown', 'rate_control_ENDING': 'Unknown',
        'glazing_type': 'double', 'glazing_type_ENDING': 'double', 'fuel_type': 'electricity',
        'main-fuel_tariff_type': 'Unknown', 'is_community': False,
        'no_individual_heating_or_community_network': False, 'complex_fuel_type': 'Unknown',
        'fuel_type_ENDING': 'electricity', 'main-fuel_tariff_type_ENDING': 'Unknown',
        'is_community_ENDING': False, 'no_individual_heating_or_community_network_ENDING': False,
        'complex_fuel_type_ENDING': 'Unknown', 'estimated_perimeter_STARTING': 35.4964786985977,
        'estimated_perimeter_ENDING': 35.4964786985977,
        'HOT_WATER_ENERGY_EFF_STARTING': "Very Poor",
        "FLOOR_ENERGY_EFF_STARTING": "Unknown",
        "WINDOWS_ENERGY_EFF_STARTING": "Average",
        "WALLS_ENERGY_EFF_STARTING": "Very Poor",
        "SHEATING_ENERGY_EFF_STARTING": "Unknown",
        "ROOF_ENERGY_EFF_STARTING": "Unknown",
        "MAINHEAT_ENERGY_EFF_STARTING": "Very Poor",
        "MAINHEATC_ENERGY_EFF_STARTING": "Good",
        "LIGHTING_ENERGY_EFF_STARTING": "Poor",
        "POTENTIAL_ENERGY_EFFICIENCY": 71,
        "ENVIRONMENT_IMPACT_POTENTIAL": 51,
        "ENERGY_CONSUMPTION_POTENTIAL": 307,
        "CO2_EMISSIONS_POTENTIAL": 3.6,
        'HOT_WATER_ENERGY_EFF_ENDING': "Very Poor",
        "FLOOR_ENERGY_EFF_ENDING": "Unknown",
        "WINDOWS_ENERGY_EFF_ENDING": "Average",
        "WALLS_ENERGY_EFF_ENDING": "Good",
        "SHEATING_ENERGY_EFF_ENDING": "Unknown",
        "ROOF_ENERGY_EFF_ENDING": "Unknown",
        "MAINHEAT_ENERGY_EFF_ENDING": "Very Poor",
        "MAINHEATC_ENERGY_EFF_ENDING": "Good",
        "LIGHTING_ENERGY_EFF_ENDING": "Poor",
    }

    # The token is a placeholder; the EPC record is supplied directly through `data`.
    home2 = Property(
        id=0,
        postcode=starting_epc2["postcode"],
        address1=starting_epc2["address1"],
        epc_client=EpcClient(auth_token="notoken"),
        data=starting_epc2
    )
    home2.get_components(cleaned)

    data_processor2 = DataProcessor(None, newdata=True)
    data_processor2.insert_data(pd.DataFrame([home2.get_model_data()]))

    data_processor2.pre_process()

    starting_epc_data2 = data_processor2.get_component_features(suffix="_STARTING")
    ending_epc_data2 = data_processor2.get_component_features(suffix="_ENDING")
    fixed_data2 = data_processor2.get_fixed_features()

    ending_lodgement_date2 = '2020-11-24'

    ending_epc_data2["DAYS_TO_ENDING"] = data_processor2.calculate_days_to(ending_lodgement_date2)

    recommendation2 = {
        "recommendation_id": 0,
        "new_u_value": 0.21,
        "type": "wall_insulation"
    }

    test_record2 = create_recommendation_scoring_data(
        property=home2,
        recommendation=recommendation2,
        starting_epc_data=starting_epc_data2,
        ending_epc_data=ending_epc_data2,
        fixed_data=fixed_data2,
    )
    test_record2 = pd.DataFrame([test_record2])

    # Test the final cleaning:
    test_record2 = DataProcessor.apply_averages_cleaning(
        data_to_clean=test_record2,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"]
    ).drop(columns=["LOCAL_AUTHORITY"])

    test_record2 = DataProcessor.clean_missings_after_description_process(
        test_record2, [
            c for c in test_record2.columns if
            ("thermal_transmittance" in c) or ("insulation_thickness" in c)
        ]
    )

    for c in test_record2.columns:
        # These columns are not compared against the expected row.
        if c in ["id", "SAP_ENDING", "HEAT_DEMAND_ENDING", "CARBON_ENDING"]:
            continue

        if c == "FLOOR_HEIGHT_ENDING":
            # Symmetric tolerance: comparing the signed difference would let any generated
            # value larger than the expected one pass unnoticed.
            assert abs(row2[c] - test_record2[c].values[0]) <= 0.020001
            continue

        if c == "walls_insulation_thickness_ENDING":
            # The expected row and the generated record are pinned to different labels here.
            assert row2[c] == "average"
            assert test_record2[c].values[0] == "above average"
            continue

        if c == "CONSTRUCTION_AGE_BAND":
            # For this, we have different values in the original data
            assert row2[c] == "England and Wales: 1996-2002"
            assert test_record2[c].values[0] == "England and Wales: 1900-1929"
            continue

        assert test_record2[c].values[0] == row2[c]
|
||||
|
||||
def test_ventilation(self, cleaned, cleaning_data):
    """
    Score a mechanical ventilation recommendation against the EPC of a Sheffield house
    and check each generated column against the expected record.

    The expected record keeps 'natural' for MECHANICAL_VENTILATION_STARTING and has
    'mechanical, extract only' for MECHANICAL_VENTILATION_ENDING.

    :param cleaned: handed to ``Property.get_components``
        (presumably a pytest fixture - confirm against the fixture definition)
    :param cleaning_data: handed to ``DataProcessor.apply_averages_cleaning``
        (presumably a pytest fixture - confirm against the fixture definition)
    """

    # Raw EPC record the property is built from.
    raw_epc = {
        'low-energy-fixed-light-count': '', 'address': '45 Shepperson Road', 'uprn-source': 'Energy Assessor',
        'floor-height': '1.87', 'heating-cost-potential': '645', 'unheated-corridor-length': '',
        'hot-water-cost-potential': '69', 'construction-age-band': 'England and Wales: 1900-1929',
        'potential-energy-rating': 'C', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average',
        'lighting-energy-eff': 'Average', 'environment-impact-potential': '75',
        'glazed-type': 'double glazing, unknown install date', 'heating-cost-current': '1028', 'address3': '',
        'mainheatcont-description': 'Programmer, TRVs and bypass', 'sheating-energy-eff': 'N/A',
        'property-type': 'House', 'local-authority-label': 'Sheffield', 'fixed-lighting-outlets-count': '21',
        'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '96',
        'county': '', 'postcode': 'S6 4FG', 'solar-water-heating-flag': 'N', 'constituency': 'E14000921',
        'co2-emissions-potential': '2.9', 'number-heated-rooms': '5',
        'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '152',
        'local-authority': 'E08000019', 'built-form': 'Enclosed Mid-Terrace', 'number-open-fireplaces': '0',
        'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2022-06-13',
        'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '59', 'address1': '45 Shepperson Road',
        'heat-loss-corridor': '', 'flat-storey-count': '',
        'constituency-label': 'Sheffield, Brightside and Hillsborough', 'roof-energy-eff': 'Very Poor',
        'total-floor-area': '107.0', 'building-reference-number': '10002892085', 'environment-impact-current': '46',
        'co2-emissions-current': '6.3', 'roof-description': 'Pitched, no insulation (assumed)',
        'floor-energy-eff': 'N/A', 'number-habitable-rooms': '5', 'address2': '', 'hot-water-env-eff': 'Good',
        'posttown': 'SHEFFIELD', 'mainheatc-energy-eff': 'Average', 'main-fuel': 'mains gas (not community)',
        'lighting-env-eff': 'Average', 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A',
        'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in 43% of fixed outlets',
        'roof-env-eff': 'Very Poor', 'walls-energy-eff': 'Very Poor', 'photo-supply': '0.0',
        'lighting-cost-potential': '83', 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100',
        'main-heating-controls': '', 'lodgement-datetime': '2023-05-27 12:15:21', 'flat-top-storey': '',
        'current-energy-rating': 'E', 'secondheat-description': 'None', 'walls-env-eff': 'Very Poor',
        'transaction-type': 'marketed sale', 'uprn': '100051073214', 'current-energy-efficiency': '54',
        'energy-consumption-current': '335', 'mainheat-description': 'Boiler and radiators, mains gas',
        'lighting-cost-current': '131', 'lodgement-date': '2023-05-27', 'extension-count': '1',
        'mainheatc-env-eff': 'Average',
        'lmk-key': 'dc1a4da246562656132b8e36e0534cd90b09fa40fc584e25e644e2d9ab86a247', 'wind-turbine-count': '0',
        'tenure': 'Not defined - use in the case of a new dwelling for which the intended tenure in not known. It '
                  'is not to be used for an existing dwelling',
        'floor-level': '', 'potential-energy-efficiency': '80', 'hot-water-energy-eff': 'Good',
        'low-energy-lighting': '43',
        'walls-description': 'Sandstone or limestone, as built, no insulation (assumed)',
        'hotwater-description': 'From main system'
    }

    # Expected scoring record, keyed by output column name.
    expected = {
        'UPRN': '100051073214', 'RDSAP_CHANGE': 2, 'HEAT_DEMAND_CHANGE': -22, 'CARBON_CHANGE': -0.39999999999999947,
        'SAP_STARTING': 54, 'SAP_ENDING': 56, 'HEAT_DEMAND_STARTING': 335, 'HEAT_DEMAND_ENDING': 313,
        'CARBON_STARTING': 6.3, 'CARBON_ENDING': 5.9, 'PROPERTY_TYPE': 'House', 'BUILT_FORM': 'Mid-Terrace',
        'CONSTITUENCY': 'E14000921', 'NUMBER_HABITABLE_ROOMS': 5.0, 'NUMBER_HEATED_ROOMS': 5.0,
        'FIXED_LIGHTING_OUTLETS_COUNT': 21.0, 'CONSTRUCTION_AGE_BAND': 'England and Wales: 1900-1929',
        'TRANSACTION_TYPE_STARTING': 'marketed sale', 'MECHANICAL_VENTILATION_STARTING': 'natural',
        'SECONDHEAT_DESCRIPTION_STARTING': 'None', 'ENERGY_TARIFF_STARTING': 'Single',
        'SOLAR_WATER_HEATING_FLAG_STARTING': 'N', 'PHOTO_SUPPLY_STARTING': 0.0,
        'GLAZED_TYPE_STARTING': 'double glazing, unknown install date', 'MULTI_GLAZE_PROPORTION_STARTING': 100.0,
        'LOW_ENERGY_LIGHTING_STARTING': 43.0, 'NUMBER_OPEN_FIREPLACES_STARTING': 0.0,
        'EXTENSION_COUNT_STARTING': 1.0, 'TOTAL_FLOOR_AREA_STARTING': 107.0, 'FLOOR_HEIGHT_STARTING': 1.87,
        'TRANSACTION_TYPE_ENDING': 'marketed sale', 'MECHANICAL_VENTILATION_ENDING': 'mechanical, extract only',
        'SECONDHEAT_DESCRIPTION_ENDING': 'None', 'ENERGY_TARIFF_ENDING': 'Single',
        'SOLAR_WATER_HEATING_FLAG_ENDING': 'N', 'PHOTO_SUPPLY_ENDING': 0.0,
        'GLAZED_TYPE_ENDING': 'double glazing, unknown install date', 'MULTI_GLAZE_PROPORTION_ENDING': 100.0,
        'LOW_ENERGY_LIGHTING_ENDING': 43.0, 'NUMBER_OPEN_FIREPLACES_ENDING': 0.0, 'EXTENSION_COUNT_ENDING': 1.0,
        'TOTAL_FLOOR_AREA_ENDING': 107.0, 'FLOOR_HEIGHT_ENDING': 1.87, 'DAYS_TO_STARTING': 3221,
        'DAYS_TO_ENDING': 2874, 'walls_thermal_transmittance': 2.0, 'is_cavity_wall': False,
        'is_filled_cavity': False, 'is_solid_brick': False, 'is_system_built': False, 'is_timber_frame': False,
        'is_granite_or_whinstone': False, 'is_as_built': True, 'is_cob': False, 'is_sandstone_or_limestone': True,
        'is_park_home': False, 'walls_insulation_thickness': 'none', 'external_insulation': False,
        'internal_insulation': False, 'walls_thermal_transmittance_ENDING': 2.0, 'is_park_home_ENDING': False,
        'walls_insulation_thickness_ENDING': 'none', 'external_insulation_ENDING': False,
        'internal_insulation_ENDING': False, 'floor_thermal_transmittance': 0.62, 'is_to_unheated_space': False,
        'is_to_external_air': False, 'is_suspended': True, 'is_solid': False, 'another_property_below': False,
        'floor_insulation_thickness': 'none', 'floor_thermal_transmittance_ENDING': 0.62,
        'floor_insulation_thickness_ENDING': 'none', 'roof_thermal_transmittance': 2.3, 'is_pitched': True,
        'is_roof_room': False, 'is_loft': False, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False,
        'has_dwelling_above': False, 'roof_insulation_thickness': 'none', 'roof_thermal_transmittance_ENDING': 2.3,
        'roof_insulation_thickness_ENDING': 'none', 'heater_type': 'Unknown', 'system_type': 'from main system',
        'thermostat_characteristics': 'Unknown', 'heating_scope': 'Unknown', 'energy_recovery': 'Unknown',
        'hotwater_tariff_type': 'Unknown', 'extra_features': 'Unknown', 'chp_systems': 'Unknown',
        'distribution_system': 'Unknown', 'no_system_present': 'Unknown', 'appliance': 'Unknown',
        'heater_type_ENDING': 'Unknown', 'system_type_ENDING': 'from main system',
        'thermostat_characteristics_ENDING': 'Unknown', 'heating_scope_ENDING': 'Unknown',
        'energy_recovery_ENDING': 'Unknown', 'hotwater_tariff_type_ENDING': 'Unknown',
        'extra_features_ENDING': 'Unknown', 'chp_systems_ENDING': 'Unknown',
        'distribution_system_ENDING': 'Unknown', 'no_system_present_ENDING': 'Unknown',
        'appliance_ENDING': 'Unknown', 'has_radiators': True, 'has_fan_coil_units': False,
        'has_pipes_in_screed_above_insulation': False, 'has_pipes_in_insulated_timber_floor': False,
        'has_pipes_in_concrete_slab': False, 'has_boiler': True, 'has_air_source_heat_pump': False,
        'has_room_heaters': False, 'has_electric_storage_heaters': False, 'has_warm_air': False,
        'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False,
        'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False,
        'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False,
        'has_electric_heat_pump': False, 'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False,
        'has_exhaust_source_heat_pump': False, 'has_community_heat_pump': False, 'has_electric': False,
        'has_mains_gas': True, 'has_wood_logs': False, 'has_coal': False, 'has_oil': False,
        'has_wood_pellets': False, 'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False,
        'has_smokeless_fuel': False, 'has_lpg': False, 'has_b30k': False, 'has_electricaire': False,
        'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False, 'has_radiators_ENDING': True,
        'has_fan_coil_units_ENDING': False, 'has_pipes_in_screed_above_insulation_ENDING': False,
        'has_pipes_in_insulated_timber_floor_ENDING': False, 'has_pipes_in_concrete_slab_ENDING': False,
        'has_boiler_ENDING': True, 'has_air_source_heat_pump_ENDING': False, 'has_room_heaters_ENDING': False,
        'has_electric_storage_heaters_ENDING': False, 'has_warm_air_ENDING': False,
        'has_electric_underfloor_heating_ENDING': False, 'has_electric_ceiling_heating_ENDING': False,
        'has_community_scheme_ENDING': False, 'has_ground_source_heat_pump_ENDING': False,
        'has_no_system_present_ENDING': False, 'has_portable_electric_heaters_ENDING': False,
        'has_water_source_heat_pump_ENDING': False, 'has_electric_heat_pump_ENDING': False,
        'has_micro-cogeneration_ENDING': False, 'has_solar_assisted_heat_pump_ENDING': False,
        'has_exhaust_source_heat_pump_ENDING': False, 'has_community_heat_pump_ENDING': False,
        'has_electric_ENDING': False, 'has_mains_gas_ENDING': True, 'has_wood_logs_ENDING': False,
        'has_coal_ENDING': False, 'has_oil_ENDING': False, 'has_wood_pellets_ENDING': False,
        'has_anthracite_ENDING': False, 'has_dual_fuel_mineral_and_wood_ENDING': False,
        'has_smokeless_fuel_ENDING': False, 'has_lpg_ENDING': False, 'has_b30k_ENDING': False,
        'has_electricaire_ENDING': False, 'has_assumed_for_most_rooms_ENDING': False,
        'has_underfloor_heating_ENDING': False, 'thermostatic_control': 'Unknown', 'charging_system': 'Unknown',
        'switch_system': 'programmer', 'no_control': 'Unknown', 'dhw_control': 'Unknown',
        'community_heating': 'Unknown', 'multiple_room_thermostats': False, 'auxiliary_systems': 'bypass',
        'trvs': 'trvs', 'rate_control': 'Unknown', 'thermostatic_control_ENDING': 'Unknown',
        'charging_system_ENDING': 'Unknown', 'switch_system_ENDING': 'programmer', 'no_control_ENDING': 'Unknown',
        'dhw_control_ENDING': 'Unknown', 'community_heating_ENDING': 'Unknown',
        'multiple_room_thermostats_ENDING': False, 'auxiliary_systems_ENDING': 'bypass', 'trvs_ENDING': 'trvs',
        'rate_control_ENDING': 'Unknown', 'glazing_type': 'double', 'glazing_type_ENDING': 'double',
        'fuel_type': 'mains gas', 'main-fuel_tariff_type': 'Unknown', 'is_community': False,
        'no_individual_heating_or_community_network': False, 'complex_fuel_type': 'Unknown',
        'fuel_type_ENDING': 'mains gas', 'main-fuel_tariff_type_ENDING': 'Unknown', 'is_community_ENDING': False,
        'no_individual_heating_or_community_network_ENDING': False, 'complex_fuel_type_ENDING': 'Unknown',
        'estimated_perimeter_STARTING': 41.634120622393354, 'estimated_perimeter_ENDING': 41.634120622393354,
        'HOT_WATER_ENERGY_EFF_STARTING': "Good",
        "FLOOR_ENERGY_EFF_STARTING": "Unknown",
        "WINDOWS_ENERGY_EFF_STARTING": "Average",
        "WALLS_ENERGY_EFF_STARTING": "Very Poor",
        "SHEATING_ENERGY_EFF_STARTING": "Unknown",
        "ROOF_ENERGY_EFF_STARTING": "Very Poor",
        "MAINHEAT_ENERGY_EFF_STARTING": "Good",
        "MAINHEATC_ENERGY_EFF_STARTING": "Average",
        "LIGHTING_ENERGY_EFF_STARTING": "Average",
        "POTENTIAL_ENERGY_EFFICIENCY": 80,
        "ENVIRONMENT_IMPACT_POTENTIAL": 75,
        "ENERGY_CONSUMPTION_POTENTIAL": 152,
        "CO2_EMISSIONS_POTENTIAL": 2.9,
        'HOT_WATER_ENERGY_EFF_ENDING': "Good",
        "FLOOR_ENERGY_EFF_ENDING": "Unknown",
        "WINDOWS_ENERGY_EFF_ENDING": "Average",
        "WALLS_ENERGY_EFF_ENDING": "Very Poor",
        "SHEATING_ENERGY_EFF_ENDING": "Unknown",
        "ROOF_ENERGY_EFF_ENDING": "Very Poor",
        "MAINHEAT_ENERGY_EFF_ENDING": "Good",
        "MAINHEATC_ENERGY_EFF_ENDING": "Average",
        "LIGHTING_ENERGY_EFF_ENDING": "Average",
    }

    # Build the property straight from the record above; the token is a placeholder.
    dwelling = Property(
        id=0,
        postcode=raw_epc["postcode"],
        address1=raw_epc["address1"],
        epc_client=EpcClient(auth_token="notoken"),
        data=raw_epc
    )
    dwelling.get_components(cleaned)

    # Feed the single property through the pre-processing pipeline.
    processor = DataProcessor(None, newdata=True)
    model_frame = pd.DataFrame([dwelling.get_model_data()])
    processor.insert_data(model_frame)
    processor.pre_process()

    starting_features = processor.get_component_features(suffix="_STARTING")
    ending_features = processor.get_component_features(suffix="_ENDING")
    fixed_features = processor.get_fixed_features()

    ending_features["DAYS_TO_ENDING"] = processor.calculate_days_to('2022-06-14')

    measure = {
        "recommendation_id": 0,
        "type": "mechanical_ventilation"
    }

    scored = pd.DataFrame([
        create_recommendation_scoring_data(
            property=dwelling,
            recommendation=measure,
            starting_epc_data=starting_features,
            ending_epc_data=ending_features,
            fixed_data=fixed_features,
        )
    ])

    # Test the final cleaning:
    merged = DataProcessor.apply_averages_cleaning(
        data_to_clean=scored,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"]
    )
    scored = merged.drop(columns=["LOCAL_AUTHORITY"])

    # Columns whose name mentions a u-value or an insulation thickness.
    fabric_columns = [
        name for name in scored.columns
        if any(marker in name for marker in ("thermal_transmittance", "insulation_thickness"))
    ]
    scored = DataProcessor.clean_missings_after_description_process(scored, fabric_columns)

    # These columns are left out of the comparison with the expected record.
    ignored = ["id", "SAP_ENDING", "HEAT_DEMAND_ENDING", "CARBON_ENDING"]
    for column in scored.columns:
        if column not in ignored:
            assert scored[column].values[0] == expected[column]
|
||||
|
||||
def test_fireplaces(self, cleaned, cleaning_data):
    """
    Check the scoring record built for a "sealing_open_fireplace" recommendation.

    A single EPC (one open fireplace) is pushed through the property / data processor pipeline and the
    resulting scoring record is compared, column by column, with the expected values below. The target
    columns (SAP, heat demand and carbon after the works) and the id are not compared.
    """

    # Raw EPC record used as the starting point for the property
    epc_input = {
        'low-energy-fixed-light-count': '', 'address': '9 Glebe Road, Asfordby Hill',
        'uprn-source': 'Energy Assessor', 'floor-height': '2.4', 'heating-cost-potential': '501',
        'unheated-corridor-length': '', 'hot-water-cost-potential': '70',
        'construction-age-band': 'England and Wales: 1930-1949', 'potential-energy-rating': 'C',
        'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Average',
        'environment-impact-potential': '76', 'glazed-type': 'double glazing, unknown install date',
        'heating-cost-current': '723', 'address3': '',
        'mainheatcont-description': 'Programmer and room thermostat', 'sheating-energy-eff': 'N/A',
        'property-type': 'House', 'local-authority-label': 'Melton',
        'fixed-lighting-outlets-count': '14', 'energy-tariff': 'dual',
        'mechanical-ventilation': 'natural', 'hot-water-cost-current': '98',
        'county': 'Leicestershire', 'postcode': 'LE14 3QT', 'solar-water-heating-flag': 'N',
        'constituency': 'E14000909', 'co2-emissions-potential': '2.4', 'number-heated-rooms': '5',
        'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '153',
        'local-authority': 'E07000133', 'built-form': 'Semi-Detached', 'number-open-fireplaces': '1',
        'windows-description': 'Fully double glazed', 'glazed-area': 'Normal',
        'inspection-date': '2022-06-27', 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '46',
        'address1': '9 Glebe Road', 'heat-loss-corridor': '', 'flat-storey-count': '',
        'constituency-label': 'Rutland and Melton', 'roof-energy-eff': 'Good',
        'total-floor-area': '87.0', 'building-reference-number': '10002396876',
        'environment-impact-current': '60', 'co2-emissions-current': '4.0',
        'roof-description': 'Pitched, 200 mm loft insulation', 'floor-energy-eff': 'N/A',
        'number-habitable-rooms': '5', 'address2': 'Asfordby Hill', 'hot-water-env-eff': 'Good',
        'posttown': 'MELTON MOWBRAY', 'mainheatc-energy-eff': 'Average',
        'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Average',
        'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
        'lighting-description': 'Low energy lighting in 29% of fixed outlets', 'roof-env-eff': 'Good',
        'walls-energy-eff': 'Very Poor', 'photo-supply': '15.0', 'lighting-cost-potential': '79',
        'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
        'lodgement-datetime': '2022-06-27 15:28:18', 'flat-top-storey': '',
        'current-energy-rating': 'D',
        'secondheat-description': 'Room heaters, dual fuel (mineral and wood)',
        'walls-env-eff': 'Very Poor', 'transaction-type': 'ECO assessment', 'uprn': '100030539619',
        'current-energy-efficiency': '66', 'energy-consumption-current': '256',
        'mainheat-description': 'Boiler and radiators, mains gas', 'lighting-cost-current': '135',
        'lodgement-date': '2022-06-27', 'extension-count': '1', 'mainheatc-env-eff': 'Average',
        'lmk-key': '736b6f4803a11d9e45b49bf98f36eb8a7f357b0dd24f3e7cddef5295518e5bef',
        'wind-turbine-count': '0', 'tenure': 'Owner-occupied', 'floor-level': '',
        'potential-energy-efficiency': '78', 'hot-water-energy-eff': 'Good',
        'low-energy-lighting': '29',
        'walls-description': 'Solid brick, as built, no insulation (assumed)',
        'hotwater-description': 'From main system'
    }

    # Expected scoring record: identical before/after, except the open fireplace count drops from 1 to 0
    expected = {
        'UPRN': '100030539619', 'RDSAP_CHANGE': 7, 'HEAT_DEMAND_CHANGE': -41, 'CARBON_CHANGE': -0.5,
        'SAP_STARTING': 66, 'SAP_ENDING': 73, 'HEAT_DEMAND_STARTING': 256, 'HEAT_DEMAND_ENDING': 215,
        'CARBON_STARTING': 4.0, 'CARBON_ENDING': 3.5, 'PROPERTY_TYPE': 'House', 'BUILT_FORM': 'Semi-Detached',
        'CONSTITUENCY': 'E14000909', 'NUMBER_HABITABLE_ROOMS': 5.0, 'NUMBER_HEATED_ROOMS': 5.0,
        'FIXED_LIGHTING_OUTLETS_COUNT': 14.0, 'CONSTRUCTION_AGE_BAND': 'England and Wales: 1930-1949',
        'TRANSACTION_TYPE_STARTING': 'eco assessment', 'MECHANICAL_VENTILATION_STARTING': 'natural',
        'SECONDHEAT_DESCRIPTION_STARTING': 'Room heaters, dual fuel (mineral and wood)',
        'ENERGY_TARIFF_STARTING': 'dual', 'SOLAR_WATER_HEATING_FLAG_STARTING': 'N', 'PHOTO_SUPPLY_STARTING': 15.0,
        'GLAZED_TYPE_STARTING': 'double glazing, unknown install date', 'MULTI_GLAZE_PROPORTION_STARTING': 100.0,
        'LOW_ENERGY_LIGHTING_STARTING': 29.0, 'NUMBER_OPEN_FIREPLACES_STARTING': 1.0,
        'EXTENSION_COUNT_STARTING': 1.0, 'TOTAL_FLOOR_AREA_STARTING': 87.0, 'FLOOR_HEIGHT_STARTING': 2.4,
        'TRANSACTION_TYPE_ENDING': 'eco assessment', 'MECHANICAL_VENTILATION_ENDING': 'natural',
        'SECONDHEAT_DESCRIPTION_ENDING': 'Room heaters, dual fuel (mineral and wood)',
        'ENERGY_TARIFF_ENDING': 'dual', 'SOLAR_WATER_HEATING_FLAG_ENDING': 'N', 'PHOTO_SUPPLY_ENDING': 15.0,
        'GLAZED_TYPE_ENDING': 'double glazing, unknown install date', 'MULTI_GLAZE_PROPORTION_ENDING': 100.0,
        'LOW_ENERGY_LIGHTING_ENDING': 29.0, 'NUMBER_OPEN_FIREPLACES_ENDING': 0, 'EXTENSION_COUNT_ENDING': 1.0,
        'TOTAL_FLOOR_AREA_ENDING': 87.0, 'FLOOR_HEIGHT_ENDING': 2.4, 'DAYS_TO_STARTING': 2887,
        'DAYS_TO_ENDING': 2960, 'walls_thermal_transmittance': 1.7, 'is_cavity_wall': False,
        'is_filled_cavity': False, 'is_solid_brick': True, 'is_system_built': False, 'is_timber_frame': False,
        'is_granite_or_whinstone': False, 'is_as_built': True, 'is_cob': False, 'is_sandstone_or_limestone': False,
        'is_park_home': False, 'walls_insulation_thickness': 'none', 'external_insulation': False,
        'internal_insulation': False, 'walls_thermal_transmittance_ENDING': 1.7, 'is_park_home_ENDING': False,
        'walls_insulation_thickness_ENDING': 'none', 'external_insulation_ENDING': False,
        'internal_insulation_ENDING': False, 'floor_thermal_transmittance': 0.66, 'is_to_unheated_space': False,
        'is_to_external_air': False, 'is_suspended': False, 'is_solid': True, 'another_property_below': False,
        'floor_insulation_thickness': 'none', 'floor_thermal_transmittance_ENDING': 0.66,
        'floor_insulation_thickness_ENDING': 'none', 'roof_thermal_transmittance': 0.21, 'is_pitched': True,
        'is_roof_room': False, 'is_loft': True, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False,
        'has_dwelling_above': False, 'roof_insulation_thickness': '200', 'roof_thermal_transmittance_ENDING': 0.21,
        'roof_insulation_thickness_ENDING': '200', 'heater_type': 'Unknown', 'system_type': 'from main system',
        'thermostat_characteristics': 'Unknown', 'heating_scope': 'Unknown', 'energy_recovery': 'Unknown',
        'hotwater_tariff_type': 'Unknown', 'extra_features': 'Unknown', 'chp_systems': 'Unknown',
        'distribution_system': 'Unknown', 'no_system_present': 'Unknown', 'appliance': 'Unknown',
        'heater_type_ENDING': 'Unknown', 'system_type_ENDING': 'from main system',
        'thermostat_characteristics_ENDING': 'Unknown', 'heating_scope_ENDING': 'Unknown',
        'energy_recovery_ENDING': 'Unknown', 'hotwater_tariff_type_ENDING': 'Unknown',
        'extra_features_ENDING': 'Unknown', 'chp_systems_ENDING': 'Unknown',
        'distribution_system_ENDING': 'Unknown', 'no_system_present_ENDING': 'Unknown',
        'appliance_ENDING': 'Unknown', 'has_radiators': True, 'has_fan_coil_units': False,
        'has_pipes_in_screed_above_insulation': False, 'has_pipes_in_insulated_timber_floor': False,
        'has_pipes_in_concrete_slab': False, 'has_boiler': True, 'has_air_source_heat_pump': False,
        'has_room_heaters': False, 'has_electric_storage_heaters': False, 'has_warm_air': False,
        'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False,
        'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False,
        'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False,
        'has_electric_heat_pump': False, 'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False,
        'has_exhaust_source_heat_pump': False, 'has_community_heat_pump': False, 'has_electric': False,
        'has_mains_gas': True, 'has_wood_logs': False, 'has_coal': False, 'has_oil': False,
        'has_wood_pellets': False, 'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False,
        'has_smokeless_fuel': False, 'has_lpg': False, 'has_b30k': False, 'has_electricaire': False,
        'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False, 'has_radiators_ENDING': True,
        'has_fan_coil_units_ENDING': False, 'has_pipes_in_screed_above_insulation_ENDING': False,
        'has_pipes_in_insulated_timber_floor_ENDING': False, 'has_pipes_in_concrete_slab_ENDING': False,
        'has_boiler_ENDING': True, 'has_air_source_heat_pump_ENDING': False, 'has_room_heaters_ENDING': False,
        'has_electric_storage_heaters_ENDING': False, 'has_warm_air_ENDING': False,
        'has_electric_underfloor_heating_ENDING': False, 'has_electric_ceiling_heating_ENDING': False,
        'has_community_scheme_ENDING': False, 'has_ground_source_heat_pump_ENDING': False,
        'has_no_system_present_ENDING': False, 'has_portable_electric_heaters_ENDING': False,
        'has_water_source_heat_pump_ENDING': False, 'has_electric_heat_pump_ENDING': False,
        'has_micro-cogeneration_ENDING': False, 'has_solar_assisted_heat_pump_ENDING': False,
        'has_exhaust_source_heat_pump_ENDING': False, 'has_community_heat_pump_ENDING': False,
        'has_electric_ENDING': False, 'has_mains_gas_ENDING': True, 'has_wood_logs_ENDING': False,
        'has_coal_ENDING': False, 'has_oil_ENDING': False, 'has_wood_pellets_ENDING': False,
        'has_anthracite_ENDING': False, 'has_dual_fuel_mineral_and_wood_ENDING': False,
        'has_smokeless_fuel_ENDING': False, 'has_lpg_ENDING': False, 'has_b30k_ENDING': False,
        'has_electricaire_ENDING': False, 'has_assumed_for_most_rooms_ENDING': False,
        'has_underfloor_heating_ENDING': False, 'thermostatic_control': 'room thermostat',
        'charging_system': 'Unknown', 'switch_system': 'programmer', 'no_control': 'Unknown',
        'dhw_control': 'Unknown', 'community_heating': 'Unknown', 'multiple_room_thermostats': False,
        'auxiliary_systems': 'Unknown', 'trvs': 'Unknown', 'rate_control': 'Unknown',
        'thermostatic_control_ENDING': 'room thermostat', 'charging_system_ENDING': 'Unknown',
        'switch_system_ENDING': 'programmer', 'no_control_ENDING': 'Unknown', 'dhw_control_ENDING': 'Unknown',
        'community_heating_ENDING': 'Unknown', 'multiple_room_thermostats_ENDING': False,
        'auxiliary_systems_ENDING': 'Unknown', 'trvs_ENDING': 'Unknown', 'rate_control_ENDING': 'Unknown',
        'glazing_type': 'double', 'glazing_type_ENDING': 'double', 'fuel_type': 'mains gas',
        'main-fuel_tariff_type': 'Unknown', 'is_community': False,
        'no_individual_heating_or_community_network': False, 'complex_fuel_type': 'Unknown',
        'fuel_type_ENDING': 'mains gas', 'main-fuel_tariff_type_ENDING': 'Unknown', 'is_community_ENDING': False,
        'no_individual_heating_or_community_network_ENDING': False, 'complex_fuel_type_ENDING': 'Unknown',
        'estimated_perimeter_STARTING': 37.54197650630557, 'estimated_perimeter_ENDING': 37.54197650630557,
        'HOT_WATER_ENERGY_EFF_STARTING': "Good",
        "FLOOR_ENERGY_EFF_STARTING": "Unknown",
        "WINDOWS_ENERGY_EFF_STARTING": "Average",
        "WALLS_ENERGY_EFF_STARTING": "Very Poor",
        "SHEATING_ENERGY_EFF_STARTING": "Unknown",
        "ROOF_ENERGY_EFF_STARTING": "Good",
        "MAINHEAT_ENERGY_EFF_STARTING": "Good",
        "MAINHEATC_ENERGY_EFF_STARTING": "Average",
        "LIGHTING_ENERGY_EFF_STARTING": "Average",
        "POTENTIAL_ENERGY_EFFICIENCY": 78,
        "ENVIRONMENT_IMPACT_POTENTIAL": 76,
        "ENERGY_CONSUMPTION_POTENTIAL": 153,
        "CO2_EMISSIONS_POTENTIAL": 2.4,
        'HOT_WATER_ENERGY_EFF_ENDING': "Good",
        "FLOOR_ENERGY_EFF_ENDING": "Unknown",
        "WINDOWS_ENERGY_EFF_ENDING": "Average",
        "WALLS_ENERGY_EFF_ENDING": "Very Poor",
        "SHEATING_ENERGY_EFF_ENDING": "Unknown",
        "ROOF_ENERGY_EFF_ENDING": "Good",
        "MAINHEAT_ENERGY_EFF_ENDING": "Good",
        "MAINHEATC_ENERGY_EFF_ENDING": "Average",
        "LIGHTING_ENERGY_EFF_ENDING": "Average",
    }

    # Build the property from the raw EPC and derive its components
    home = Property(
        id=0,
        postcode=epc_input["postcode"],
        address1=epc_input["address1"],
        epc_client=EpcClient(auth_token="notoken"),
        data=epc_input
    )
    home.get_components(cleaned)

    # Run the single record through the data processor
    processor = DataProcessor(None, newdata=True)
    processor.insert_data(pd.DataFrame([home.get_model_data()]))
    processor.pre_process()

    starting_features = processor.get_component_features(suffix="_STARTING")
    ending_features = processor.get_component_features(suffix="_ENDING")
    fixed_features = processor.get_fixed_features()

    # Lodgement date of the follow-up EPC
    ending_features["DAYS_TO_ENDING"] = processor.calculate_days_to('2022-09-08')

    scoring_record = create_recommendation_scoring_data(
        property=home,
        recommendation={"recommendation_id": 0, "type": "sealing_open_fireplace"},
        starting_epc_data=starting_features,
        ending_epc_data=ending_features,
        fixed_data=fixed_features,
    )
    scored = pd.DataFrame([scoring_record])

    # Test the final cleaning:
    averaged = DataProcessor.apply_averages_cleaning(
        data_to_clean=scored,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"]
    )
    scored = averaged.drop(columns=["LOCAL_AUTHORITY"])

    description_derived_columns = [
        name for name in scored.columns
        if ("thermal_transmittance" in name) or ("insulation_thickness" in name)
    ]
    scored = DataProcessor.clean_missings_after_description_process(scored, description_derived_columns)

    # The id and the target columns are not part of the comparison
    not_compared = ("id", "SAP_ENDING", "HEAT_DEMAND_ENDING", "CARBON_ENDING")
    for name in scored.columns:
        if name not in not_compared:
            assert scored[name].values[0] == expected[name]
|
||||
78
etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py
Normal file
78
etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
|
||||
from utils.logger import setup_logger
|
||||
from etl.epc.settings import EARLIEST_EPC_DATE
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class AirSourceHeatPumpEfficiency:
    """
    Counts how often each heating set-up occurs among EPC certificates that mention an air source heat pump.

    For every directory of EPC data, the certificates are filtered and the number of properties per unique
    combination of HEATING_COLUMNS is stored in `self.results` (one dataframe per directory).
    """

    # Columns describing the heating set-up; properties are counted per unique combination of these
    HEATING_COLUMNS = [
        "MAINHEAT_DESCRIPTION",
        "MAINHEAT_ENERGY_EFF",
        "MAINHEATCONT_DESCRIPTION",
        "MAINHEATC_ENERGY_EFF",
        "MAIN_FUEL",
        "HOTWATER_DESCRIPTION",
        "HOT_WATER_ENERGY_EFF",
        "MAINS_GAS_FLAG"
    ]

    # A certificate is dropped if any of these columns is missing
    REQUIRED_COLUMNS = ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]

    # Tenure values for which certificates are excluded
    EXCLUDED_TENURES = [
        "unknown",
        "Not defined - use in the case of a new dwelling for which the intended tenure in not known. "
        "It is not to be used for an existing dwelling"
    ]

    def __init__(self, file_directories, cleaned_lookup):
        """
        :param file_directories: A list of directories where files are stored.
        :param cleaned_lookup: A dictionary containing cleaned lookup data.
        """
        self.file_directories = file_directories
        self.cleaned_lookup = cleaned_lookup

        self.results = []

    def create_dataset(self):
        """
        Read certificates.csv from every directory and append its heating set-up counts to `self.results`.

        :return: None
        """
        logger.info("Creating air source heat pump efficiency dataset")
        for directory in tqdm(self.file_directories):
            filepath = directory / "certificates.csv"
            certificates = pd.read_csv(filepath, low_memory=False)
            self.results.append(
                self._count_heating_combinations(certificates, EARLIEST_EPC_DATE)
            )

    @staticmethod
    def _count_heating_combinations(df, earliest_date):
        """
        Filter raw certificates and count properties per unique combination of HEATING_COLUMNS.

        All row filtering happens before the dataframe is reduced to the heating columns, because the
        filters rely on columns (UPRN, LODGEMENT_DATE, PROPERTY_TYPE, ...) that are not heating columns.

        :param df: Raw certificates dataframe, as read from certificates.csv.
        :param earliest_date: Only certificates lodged strictly after this date are kept.
        :return: Dataframe with the HEATING_COLUMNS plus a "count" column.
        """
        cls = AirSourceHeatPumpEfficiency

        # Copy so that the column assignments below do not write into a slice of the input
        df = df[df["UPRN"].notna()].copy()
        df["UPRN"] = df["UPRN"].astype(int).astype(str)

        # Take entries after SAP12
        df["LODGEMENT_DATE"] = pd.to_datetime(df["LODGEMENT_DATE"])
        df = df[df["LODGEMENT_DATE"] > earliest_date]

        df = df[~df["TENURE"].isin(cls.EXCLUDED_TENURES)]

        # Take entries that contain an air source heat pump
        df = df[
            df["MAINHEAT_DESCRIPTION"].str.contains("air source heat pump", case=False, na=False)
        ]

        # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA
        df = df.dropna(subset=cls.REQUIRED_COLUMNS)

        # Take newest LODGEMENT_DATE per UPRN, so each property is counted once
        df = df.sort_values(by="LODGEMENT_DATE", ascending=False).drop_duplicates(subset=["UPRN"])

        # Get the columns we're interested in and count each combination
        return df[cls.HEATING_COLUMNS].groupby(cls.HEATING_COLUMNS).size().reset_index(name="count")
|
||||
24
etl/air_source_heat_pump/app.py
Normal file
24
etl/air_source_heat_pump/app.py
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
from pathlib import Path
|
||||
from backend.app.plan.utils import get_cleaned
|
||||
from etl.air_source_heat_pump.AirSourceHeatPumpEfficiency import AirSourceHeatPumpEfficiency
|
||||
|
||||
DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
|
||||
|
||||
|
||||
def app():
    """
    Entry point for the air source heat pump efficiency ETL.

    Reads in the EPC dataset and looks at the efficiency values for heating systems that include air source
    heat pumps. This dataset is then used to inform the recommendations for the air source heat pump, so we
    know how to set the simulation.

    :return: None
    """

    # Every sub-directory of the data directory is treated as one set of certificates
    certificate_directories = []
    for candidate in DATA_DIRECTORY.iterdir():
        if candidate.is_dir():
            certificate_directories.append(candidate)

    AirSourceHeatPumpEfficiency(
        file_directories=certificate_directories,
        cleaned_lookup=get_cleaned()
    ).create_dataset()
|
||||
|
|
@ -73,6 +73,9 @@ def app():
|
|||
suspended_floor_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="suspended_floor_insulation", header=0)
|
||||
solid_floor_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="solid_floor_insulation", header=0)
|
||||
ewi_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="external_wall_insulation", header=0)
|
||||
lel_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="low_energy_lighting", header=0)
|
||||
flat_roof_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="flat_roof_insulation", header=0)
|
||||
window_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="window_glazing", header=0)
|
||||
|
||||
# Form a single table to be uploaded
|
||||
costs = pd.concat(
|
||||
|
|
@ -83,6 +86,8 @@ def app():
|
|||
suspended_floor_costs,
|
||||
solid_floor_costs,
|
||||
ewi_costs,
|
||||
lel_costs,
|
||||
flat_roof_costs
|
||||
]
|
||||
)
|
||||
|
||||
|
|
|
|||
211
etl/customers/gla_croydon_demo/asset_list.py
Normal file
211
etl/customers/gla_croydon_demo/asset_list.py
Normal file
|
|
@ -0,0 +1,211 @@
|
|||
import pandas as pd
|
||||
from utils.s3 import save_csv_to_s3
|
||||
|
||||
USER_ID = 8
|
||||
PORTFOLIO_ID = 67
|
||||
|
||||
archetype_1_uprns = [100020604138, 200001188299, 100020578756, 200001187196, 200001192253, 100020581792, 200001188304,
|
||||
100020625813, 100020618060, 100020585305, 100020617489, 100020615039, 100020618076, 100020588913,
|
||||
200001187197, 100020671205, 100020576940, 100020619814, 100020576472, 100020618083]
|
||||
archetype_2_uprns = [100020698027, 10001007455, 100020653785, 10090383198, 100020665632, 100020620659, 100020615603,
|
||||
100020609610, 100020625597, 100020665656, 100020665640, 100020587905, 100020665630, 100020624351,
|
||||
100020625451, 100020624348, 100020666735, 100020653786, 100020576458, 100020657902, 100020624350,
|
||||
100020637405, 100020666734, 100020616325, 100020666716, 100020653783, 100020665645, 100020642337,
|
||||
100020665638, 100022904981, 100020688226, 100020630285, 100020626800, 100020665634, 100022907528,
|
||||
100020665652, 100020624347, 100020666721, 100020585002, 10014055968, 10001008257, 100020621438,
|
||||
100020576459, 100020665643, 100020665654, 100022917303]
|
||||
archetype_3_uprns = [100020577523, 100020616446, 100020605342, 100020594652, 100020585394, 100020601138, 100020597485,
|
||||
100020614883, 100020633162, 100020697787, 200001185785, 100020646842, 100020581449, 100020595611,
|
||||
100020641814, 100020575611, 100020652986, 100020654671, 100020647336, 100020610518, 100020607980,
|
||||
100020692380, 100020581690]
|
||||
archetype_4_uprns = [100020650603, 100020582907, 100020605116, 100020650607, 100020589325, 100020655500, 100020642537,
|
||||
200001187539, 100020631683, 100020610165, 100020596436, 100020598277, 100020660228]
|
||||
|
||||
|
||||
def app():
    """
    Build the asset list for a small demo portfolio of social housing in Croydon and upload it to S3.

    The Croydon EPC download is reduced to the newest certificate per UPRN, restricted to social tenure,
    below EPC C and lodged within the last three years. Four archetypes are then selected, the result is
    limited to the hard-coded archetype_*_uprns samples, saved as the portfolio's inputs.csv and the
    request body needed to trigger the plan is printed.

    :return: None
    """

    # Firstly, read in the EPC data for Croydon
    epc_data = pd.read_csv(
        "local_data/all-domestic-certificates/domestic-E09000008-Croydon/certificates.csv",
        low_memory=False
    )

    # Filter on entries where we have a UPRN. Copy so the column assignment below does not write to a slice
    epc_data = epc_data[~pd.isnull(epc_data["UPRN"])].copy()

    # Get the newest EPC for each UPRN. We use LODGEMENT_DATE as a proxy for this
    epc_data["LODGEMENT_DATE"] = pd.to_datetime(epc_data["LODGEMENT_DATE"])

    epc_data = epc_data.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN")

    # Now filter on social properties
    epc_data = epc_data[epc_data["TENURE"].isin(["rental (social)", "Rented (social)"])]
    # There are 17337 properties with a registered EPC in Croydon
    # Take below EPC C properties
    epc_data = epc_data[epc_data["CURRENT_ENERGY_EFFICIENCY"].astype(int) < 69]
    # 7994 properties are below EPC C (46%)
    # 79% D, 19% E, 1% F, 0.2% G - it probably makes the most sense to focus on E and D properties

    # For the purpose of the sample, take the properties have surveys done in the last 3 years
    # This gives us 1351 remaining properties
    three_years_ago = pd.Timestamp.now() - pd.DateOffset(days=int(3 * 365))
    epc_data = epc_data[epc_data["LODGEMENT_DATE"] >= three_years_ago]

    # Archetype 1: defined below:
    # 1) House
    # 2) Unfilled cavity
    # 3) A roof that could be insulated (flat or pitched with no more than 50mm insulation)
    # 4) EPC E or D
    # 24 properties
    archetype_1_sample = epc_data[
        epc_data["PROPERTY_TYPE"].isin(["House"]) &
        (epc_data["CURRENT_ENERGY_RATING"].isin(["D", "E"])) &
        epc_data["WALLS_DESCRIPTION"].isin(["Cavity wall, as built, no insulation (assumed)"]) &
        epc_data["ROOF_DESCRIPTION"].isin(
            [
                "Pitched, 12 mm loft insulation",
                "Pitched, 0 mm loft insulation",
                "Pitched, no insulation",
                "Pitched, 50 mm loft insulation",
                "Flat, no insulation (assumed)",
                "Pitched, no insulation (assumed)"
            ]
        )
    ]
    archetype_1_sample_asset_list = archetype_1_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
    archetype_1_sample_asset_list["ARCHETYPE"] = "Archetype 1"

    # Archetype 2: defined below:
    # 1) Flat
    # 2) Unfilled cavity
    # 3) Another property above
    # 4) EPC E or D
    # 57 properties here
    archetype_2_sample = epc_data[
        epc_data["PROPERTY_TYPE"].isin(["Flat"]) &
        (epc_data["CURRENT_ENERGY_RATING"].isin(["E", "D"])) &
        epc_data["WALLS_DESCRIPTION"].isin(["Cavity wall, as built, no insulation (assumed)"]) &
        epc_data["ROOF_DESCRIPTION"].isin(
            [
                "(another dwelling above)"
            ]
        )
    ]
    archetype_2_sample_asset_list = archetype_2_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
    archetype_2_sample_asset_list["ARCHETYPE"] = "Archetype 2"

    # Archetype 3: defined below:
    # 1) EPC E or below
    # 2) Solid brick wall
    # 3) House
    # 4) Pitched roof with no, limited or 100 mm insulation
    # Just 7 properties (more expensive to retrofit)
    archetype_3_sample = epc_data[
        epc_data["PROPERTY_TYPE"].isin(["House"]) &
        (epc_data["CURRENT_ENERGY_RATING"].isin(["E", "F", "G"])) &
        epc_data["WALLS_DESCRIPTION"].isin(["Solid brick, as built, no insulation (assumed)"]) &
        epc_data["ROOF_DESCRIPTION"].isin(
            [
                "Pitched, no insulation",
                "Pitched, limited insulation (assumed)",
                "Pitched, 100 mm loft insulation",
                "Pitched, no insulation (assumed)",
            ]
        )
    ]
    archetype_3_sample_asset_list = archetype_3_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
    archetype_3_sample_asset_list["ARCHETYPE"] = "Archetype 3"

    # Archetype 4: defined below:
    # 1) Maisonette
    # 2) Empty cavity
    # No rating filter is applied here beyond the "below EPC C" filter above
    # 16 properties here
    archetype_4_sample = epc_data[
        epc_data["PROPERTY_TYPE"].isin(["Maisonette"]) &
        epc_data["WALLS_DESCRIPTION"].isin(
            ["Cavity wall, as built, no insulation (assumed)"]
        )
    ]

    archetype_4_sample_asset_list = archetype_4_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
    archetype_4_sample_asset_list["ARCHETYPE"] = "Archetype 4"

    asset_list = pd.concat(
        [
            archetype_1_sample_asset_list,
            archetype_2_sample_asset_list,
            archetype_3_sample_asset_list,
            archetype_4_sample_asset_list
        ]
    )

    asset_list = asset_list.rename(
        columns={
            "UPRN": "uprn",
            "ADDRESS1": "address",
            "POSTCODE": "postcode",
            "ARCHETYPE": "archetype"
        }
    )

    asset_list["uprn"] = asset_list["uprn"].astype(int)

    # We end up with some properties that are currently an EPC C, but we do not have this data in the download, so we
    # manually remove
    # 1) 3 Reid Close, CR5 3BL
    # 2) Flat 6, Collier Court 2A, St. Peters Road CR0 1HD
    asset_list = asset_list[
        ~asset_list["uprn"].isin(
            [
                100020576460,
                100020624352,
            ]
        )
    ]

    # We have slightly too many properties, so we only keep a fixed sample of each archetype. The module-level
    # archetype_*_uprns lists hold that sample (20 / 46 / 23 / 13 properties); they were presumably drawn once
    # with a random sample per archetype and then hard-coded so that the portfolio is reproducible.
    uprns_to_keep = archetype_1_uprns + archetype_2_uprns + archetype_3_uprns + archetype_4_uprns
    asset_list = asset_list[asset_list["uprn"].isin(uprns_to_keep)]

    filename = f"{USER_ID}/{PORTFOLIO_ID}/inputs.csv"
    save_csv_to_s3(
        dataframe=asset_list,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename
    )

    # Request body used to trigger the plan for this portfolio
    body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Social",
        "goal": "Increase EPC",
        "goal_value": "C",
        "trigger_file_path": filename,
        "budget": None,
        "exclusions": ["floor_insulation"]
    }
    print(body)
|
||||
760
etl/customers/gla_croydon_demo/slides.py
Normal file
760
etl/customers/gla_croydon_demo/slides.py
Normal file
|
|
@ -0,0 +1,760 @@
|
|||
"""
|
||||
This script contains the code to generate the data required to populate the slides
|
||||
We connect to the database and extract the data for the portfolio needed so it is recommended to use
|
||||
an environment akin to the backend to run this script
|
||||
"""
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from backend.app.db.connection import db_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from utils.s3 import read_csv_from_s3
|
||||
from etl.customers.slide_utils import (
|
||||
plot_epc_distribution,
|
||||
get_property_details_by_portfolio_id,
|
||||
get_plan_by_portfolio_id,
|
||||
get_properties_with_default_recommendations,
|
||||
create_powerpoint,
|
||||
create_recommendations_summary
|
||||
)
|
||||
from backend.ml_models.AnnualBillSavings import AnnualBillSavings
|
||||
|
||||
USER_ID = 8
|
||||
PORTFOLIO_ID_1 = 67
|
||||
PORTFOLIO_ID_2 = 68
|
||||
EPC_TARGET_1 = "C"
|
||||
EPC_TARGET_2 = "A"
|
||||
SAP_TARGET_1 = 69
|
||||
SAP_TARGET_2 = 100
|
||||
CUSTOMER_KEY = "gla-demo"
|
||||
|
||||
# Sample UPRNS
|
||||
archetype_1_sample = ['100020604138', '200001192253', '100020581792', '100020576940', '200001187196', '100020618060',
|
||||
'100020625813', '100020578756', '100020618076', '200001187197', '100020619814', '100020617489',
|
||||
'100020588913']
|
||||
|
||||
archetype_2_sample = ['100020585002', '100020615603', '100020665652', '100020626800', '100020624347', '100020624348',
|
||||
'100020576459', '10001007455', '100020666716', '100020609610', '100020625451', '100020625597',
|
||||
'100020624351', '100020665634', '100020624350', '100020665640', '100020665632', '100022917303',
|
||||
'100020665656', '10014055968', '100020630285', '100020665638', '100020616325', '100020637405',
|
||||
'100020698027', '100020657902', '100020688226', '100020653786', '100020642337', '100020665643']
|
||||
|
||||
archetype_3_sample = ['100020594652', '100020697787', '100020577523', '100020633162', '100020601138', '100020595611',
|
||||
'100020597485', '100020614883', '100020605342', '100020654671', '100020575611', '100020607980',
|
||||
'200001185785', '100020616446', '100020692380']
|
||||
|
||||
archetype_4_sample = ['100020596436', '100020610165', '200001187539', '100020655500', '100020582907', '100020598277',
|
||||
'100020650607', '100020605116', '100020650603']
|
||||
|
||||
|
||||
# Recommendation "type" values grouped into the headline measure categories reported per scenario.
# Any type not listed here is counted under "other".
_MEASURE_CATEGORIES = {
    "wall_insulation": ["cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation"],
    "ventilation": ["mechanical_ventilation"],
    "roof_insulation": ["loft_insulation", "flat_roof_insulation"],
    "floor_insulation": ["solid_floor_insulation", "suspended_floor_insulation"],
    "windows": ["windows_glazing"],
    "heating": ["heating"],
    "heating_controls": ["heating_control"],
    "solar": ["solar_pv"],
}

# Columns of the recommendations summary that are reported for every archetype.
_ARCHETYPE_METRICS = [
    "total_cost", "total_carbon", "total_bill_savings", "total_sap_points", "adjusted_heat_demand",
    "energy_percent_change", "carbon_percent_change", "bills_percent_change",
]

# Values of the asset list's "archetype" column that get their own summary.
_ARCHETYPE_LABELS = ["Archetype 1", "Archetype 2", "Archetype 3", "Archetype 4"]


def _count_measures_by_category(recommendations_df):
    """Count recommendations per headline measure category, plus an ``"other"`` bucket.

    :param recommendations_df: one row per recommendation with at least ``type`` and ``id`` columns.
    :return: dict mapping each key of ``_MEASURE_CATEGORIES`` (and ``"other"``) to an int count.
    """
    per_type = recommendations_df.groupby("type")["id"].count()
    counts = {
        category: int(per_type[per_type.index.isin(types)].sum())
        for category, types in _MEASURE_CATEGORIES.items()
    }
    categorised_types = [t for types in _MEASURE_CATEGORIES.values() for t in types]
    counts["other"] = int(per_type[~per_type.index.isin(categorised_types)].sum())
    return counts


def _summarise_archetype(label, asset_list, recommendations_summary, recommendations_df, property_details_df):
    """Build the min / max / mean / total figures and display strings for one archetype.

    UPRNs are compared as strings on both sides so the match does not depend on whether the asset
    list was parsed with numeric or text UPRNs.

    :param label: value of the asset list's ``archetype`` column to summarise.
    :param asset_list: frame with ``uprn`` and ``archetype`` columns.
    :param recommendations_summary: per-property summary with ``uprn`` and the ``_ARCHETYPE_METRICS`` columns.
    :param recommendations_df: one row per recommendation with ``uprn``, ``type`` and ``id`` columns.
    :param property_details_df: per-property details with ``uprn`` and ``co2_emissions`` columns.
    :return: dict with the archetype's statistics, measure counts by type and formatted text per metric.
    """
    uprns = set(asset_list.loc[asset_list["archetype"] == label, "uprn"].astype(str))

    summary = recommendations_summary[recommendations_summary["uprn"].astype(str).isin(uprns)]
    details = property_details_df[property_details_df["uprn"].astype(str).isin(uprns)]
    archetype_recommendations = recommendations_df[recommendations_df["uprn"].astype(str).isin(uprns)]

    # Share of the portfolio's current CO2 emissions that comes from this archetype.
    portfolio_co2 = property_details_df["co2_emissions"].sum()
    co2_share = details["co2_emissions"].sum() / portfolio_co2 if portfolio_co2 else float("nan")

    # Restrict to the reported metrics before aggregating so that identifier / text columns in the
    # summary cannot break (or pollute) the statistics.
    metrics = summary[_ARCHETYPE_METRICS]
    minimum = metrics.min()
    maximum = metrics.max()
    mean = metrics.mean()

    # "<mean rounded to 2dp>: <min> - <max>", the format used on the slides for every metric.
    text = {
        metric: f"{round(mean[metric], 2)}: {minimum[metric]} - {maximum[metric]}"
        for metric in _ARCHETYPE_METRICS
    }

    return {
        "uprn_count": len(uprns),
        "co2_share": co2_share,
        "min": minimum,
        "max": maximum,
        "mean": mean,
        "totals": metrics.sum(),
        "measures": archetype_recommendations.groupby("type")["id"].count().reset_index(),
        "text": text,
    }


def scenario_1():
    """Summarise the default recommendations for ``PORTFOLIO_ID_1`` against ``SAP_TARGET_1``.

    Reads the asset list from S3, pulls properties, property details and plans from the database,
    builds the per-property recommendations summary and then derives portfolio-wide and
    per-archetype figures.

    The ``*_percent_change`` columns are plain ratios (saving divided by the current value); they
    are not multiplied by 100.

    :return: dict with keys ``plans``, ``recommendations_summary``, ``overview_totals``,
        ``overview_means``, ``measures`` (counts per measure category) and ``archetypes``
        (one ``_summarise_archetype`` result per archetype label).
    """
    portfolio_id = PORTFOLIO_ID_1

    # Connect to database
    session = sessionmaker(bind=db_engine)()
    try:
        ########################################################################
        # Get the data we need
        ########################################################################

        # Get the asset list
        # NOTE(review): the path hard-codes 67, which currently equals PORTFOLIO_ID_1 - confirm
        # whether it should follow portfolio_id.
        asset_list = pd.DataFrame(
            read_csv_from_s3("retrofit-plan-inputs-dev", f"{USER_ID}/67/inputs.csv")
        )

        # Get the properties for the portfolio
        properties_df = pd.DataFrame(get_properties_with_default_recommendations(session, portfolio_id))
        uprn_lookup = properties_df[["uprn", "id"]].rename(columns={"id": "property_id"})

        # We now pull the data for the property details
        property_details_df = pd.DataFrame(get_property_details_by_portfolio_id(session, portfolio_id))
        # We estimate bills based on the adjusted_energy_consumption
        property_details_df["energy_bill"] = property_details_df["adjusted_energy_consumption"].apply(
            AnnualBillSavings.calculate_annual_bill
        )
        # Merge on uprn
        property_details_df = property_details_df.merge(uprn_lookup, on="property_id")

        plans_df = pd.DataFrame(get_plan_by_portfolio_id(session, portfolio_id))

        # Unnest the recommendations. Each property holds a list of recommendation dictionaries;
        # properties without recommendations explode to a null entry, which is dropped.
        exploded = properties_df["recommendations"].explode().tolist()
        recommendations_df = pd.DataFrame([r for r in exploded if not pd.isnull(r)])
        # Add uprn on
        recommendations_df = recommendations_df.merge(uprn_lookup, how="left", on="property_id")

        recommendations_summary = create_recommendations_summary(
            recommendations_df,
            properties_df,
            property_details_df,
            SAP_TARGET_1
        )

        # Calculate the relative changes of energy, CO2 and annual bill savings
        recommendations_summary["carbon_percent_change"] = (
            recommendations_summary["total_carbon"] / recommendations_summary["current_co2"]
        )
        recommendations_summary["energy_percent_change"] = (
            recommendations_summary["adjusted_heat_demand"] / recommendations_summary["current_energy"]
        )
        recommendations_summary["bills_percent_change"] = (
            recommendations_summary["total_bill_savings"] / recommendations_summary["current_energy_bill"]
        )

        return {
            "plans": plans_df,
            "recommendations_summary": recommendations_summary,
            # numeric_only keeps text columns out of the portfolio-wide aggregates.
            "overview_totals": recommendations_summary.sum(numeric_only=True),
            "overview_means": recommendations_summary.mean(numeric_only=True),
            "measures": _count_measures_by_category(recommendations_df),
            "archetypes": {
                label: _summarise_archetype(
                    label, asset_list, recommendations_summary, recommendations_df, property_details_df
                )
                for label in _ARCHETYPE_LABELS
            },
        }
    finally:
        # Always release the database connection, even if a query or calculation fails.
        session.close()
|
||||
|
||||
|
||||
# Number of properties drawn per archetype (67 in total). The original note
# "sample_proportion = 67 / 102" suggests this is 67 of 102 properties - TODO confirm.
_DEFAULT_SAMPLE_SIZES = {
    "Archetype 1": 13,
    "Archetype 2": 30,
    "Archetype 3": 15,
    "Archetype 4": 9,
}


def _sample_uprns_by_archetype(asset_list, sample_sizes, random_state=None):
    """Draw ``sample_sizes[label]`` UPRNs without replacement from each archetype of ``asset_list``.

    :param asset_list: frame with ``uprn`` and ``archetype`` columns.
    :param sample_sizes: mapping of archetype label to the number of rows to draw.
    :param random_state: forwarded to ``DataFrame.sample``; pass a seed for a repeatable draw.
    :return: dict mapping each archetype label to the list of sampled UPRNs.
    :raises ValueError: raised by pandas when an archetype has fewer rows than requested.
    """
    return {
        label: asset_list[asset_list["archetype"] == label]
        .sample(n=size, random_state=random_state)["uprn"]
        .to_list()
        for label, size in sample_sizes.items()
    }


def make_sample(sample_sizes=None, random_state=None):
    """Draw a random sample of UPRNs per archetype from the asset list stored in S3.

    The result is meant to be copied into the static ``archetype_N_sample`` lists so the sample
    stays fixed between runs. Previously the drawn samples were discarded when the function
    returned; they are now returned to the caller.

    :param sample_sizes: optional mapping of archetype label to sample size; defaults to
        ``_DEFAULT_SAMPLE_SIZES`` (13 / 30 / 15 / 9).
    :param random_state: optional seed forwarded to ``DataFrame.sample``; ``None`` keeps the
        original unseeded behaviour.
    :return: dict mapping each archetype label to its list of sampled UPRNs.
    """
    # Get the asset list
    asset_list = pd.DataFrame(
        read_csv_from_s3("retrofit-plan-inputs-dev", f"{USER_ID}/67/inputs.csv")
    )

    sizes = _DEFAULT_SAMPLE_SIZES if sample_sizes is None else sample_sizes
    return _sample_uprns_by_archetype(asset_list, sizes, random_state)
|
||||
|
||||
|
||||
def scenario_2():
|
||||
# Connect to database
|
||||
session = sessionmaker(bind=db_engine)()
|
||||
|
||||
########################################################################
|
||||
# Get the data we need
|
||||
########################################################################
|
||||
|
||||
portfolio_id = PORTFOLIO_ID_2
|
||||
|
||||
# Get the asset list
|
||||
asset_list = read_csv_from_s3(
|
||||
"retrofit-plan-inputs-dev", f"{USER_ID}/67/inputs.csv"
|
||||
)
|
||||
asset_list = pd.DataFrame(asset_list)
|
||||
|
||||
sample_uprns = archetype_1_sample + archetype_2_sample + archetype_3_sample + archetype_4_sample
|
||||
|
||||
# Filter on sample uprns
|
||||
asset_list = asset_list[asset_list["uprn"].astype(str).isin(sample_uprns)]
|
||||
|
||||
# Get the properties for the portfolio
|
||||
properties = get_properties_with_default_recommendations(session, portfolio_id)
|
||||
properties_df = pd.DataFrame(properties)
|
||||
properties_df = properties_df[properties_df["uprn"].astype(str).isin(sample_uprns)]
|
||||
|
||||
# We now pull the data for the property details
|
||||
property_details = get_property_details_by_portfolio_id(session, portfolio_id)
|
||||
property_details_df = pd.DataFrame(property_details)
|
||||
property_details_df = property_details_df[property_details_df["property_id"].isin(properties_df["id"].values)]
|
||||
# We estimate bills based on the adjusted_energy_consumption
|
||||
property_details_df["energy_bill"] = property_details_df["adjusted_energy_consumption"].apply(
|
||||
lambda x: AnnualBillSavings.calculate_annual_bill(x)
|
||||
)
|
||||
# Merge on uprn
|
||||
property_details_df = property_details_df.merge(
|
||||
properties_df[["uprn", "id"]].rename(columns={"id": "property_id"}),
|
||||
on="property_id"
|
||||
)
|
||||
|
||||
plans = get_plan_by_portfolio_id(session, portfolio_id)
|
||||
plans_df = pd.DataFrame(plans)
|
||||
|
||||
# Unnest the recommendations. Each recommendation is a list of dictionaries
|
||||
recommendations_exploded = properties_df["recommendations"].explode().tolist()
|
||||
recommendations_df = pd.DataFrame([r for r in recommendations_exploded if not pd.isnull(r)])
|
||||
# Add uprn on
|
||||
recommendations_df = recommendations_df.merge(
|
||||
properties_df[["uprn", "id"]].rename(columns={"id": "property_id"}),
|
||||
how="left",
|
||||
on="property_id"
|
||||
)
|
||||
|
||||
recommendations_summary = create_recommendations_summary(
|
||||
recommendations_df,
|
||||
properties_df,
|
||||
property_details_df,
|
||||
SAP_TARGET_1
|
||||
)
|
||||
|
||||
# Calculate % changes of energ, co2 and abs
|
||||
recommendations_summary["carbon_percent_change"] = (
|
||||
recommendations_summary["total_carbon"] / recommendations_summary["current_co2"]
|
||||
)
|
||||
|
||||
recommendations_summary["energy_percent_change"] = (
|
||||
recommendations_summary["adjusted_heat_demand"] / recommendations_summary["current_energy"]
|
||||
)
|
||||
|
||||
recommendations_summary["bills_percent_change"] = (
|
||||
recommendations_summary["total_bill_savings"] / recommendations_summary["current_energy_bill"]
|
||||
)
|
||||
|
||||
########################
|
||||
# Overview
|
||||
########################
|
||||
overview_totals = recommendations_summary.sum()
|
||||
overview_means = recommendations_summary.mean()
|
||||
|
||||
########################
|
||||
# Measures
|
||||
########################
|
||||
measures_count = recommendations_df.groupby("type")["id"].count().reset_index()
|
||||
wall_insulation_measures = measures_count[
|
||||
measures_count["type"].isin(["cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation"])
|
||||
]["id"].sum()
|
||||
ventilation_measures = measures_count[
|
||||
measures_count["type"].isin(["mechanical_ventilation"])
|
||||
]["id"].sum()
|
||||
roof_insulation_measures = measures_count[
|
||||
measures_count["type"].isin(["loft_insulation", "flat_roof_insulation"])
|
||||
]["id"].sum()
|
||||
floor_insulation_measures = measures_count[
|
||||
measures_count["type"].isin(["solid_floor_insulation", "suspended_floor_insulation"])
|
||||
]["id"].sum()
|
||||
windows = measures_count[
|
||||
measures_count["type"].isin(["windows_glazing"])
|
||||
]["id"].sum()
|
||||
heating = measures_count[
|
||||
measures_count["type"].isin(["heating"])
|
||||
]["id"].sum()
|
||||
heating_controls = measures_count[
|
||||
measures_count["type"].isin(["heating_control"])
|
||||
]["id"].sum()
|
||||
solar = measures_count[
|
||||
measures_count["type"].isin(["solar_pv"])
|
||||
]["id"].sum()
|
||||
other = measures_count[
|
||||
~measures_count["type"].isin([
|
||||
"cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation",
|
||||
"loft_insulation", "flat_roof_insulation", "solid_floor_insulation",
|
||||
"suspended_floor_insulation", "windows_glazing", "heating", "heating_control", "solar_pv",
|
||||
"mechanical_ventilation"
|
||||
])
|
||||
]["id"].sum()
|
||||
|
||||
z = recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_3_sample)]
|
||||
|
||||
recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_3_sample)]["type"].value_counts()
|
||||
|
||||
# Summary information by each archetype
|
||||
########################
|
||||
# Archetype 1
|
||||
########################
|
||||
archetype_1 = asset_list[asset_list["archetype"] == "Archetype 1"]
|
||||
recommendations_arch_1_summary = recommendations_summary[
|
||||
recommendations_summary["uprn"].astype(str).isin(archetype_1["uprn"].values)
|
||||
]
|
||||
|
||||
arch_1_property_details = property_details_df[
|
||||
property_details_df["uprn"].astype(str).isin(archetype_1["uprn"].values)
|
||||
]
|
||||
arch_1_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
|
||||
|
||||
# Take the mean, median and maximum of each value
|
||||
arch_1_recommendation_min = recommendations_arch_1_summary.min()
|
||||
arch_1_recommendation_max = recommendations_arch_1_summary.max()
|
||||
arch_1_recommendation_means = recommendations_arch_1_summary.mean()
|
||||
|
||||
arch_1_totals = recommendations_arch_1_summary.sum()
|
||||
|
||||
annual_total_co2 = recommendations_arch_1_summary["total_carbon"].sum()
|
||||
annual_total_bills = recommendations_arch_1_summary["total_bill_savings"].sum()
|
||||
annual_total_energy_savings = recommendations_arch_1_summary["adjusted_heat_demand"].sum()
|
||||
archetype_measures = \
|
||||
recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_1["uprn"].values)].groupby("type")[
|
||||
"id"].count().reset_index()
|
||||
|
||||
cost_text = (f"{round(arch_1_recommendation_means['total_cost'], 2)}: "
|
||||
f"{arch_1_recommendation_min['total_cost']} - {arch_1_recommendation_max['total_cost']}")
|
||||
|
||||
sap_text = (f"{round(arch_1_recommendation_means['total_sap_points'], 2)}: "
|
||||
f"{arch_1_recommendation_min['total_sap_points']} - {arch_1_recommendation_max['total_sap_points']}")
|
||||
|
||||
energy_text = (f"{round(arch_1_recommendation_means['adjusted_heat_demand'], 2)}: "
|
||||
f"{arch_1_recommendation_min['adjusted_heat_demand']} - "
|
||||
f"{arch_1_recommendation_max['adjusted_heat_demand']}")
|
||||
|
||||
energy_percent_text = (f"{round(arch_1_recommendation_means['energy_percent_change'], 2)}: "
|
||||
f"{arch_1_recommendation_min['energy_percent_change']} - "
|
||||
f"{arch_1_recommendation_max['energy_percent_change']}")
|
||||
|
||||
carbon_text = (f"{round(arch_1_recommendation_means['total_carbon'], 2)}: "
|
||||
f"{arch_1_recommendation_min['total_carbon']} - {arch_1_recommendation_max['total_carbon']}")
|
||||
|
||||
carbon_percent_text = (f"{round(arch_1_recommendation_means['carbon_percent_change'], 2)}: "
|
||||
f"{arch_1_recommendation_min['carbon_percent_change']} - "
|
||||
f"{arch_1_recommendation_max['carbon_percent_change']}")
|
||||
|
||||
bill_text = (f"{round(arch_1_recommendation_means['total_bill_savings'], 2)}: "
|
||||
f"{arch_1_recommendation_min['total_bill_savings']} - "
|
||||
f"{arch_1_recommendation_max['total_bill_savings']}")
|
||||
|
||||
bill_percent_text = (f"{round(arch_1_recommendation_means['bills_percent_change'], 2)}: "
|
||||
f"{arch_1_recommendation_min['bills_percent_change']} - "
|
||||
f"{arch_1_recommendation_max['bills_percent_change']}")
|
||||
|
||||
########################
|
||||
# Archetype 2
|
||||
########################
|
||||
archetype_2 = asset_list[asset_list["archetype"] == "Archetype 2"]
|
||||
recommendations_arch_2_summary = recommendations_summary[
|
||||
recommendations_summary["uprn"].astype(str).isin(archetype_2["uprn"].values)
|
||||
]
|
||||
|
||||
arch_2_property_details = property_details_df[
|
||||
property_details_df["uprn"].astype(str).isin(archetype_2["uprn"].values)
|
||||
]
|
||||
arch_2_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
|
||||
|
||||
# Take the mean, median and maximum of each value
|
||||
arch_2_recommendation_min = recommendations_arch_2_summary.min()
|
||||
arch_2_recommendation_max = recommendations_arch_2_summary.max()
|
||||
arch_2_recommendation_means = recommendations_arch_2_summary.mean().round(2)
|
||||
|
||||
total_cost = recommendations_arch_2_summary["total_cost"].sum()
|
||||
annual_total_co2 = recommendations_arch_2_summary["total_carbon"].sum()
|
||||
annual_total_bills = recommendations_arch_2_summary["total_bill_savings"].sum()
|
||||
annual_total_energy_savings = recommendations_arch_2_summary["adjusted_heat_demand"].sum()
|
||||
archetype_measures = \
|
||||
recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_2["uprn"].values)].groupby("type")[
|
||||
"id"].count().reset_index()
|
||||
|
||||
cost_text = (f"{round(arch_2_recommendation_means['total_cost'], 2)}: "
|
||||
f"{arch_2_recommendation_min['total_cost']} - {arch_2_recommendation_max['total_cost']}")
|
||||
|
||||
sap_text = (f"{round(arch_2_recommendation_means['total_sap_points'], 2)}: "
|
||||
f"{arch_2_recommendation_min['total_sap_points']} - {arch_2_recommendation_max['total_sap_points']}")
|
||||
|
||||
energy_text = (f"{round(arch_2_recommendation_means['adjusted_heat_demand'], 2)}: "
|
||||
f"{arch_2_recommendation_min['adjusted_heat_demand']} - "
|
||||
f"{arch_2_recommendation_max['adjusted_heat_demand']}")
|
||||
|
||||
energy_percent_text = (f"{round(arch_2_recommendation_means['energy_percent_change'], 2)}: "
|
||||
f"{arch_2_recommendation_min['energy_percent_change']} - "
|
||||
f"{arch_2_recommendation_max['energy_percent_change']}")
|
||||
|
||||
carbon_text = (f"{round(arch_2_recommendation_means['total_carbon'], 2)}: "
|
||||
f"{arch_2_recommendation_min['total_carbon']} - {arch_2_recommendation_max['total_carbon']}")
|
||||
|
||||
carbon_percent_text = (f"{round(arch_2_recommendation_means['carbon_percent_change'], 2)}: "
|
||||
f"{arch_2_recommendation_min['carbon_percent_change']} - "
|
||||
f"{arch_2_recommendation_max['carbon_percent_change']}")
|
||||
|
||||
bill_text = (f"{round(arch_2_recommendation_means['total_bill_savings'], 2)}: "
|
||||
f"{arch_2_recommendation_min['total_bill_savings']} - "
|
||||
f"{arch_2_recommendation_max['total_bill_savings']}")
|
||||
|
||||
bill_percent_text = (f"{round(arch_2_recommendation_means['bills_percent_change'], 2)}: "
|
||||
f"{arch_2_recommendation_min['bills_percent_change']} - "
|
||||
f"{arch_2_recommendation_max['bills_percent_change']}")
|
||||
|
||||
########################
|
||||
# Archetype 3
|
||||
########################
|
||||
archetype_3 = asset_list[asset_list["archetype"] == "Archetype 3"]
|
||||
recommendations_arch_3_summary = recommendations_summary[
|
||||
recommendations_summary["uprn"].astype(str).isin(archetype_3["uprn"].values)
|
||||
]
|
||||
|
||||
arch_3_property_details = property_details_df[
|
||||
property_details_df["uprn"].astype(str).isin(archetype_3["uprn"].values)
|
||||
]
|
||||
arch_3_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
|
||||
|
||||
# Take the mean, median and maximum of each value
|
||||
arch_3_recommendation_min = recommendations_arch_3_summary.min()
|
||||
arch_3_recommendation_max = recommendations_arch_3_summary.max()
|
||||
arch_3_recommendation_means = recommendations_arch_3_summary.mean()
|
||||
|
||||
total_cost = recommendations_arch_3_summary["total_cost"].sum()
|
||||
annual_total_co2 = recommendations_arch_3_summary["total_carbon"].sum()
|
||||
annual_total_bills = recommendations_arch_3_summary["total_bill_savings"].sum()
|
||||
annual_total_energy_savings = recommendations_arch_3_summary["adjusted_heat_demand"].sum()
|
||||
archetype_measures = \
|
||||
recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_3["uprn"].values)].groupby("type")[
|
||||
"id"].count().reset_index()
|
||||
|
||||
cost_text = (f"{round(arch_3_recommendation_means['total_cost'], 2)}: "
|
||||
f"{arch_3_recommendation_min['total_cost']} - {arch_3_recommendation_max['total_cost']}")
|
||||
|
||||
sap_text = (f"{round(arch_3_recommendation_means['total_sap_points'], 2)}: "
|
||||
f"{arch_3_recommendation_min['total_sap_points']} - {arch_3_recommendation_max['total_sap_points']}")
|
||||
|
||||
energy_text = (f"{round(arch_3_recommendation_means['adjusted_heat_demand'], 2)}: "
|
||||
f"{arch_3_recommendation_min['adjusted_heat_demand']} - "
|
||||
f"{arch_3_recommendation_max['adjusted_heat_demand']}")
|
||||
|
||||
energy_percent_text = (f"{round(arch_3_recommendation_means['energy_percent_change'], 2)}: "
|
||||
f"{arch_3_recommendation_min['energy_percent_change']} - "
|
||||
f"{arch_3_recommendation_max['energy_percent_change']}")
|
||||
|
||||
carbon_text = (f"{round(arch_3_recommendation_means['total_carbon'], 2)}: "
|
||||
f"{arch_3_recommendation_min['total_carbon']} - {arch_3_recommendation_max['total_carbon']}")
|
||||
|
||||
carbon_percent_text = (f"{round(arch_3_recommendation_means['carbon_percent_change'], 2)}: "
|
||||
f"{arch_3_recommendation_min['carbon_percent_change']} - "
|
||||
f"{arch_3_recommendation_max['carbon_percent_change']}")
|
||||
|
||||
bill_text = (f"{round(arch_3_recommendation_means['total_bill_savings'], 2)}: "
|
||||
f"{arch_3_recommendation_min['total_bill_savings']} - "
|
||||
f"{arch_3_recommendation_max['total_bill_savings']}")
|
||||
|
||||
bill_percent_text = (f"{round(arch_3_recommendation_means['bills_percent_change'], 2)}: "
|
||||
f"{arch_3_recommendation_min['bills_percent_change']} - "
|
||||
f"{arch_3_recommendation_max['bills_percent_change']}")
|
||||
|
||||
########################
|
||||
# Archetype 4
|
||||
########################
|
||||
archetype_4 = asset_list[asset_list["archetype"] == "Archetype 4"]
|
||||
recommendations_arch_4_summary = recommendations_summary[
|
||||
recommendations_summary["uprn"].astype(str).isin(archetype_4["uprn"].values)
|
||||
]
|
||||
|
||||
arch_4_property_details = property_details_df[
|
||||
property_details_df["uprn"].astype(str).isin(archetype_4["uprn"].values)
|
||||
]
|
||||
arch_4_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
|
||||
|
||||
# Take the minimum, maximum and mean of each value
|
||||
arch_4_recommendation_min = recommendations_arch_4_summary.min()
|
||||
arch_4_recommendation_max = recommendations_arch_4_summary.max()
|
||||
arch_4_recommendation_means = recommendations_arch_4_summary.mean()
|
||||
|
||||
total_cost = recommendations_arch_4_summary["total_cost"].sum()
|
||||
annual_total_co2 = recommendations_arch_4_summary["total_carbon"].sum()
|
||||
annual_total_bills = recommendations_arch_4_summary["total_bill_savings"].sum()
|
||||
annual_total_energy_savings = recommendations_arch_4_summary["adjusted_heat_demand"].sum()
|
||||
archetype_measures = \
|
||||
recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_4["uprn"].values)].groupby("type")[
|
||||
"id"].count().reset_index()
|
||||
|
||||
cost_text = (f"{round(arch_4_recommendation_means['total_cost'], 2)}: "
|
||||
f"{arch_4_recommendation_min['total_cost']} - {arch_4_recommendation_max['total_cost']}")
|
||||
|
||||
sap_text = (f"{round(arch_4_recommendation_means['total_sap_points'], 2)}: "
|
||||
f"{arch_4_recommendation_min['total_sap_points']} - {arch_4_recommendation_max['total_sap_points']}")
|
||||
|
||||
energy_text = (f"{round(arch_4_recommendation_means['adjusted_heat_demand'], 2)}: "
|
||||
f"{arch_4_recommendation_min['adjusted_heat_demand']} - "
|
||||
f"{arch_4_recommendation_max['adjusted_heat_demand']}")
|
||||
|
||||
energy_percent_text = (f"{round(arch_4_recommendation_means['energy_percent_change'], 2)}: "
|
||||
f"{arch_4_recommendation_min['energy_percent_change']} - "
|
||||
f"{arch_4_recommendation_max['energy_percent_change']}")
|
||||
|
||||
carbon_text = (f"{round(arch_4_recommendation_means['total_carbon'], 2)}: "
|
||||
f"{arch_4_recommendation_min['total_carbon']} - {arch_4_recommendation_max['total_carbon']}")
|
||||
|
||||
carbon_percent_text = (f"{round(arch_4_recommendation_means['carbon_percent_change'], 2)}: "
|
||||
f"{arch_4_recommendation_min['carbon_percent_change']} - "
|
||||
f"{arch_4_recommendation_max['carbon_percent_change']}")
|
||||
|
||||
bill_text = (f"{round(arch_4_recommendation_means['total_bill_savings'], 2)}: "
|
||||
f"{arch_4_recommendation_min['total_bill_savings']} - "
|
||||
f"{arch_4_recommendation_max['total_bill_savings']}")
|
||||
|
||||
bill_percent_text = (f"{round(arch_4_recommendation_means['bills_percent_change'], 2)}: "
|
||||
f"{arch_4_recommendation_min['bills_percent_change']} - "
|
||||
f"{arch_4_recommendation_max['bills_percent_change']}")
|
||||
129
etl/customers/immo/pilot/asset_list.py
Normal file
129
etl/customers/immo/pilot/asset_list.py
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
import pandas as pd
|
||||
from utils.s3 import read_excel_from_s3
|
||||
from utils.s3 import save_csv_to_s3
|
||||
|
||||
USER_ID = 8
PORTFOLIO_ID = 70

# Council tax band of each pilot property; app() merges this onto the asset list by (address, postcode).
council_tax_bands = pd.DataFrame(
    [
        ("8 Corporation Road", "DY2 7PX", "A"),
        ("21 Wells Road", "DY5 3TB", "A"),
        ("27 Milton Road", "WV14 8HZ", "A"),
        ("195 Ashenhurst Road", "DY1 2JB", "A"),
        ("53 Bromley", "DY5 4PJ", "A"),
        ("91 Osprey Drive", "DY1 2JS", "B"),
        ("47 Fairfield Road", "DY8 5UJ", "B"),
        ("150 Huntingtree Road", "B63 4HP", "C"),
        ("6 Beech Road", "DY1 4BP", "A"),
        ("5 Oaklands", "B62 0JA", "A"),
    ],
    columns=["address", "postcode", "band"],
)

# Values to override on the EPC itself, for instance when a new survey has been conducted
# and its results have not reached the API yet.
patches = [
    {
        "address": "6 Beech Road",
        "postcode": "DY1 4BP",
        "walls-description": "Cavity wall, filled cavity",
        "walls-energy-eff": "Good",
        "roof-description": "Pitched, 12 mm loft insulation",
        "roof-energy-eff": "Very Poor",
        "windows-description": "Fully double glazed",
        "windows-energy-eff": "Good",
        "mainheat-description": "Room heaters, electric",
        "mainheat-energy-eff": "Very Poor",
        "mainheatcont-description": "Appliance thermostats",
        "mainheatc-energy-eff": "Good",
        "lighting-description": "Low energy lighting in 25% of fixed outlets",
        "lighting-energy-eff": "Good",
        "floor-description": "Solid, no insulation (assumed)",
        "secondheat-description": "None",
        "current-energy-efficiency": "32",
        "energy-consumption-current": "491",
        "co2-emissions-current": "5.0",
        "potential-energy-efficiency": "87",
    },
]

# Measures that the non-invasive surveys found to be installed already. They stay in the recommendation so the
# front end can show them, but the cost is removed and a message says that the measure is already installed.
already_installed = [
    {
        "address": "5 Oaklands",
        "postcode": "B62 0JA",
        "already_installed": ["windows_glazing"],
    },
]
|
||||
|
||||
|
||||
def app():
    """
    Build the IMMO pilot asset list and publish it, together with its overrides, to S3.

    Steps:
      1. Read the raw asset spreadsheet from the data lake bucket.
      2. Derive ``address`` (text before the first comma) and ``postcode`` (text after the last comma) from the
         ``Full Address`` column and left-merge the council tax bands onto those two keys.
      3. Rename the bedroom / bathroom count columns to ``n_bedrooms`` / ``n_bathrooms``.
      4. Save the asset list, the already-installed measures and the EPC patches to the plan inputs bucket.
      5. Print one request body per EPC goal (C for PORTFOLIO_ID, B for PORTFOLIO_ID + 1).
    """
    inputs_bucket = "retrofit-plan-inputs-dev"

    raw_asset_list = read_excel_from_s3(
        bucket_name="retrofit-datalake-dev",
        file_key="customers/Immo/IMMO Sample Assets_Dudley.xlsx",
        header_row=0
    )
    # Drop the unnamed leading column if the export has one; do not fail when it is absent
    raw_asset_list = raw_asset_list.drop(columns=["Unnamed: 0"], errors="ignore")

    # Extract address and postcode. Both are stripped: they are the merge keys below, so stray whitespace
    # around either would silently leave a property without a council tax band.
    address_parts = raw_asset_list["Full Address"].str.split(",")
    raw_asset_list["address"] = address_parts.str[0].str.strip()
    raw_asset_list["postcode"] = address_parts.str[-1].str.strip()

    asset_list = raw_asset_list.merge(council_tax_bands, how="left", on=["address", "postcode"])

    # We're provided with number of bathrooms and number of bedrooms.
    asset_list = asset_list.rename(
        columns={
            "No. of Beds": "n_bedrooms",
            "No. of WC's": "n_bathrooms"
        }
    )

    # Store the asset list in s3
    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
    save_csv_to_s3(dataframe=asset_list, bucket_name=inputs_bucket, file_name=filename)

    # NOTE(review): the two keys below end in ".json" but are written with save_csv_to_s3 - confirm that
    # whatever reads them expects CSV content.

    # Store overrides in s3
    already_installed_filename = f"{USER_ID}/{PORTFOLIO_ID}/already_installed.json"
    save_csv_to_s3(
        dataframe=pd.DataFrame(already_installed),
        bucket_name=inputs_bucket,
        file_name=already_installed_filename
    )

    # Store patches in s3
    patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json"
    save_csv_to_s3(
        dataframe=pd.DataFrame(patches),
        bucket_name=inputs_bucket,
        file_name=patches_filename
    )

    # One request body per target rating: the EPC C portfolio uses PORTFOLIO_ID, the EPC B portfolio the next id
    for portfolio_offset, goal_value in enumerate(("C", "B")):
        body = {
            "portfolio_id": str(PORTFOLIO_ID + portfolio_offset),
            "housing_type": "Private",
            "goal": "Increase EPC",
            "goal_value": goal_value,
            "trigger_file_path": filename,
            "already_installed_file_path": already_installed_filename,
            "patches_file_path": patches_filename,
            "budget": None,
        }
        print(body)
|
||||
210
etl/customers/immo/pilot/non_invasive.py
Normal file
210
etl/customers/immo/pilot/non_invasive.py
Normal file
|
|
@ -0,0 +1,210 @@
|
|||
# import extract_msg
|
||||
from datetime import datetime
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from backend.app.db.connection import db_engine
|
||||
from backend.app.db.functions.non_intrusive_surveys import upload_non_intrusive_survey_notes
|
||||
|
||||
|
||||
def parse_msg_body(text):
    """
    Flatten the body of a survey message into a dict of numbered, cleaned lines.

    Every line has its asterisks removed and surrounding whitespace stripped; lines that end up empty are
    skipped. A " = " separator inside a line is rewritten as ": ". The remaining lines are stored in order
    under the keys "Info1", "Info2", ...

    :param text: The raw message body. Any line ending (CRLF, LF or CR) is accepted.
    :return: A dict mapping "Info<n>" to the n-th non-empty cleaned line.
    """
    data = {}

    # splitlines() rather than split('\r\n'), so a body with bare LF line endings is still split into lines
    for line in text.splitlines():
        # Remove all asterisks and extra whitespace
        clean_line = line.replace('*', '').strip()
        if not clean_line:
            continue

        # replace() is a no-op when the separator is absent, so no membership check is needed first
        clean_line = clean_line.replace(' = ', ': ')

        # Number the entries by how many lines have been kept so far
        data[f"Info{len(data) + 1}"] = clean_line

    return data
|
||||
|
||||
|
||||
def app():
    """
    Upload the manually transcribed non-invasive survey notes for the IMMO pilot properties.

    The notes are keyed by UPRN and handed to upload_non_intrusive_survey_notes through a database session,
    which is closed again once the upload has finished or failed.

    :return: None
    """
    # NOTE: the notes below were created manually for the pilot. The raw survey results are Outlook .msg files
    # (one per property); a body can be inspected with
    #     with extract_msg.Message(filepath) as msg:
    #         pprint(parse_msg_body(msg.body))

    # Every pilot property was surveyed by the same surveyor on the same day
    surveyor = 'Carl Fitzgerald - The Warmfront Team'
    survey_date = datetime.strptime('2024-04-11', '%Y-%m-%d')

    non_invasive_notes = [
        {
            'uprn': 90028499,
            # 'address': '5 Oaklands',
            # 'postcode': 'B62 0JA',
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
                               'CIGA check and extracting the cavity, replacing with bead insulation. '
                               'There is a shared alleyway with the neighbour, that is a solid brick wall.',
            'Wall Render': 'Partial render between top of ground floor window and bottom of 1st floor window',
            'Existing solar PV': 'No existing solar',
            'Orientation': 'Front house direction: North East, Back house direction: South West',
            'Access to mains?': 'Property has access to the mains',
        },
        {
            'uprn': 90055152,
            # 'address': '6 Beech Road',
            # 'postcode': 'DY1 4BP',
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Wall Insulation': '1st floor is solid brick with external wall insulation. 2nd floor is cavity, '
                               'retro drilled, containing loose fibre insulation. Consider getting a '
                               'CIGA check and extracting the cavity, replacing with bead insulation.',
            'Wall Render': None,
            'Existing solar PV': 'No existing solar',
            'Orientation': 'Side house direction: North East',
            'Access to mains?': 'Property has access to the mains',
        },
        {
            'uprn': 90070461,
            # 'address': '8 Corporation Road',
            # 'postcode': 'DY2 7PX',
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Wall Insulation': "External wall insulation",
            'Wall Render': "Render finish throughout",
            'Existing solar PV': 'No existing solar',
            'Orientation': 'Front house direction: North East, Back house direction: South West',
            'Access to mains?': None,
        },
        {
            'uprn': 90022227,
            # 'address': '21 Wells Road',
            # 'postcode': 'DY5 3TB',
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
                               'CIGA check and extracting the cavity, replacing with bead insulation.',
            'Wall Render': None,
            'Existing solar PV': 'No existing solar',
            'Orientation': 'Front house direction: East, Back house direction: West',
            'Access to mains?': 'Property has access to the mains',
        },
        {
            'uprn': 90077535,
            # 'address': '47 Fairfield Road',
            # 'postcode': 'DY8 5UJ',
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
                               'CIGA check and extracting the cavity, replacing with bead insulation.',
            'Wall Render': None,
            'Existing solar PV': 'No existing solar',
            'Orientation': 'Front house direction: East, Back house direction: West',
            'Access to mains?': 'Property has access to the mains',
        },
        {
            'uprn': 90060989,
            # 'address': '53 Bromley',
            # 'postcode': 'DY5 4PJ',
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Wall Insulation': "Filled at build, partially filled - celotex/king board, 50mm cavity remaining - "
                               "recommends a cavity wall fill",
            "Roof": "Hipped roof",
            'Existing solar PV': 'No existing solar',
            'Orientation': "Front house direction: North, Back house direction: South, Side house direction: West",
            'Access to mains?': 'Property has access to the mains',
        },
        {
            'uprn': 90048026,
            # 'address': '91 Osprey Drive',
            # 'postcode': 'DY1 2JS',
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
                               'CIGA check and extracting the cavity, replacing with bead insulation.',
            'Wall Render': 'Tile hung front and rear of property',
            'Existing solar PV': 'No existing solar',
            'Orientation': 'Side house direction: East',
            'Access to mains?': 'Property has access to the mains',
        },
        {
            'uprn': 90093693,
            # 'address': '150 Huntingtree Road',
            # 'postcode': 'B63 4HP',
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Heating': 'Electric (storage heaters)',
            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
                               'CIGA check and extracting the cavity, replacing with bead insulation.',
            "Roof": "Hipped roof",
            'Existing solar PV': 'No existing solar',
            'Orientation': "Front house direction: North West, Back house direction: South East, Side house direction: "
                           "North East",
        },
        {
            'uprn': 90051858,
            # 'address': '195 Ashenhurst Road',
            # 'postcode': 'DY1 2JB',
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
                               'CIGA check and extracting the cavity, replacing with bead insulation.',
            'Wall Render': "Solid render front and rear of property",
            'Existing solar PV': 'No existing solar',
            'Orientation': 'Front house direction: South, Back house direction: North',
            'Access to mains?': 'Property has access to the mains',
        },
        {
            'uprn': 90106884,
            # 'address': '27 Milton Road',
            # 'postcode': 'WV14 8HZ',
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
                               'CIGA check and extracting the cavity, replacing with bead insulation.',
            'Wall Render': "Solid render front and rear of property",
            'Existing solar PV': 'No existing solar',
            'Orientation': 'Front house direction: South East, Back house direction: North West',
            'Access to mains?': 'Property has access to the mains',
        },
    ]

    session = sessionmaker(bind=db_engine)()
    try:
        upload_non_intrusive_survey_notes(session=session, non_invasive_notes=non_invasive_notes, batch_size=500)
    finally:
        # Always release the connection, including when the upload raises
        session.close()
|
||||
1
etl/customers/immo/pilot/requirements.txt
Normal file
1
etl/customers/immo/pilot/requirements.txt
Normal file
|
|
@ -0,0 +1 @@
|
|||
extract-msg
|
||||
293
etl/customers/slide_utils.py
Normal file
293
etl/customers/slide_utils.py
Normal file
|
|
@ -0,0 +1,293 @@
|
|||
from pptx.enum.text import PP_ALIGN # NOQA
|
||||
from pptx import Presentation
|
||||
from pptx.util import Inches, Pt
|
||||
import matplotlib.pyplot as plt
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy.sql import true
|
||||
from backend.app.db.utils import row2dict
|
||||
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
|
||||
from backend.app.db.models.recommendations import Recommendation
|
||||
from backend.app.db.models.recommendations import Plan
|
||||
from backend.app.utils import sap_to_epc
|
||||
|
||||
# Hex colour used for the bar of each EPC rating in plot_epc_distribution
EPC_COLOURS = {
    "A": "#028051", "B": "#14b759", "C": "#8ecd46", "D": "#fdd401",
    "E": "#fdab67", "F": "#ee8023", "G": "#e71437",
}
|
||||
|
||||
|
||||
def get_properties_with_default_recommendations(session: Session, portfolio_id: int):
    """
    Return every property of a portfolio together with its default recommendations.

    Properties are outer-joined to their recommendations, so a property without any default recommendation
    is still returned, with an empty 'recommendations' list.

    :param session: The SQLAlchemy session used to execute the query.
    :param portfolio_id: The ID of the portfolio for which to retrieve properties and recommendations.
    :return: A list of dictionaries, one per property, each carrying a 'recommendations' list holding the
             property's default recommendations as dictionaries.
    """
    # Filtering on `default` inside the join condition (not in WHERE) is what keeps unmatched properties
    default_recommendation_join = (
        (Recommendation.property_id == PropertyModel.id) & (Recommendation.default == true())
    )
    rows = (
        session.query(PropertyModel, Recommendation)
        .outerjoin(Recommendation, default_recommendation_join)
        .filter(PropertyModel.portfolio_id == portfolio_id)
        .all()
    )

    properties_by_id = {}
    for property_row, recommendation in rows:
        # A property appears once per joined recommendation; convert it only the first time it is seen
        if property_row.id not in properties_by_id:
            record = row2dict(property_row)
            record['recommendations'] = []
            properties_by_id[property_row.id] = record

        # `recommendation` is None for a property that has no default recommendation
        if recommendation and recommendation.default:
            properties_by_id[property_row.id]['recommendations'].append(row2dict(recommendation))

    return list(properties_by_id.values())
|
||||
|
||||
|
||||
def get_property_details_by_portfolio_id(session: Session, portfolio_id: int):
    """
    Fetch the EPC property details of every property in a portfolio.

    :param session: The SQLAlchemy session used to execute the query.
    :param portfolio_id: The ID of the portfolio for which to retrieve property details.
    :return: A list of dictionaries, one per property details row; an empty list when there are none.
    """
    matches = (
        session.query(PropertyDetailsEpcModel)
        .filter(PropertyDetailsEpcModel.portfolio_id == portfolio_id)
        .all()
    )
    if not matches:
        return []

    # Convert the SQLAlchemy objects to dictionaries
    return [row2dict(details) for details in matches]
|
||||
|
||||
|
||||
def get_plan_by_portfolio_id(session: Session, portfolio_id: int):
    """
    Fetch every plan that belongs to a portfolio.

    :param session: The SQLAlchemy session used to execute the query.
    :param portfolio_id: The ID of the portfolio for which to retrieve plans.
    :return: A list of dictionaries, one per plan; an empty list when the portfolio has no plans.
    """
    matches = session.query(Plan).filter(Plan.portfolio_id == portfolio_id).all()
    if not matches:
        return []

    # Convert the SQLAlchemy objects to dictionaries
    return [row2dict(plan) for plan in matches]
|
||||
|
||||
|
||||
def plot_epc_distribution(df, customer_key, title='Your Units', background_color='white', bar_height=0.4, font_size=15):
    """
    Plot a horizontal bar chart of the EPC rating distribution and save it as a PNG.

    Bars are drawn in ascending order of percentage and coloured via EPC_COLOURS. Each bar is labelled with its
    rating inside the bar and with "count (percentage%)" to the right of it. Ticks, tick labels and spines are
    hidden.

    :param df: DataFrame with columns ['current_epc_rating', 'count', 'percentage']. It is left unmodified.
    :param customer_key: Folder name under etl/customers/ that the image is written into.
    :param title: Title of the plot
    :param background_color: Background color of the figure and the axes
    :param bar_height: Thickness of the bars (default 0.4)
    :param font_size: Base font size (default 15); ratings use twice this size, the title 1.2 times it
    :return: Tuple of (figure, path of the saved image). The figure is closed before it is returned.
    """
    # Square figure: at least 6 inches, growing with the number of entries
    square_size = max(6, len(df) * 0.6)
    fig, ax = plt.subplots(figsize=(square_size, square_size))
    fig.patch.set_facecolor(background_color)  # Set figure background color
    ax.set_facecolor(background_color)  # Set axes background color

    # Round to 1 decimal place on a copy (assign returns a new frame) so the caller's DataFrame is not mutated
    df_sorted = df.assign(percentage=df['percentage'].round(1)).sort_values('percentage', ascending=True)

    # Plot bars with specified height for adjustable thickness
    bars = ax.barh(df_sorted['current_epc_rating'], df_sorted['percentage'],
                   color=df_sorted['current_epc_rating'].map(EPC_COLOURS), edgecolor='none', height=bar_height)

    epc_rating_font_size = font_size * 2  # EPC rating font size larger than base font size
    count_percentage_font_size = font_size  # Count (percentage) font size as base font size

    # Annotate bars with EPC ratings inside and count with percentage values outside
    for index, bar in enumerate(bars):
        row = df_sorted.iloc[index]
        width = bar.get_width()
        bar_centre = bar.get_y() + bar.get_height() / 2

        # EPC rating inside the bar, right-aligned just short of the bar's end
        ax.text(width - (width * 0.05), bar_centre,
                f"{row['current_epc_rating']}", va='center', ha='right', color='white',
                fontsize=epc_rating_font_size)

        # Count and percentage outside the bar, base font size
        ax.text(width + 1, bar_centre,
                f"{row['count']} ({row['percentage']}%)", va='center', color='black',
                fontsize=count_percentage_font_size)

    ax.set_title(title, fontsize=font_size * 1.2)  # Adjust title font size proportionally
    ax.tick_params(axis='x', which='both', bottom=False, top=False,
                   labelbottom=False)  # Remove x-axis tick marks and values
    ax.tick_params(axis='y', which='both', left=False, right=False,
                   labelleft=False)  # Remove y-axis tick marks and labels
    for side in ('top', 'right', 'left', 'bottom'):
        ax.spines[side].set_visible(False)  # Remove all four spines

    plt.tight_layout()  # Adjust layout
    plt.show()

    # Save the figure as an image
    figure_path = f'etl/customers/{customer_key}/epc_distribution_plot.png'
    fig.savefig(figure_path, bbox_inches='tight')
    plt.close(fig)  # Close the figure to free memory

    return fig, figure_path
|
||||
|
||||
|
||||
def save_plot_to_image(figure, path='plot.png'):
    """
    Saves a matplotlib figure to an image file for insertion into PowerPoint, then closes the figure.

    :param figure: The matplotlib figure to save.
    :param path: Where to write the image; the figure is saved with a tight bounding box.
    """
    try:
        figure.savefig(path, bbox_inches='tight')
    finally:
        # Close even when saving fails, so a bad path does not leave the figure registered with pyplot
        plt.close(figure)
|
||||
|
||||
|
||||
def save_figure_as_image(figure, filename='temp_plot.png'):
    """
    Saves a matplotlib figure to an image file at 300 dpi, then closes the figure.

    :param figure: The matplotlib figure to save.
    :param filename: Where to write the image.
    """
    try:
        figure.savefig(filename, dpi=300)
    finally:
        # Close even when saving fails, so the figure is neither displayed later nor kept alive by pyplot
        plt.close(figure)
|
||||
|
||||
|
||||
def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inches(1), width_inches=Inches(8),
                                height_inches=Inches(2)):
    """
    Adds a commentary text box to a slide, with one paragraph per line of the commentary.

    :param slide: The slide object to add the commentary to.
    :param commentary: The commentary text; each newline-separated section becomes its own paragraph.
    :param top_inches: The top position of the commentary text box.
    :param left_inches: The left position of the commentary text box.
    :param width_inches: The width of the commentary text box.
    :param height_inches: The height of the commentary text box.
    """
    # Imported here so this function does not rely on a change to the module's import block
    from pptx.enum.text import MSO_AUTO_SIZE

    txBox = slide.shapes.add_textbox(left_inches, top_inches, width_inches, height_inches)
    tf = txBox.text_frame

    # Configure text frame
    tf.word_wrap = True
    # auto_size takes an MSO_AUTO_SIZE member (or None), not a bool. The previous `True` only passed where it
    # happened to compare equal to SHAPE_TO_FIT_TEXT (value 1), so name that member explicitly.
    tf.auto_size = MSO_AUTO_SIZE.SHAPE_TO_FIT_TEXT
    tf.paragraphs[0].alignment = PP_ALIGN.LEFT

    # Split the commentary into sections, one paragraph each
    for i, section in enumerate(commentary.split("\n")):
        # A new text frame already holds one empty paragraph; reuse it for the first section
        p = tf.paragraphs[0] if i == 0 else tf.add_paragraph()
        p.text = section
        p.space_after = Pt(14)  # Adjust space after each paragraph as needed
        p.font.size = Pt(14)  # Adjust font size as needed
        p.level = 0  # Indent level, can be adjusted for nested entries
        p.space_before = Pt(0)
|
||||
|
||||
|
||||
def add_slide_with_image(prs, title, img_path=None, commentary=None):
    """
    Append a titled slide to the presentation, with an optional image and optional commentary.

    With an image, the commentary is placed below it; without one, the commentary is placed further up,
    in the middle of the slide.

    :param prs: The presentation to add the slide to.
    :param title: Text for the slide's title placeholder.
    :param img_path: Path of the image to place on the slide, if any.
    :param commentary: Commentary text, passed on to add_commentary_with_bullets, if any.
    """
    # Layout index 5 ("Title Only" in python-pptx's default template)
    slide = prs.slides.add_slide(prs.slide_layouts[5])
    slide.shapes.title.text = title

    # Commentary sits mid-slide by default and moves below the picture when there is one
    commentary_top = Inches(3)
    if img_path:
        # Picture at left=1in, top=1.5in, forced to 8in x 4.5in
        slide.shapes.add_picture(img_path, Inches(1), Inches(1.5), Inches(8), Inches(4.5))
        commentary_top = Inches(6)

    if commentary:
        add_commentary_with_bullets(slide, commentary, commentary_top)
|
||||
|
||||
|
||||
def create_powerpoint(data, save_location):
    """
    Build a PowerPoint presentation with one slide per entry of `data` and save it.

    :param data: A dictionary whose values are per-slide dictionaries with the optional keys 'title'
                 (defaults to an empty string), 'image_path' and 'text'. Slides follow the dictionary's order.
    :param save_location: The file path where the PowerPoint presentation will be saved.
    """
    prs = Presentation()

    for slide_data in data.values():
        add_slide_with_image(
            prs,
            slide_data.get('title', ""),
            slide_data.get('image_path'),
            slide_data.get('text'),
        )

    # Save the presentation
    prs.save(save_location)
|
||||
|
||||
|
||||
def create_recommendations_summary(recommendations_df, properties_df, property_details_df, sap_target):
    """
    Roll the individual recommendations up to one row per property.

    :param recommendations_df: One row per recommendation; must hold property_id plus the columns summed below.
    :param properties_df: Must hold id, uprn and current_sap_points for each property.
    :param property_details_df: Optional. When not None it must hold uprn, co2_emissions,
                                adjusted_energy_consumption and energy_bill, which are attached as the
                                property's current_co2, current_energy and current_energy_bill.
    :param sap_target: SAP score being aimed for; sap_difference is this target minus the expected SAP points.
    :return: DataFrame with one row per property_id.
    """
    # Per-property totals: output column -> (source column, aggregation)
    totals = {
        "total_sap_points": ("sap_points", "sum"),
        "total_valuation_impact": ("property_valuation_increase", "sum"),
        "total_bill_savings": ("energy_cost_savings", "sum"),
        "total_cost": ("estimated_cost", "sum"),
        "total_carbon": ("co2_equivalent_savings", "sum"),
        "adjusted_heat_demand": ("adjusted_heat_demand", "sum"),
    }
    summary = recommendations_df.groupby(["property_id"]).agg(**totals).reset_index()

    # Bring in the uprn and the current SAP score of each property
    current_state = properties_df[["id", "uprn", "current_sap_points"]].rename(columns={"id": "property_id"})
    summary = summary.merge(current_state, on="property_id", how="left")

    summary["expected_sap_points"] = summary["current_sap_points"] + summary["total_sap_points"]
    summary["expected_epc_rating"] = summary["expected_sap_points"].apply(lambda points: sap_to_epc(points))
    summary["sap_difference"] = sap_target - summary["expected_sap_points"]

    if property_details_df is None:
        return summary

    # Attach the current CO2, energy consumption and energy bill, matched on uprn
    baseline = property_details_df[["uprn", "co2_emissions", "adjusted_energy_consumption", "energy_bill"]].rename(
        columns={
            "co2_emissions": "current_co2",
            "adjusted_energy_consumption": "current_energy",
            "energy_bill": "current_energy_bill",
        }
    )
    return summary.merge(baseline, on="uprn", how="left")
|
||||
195
etl/customers/urban_splash/asset_list.py
Normal file
195
etl/customers/urban_splash/asset_list.py
Normal file
|
|
@ -0,0 +1,195 @@
|
|||
import os
|
||||
|
||||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from utils.s3 import read_excel_from_s3
|
||||
from backend.SearchEpc import SearchEpc
|
||||
from epc_api.client import EpcClient
|
||||
from utils.s3 import save_csv_to_s3
|
||||
|
||||
# Read in the .env file in backend
|
||||
load_dotenv(dotenv_path="backend/.env")
|
||||
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
||||
|
||||
USER_ID = 8
|
||||
PORTFOLIO_ID = 66
|
||||
SECOND_SCENARIO_PORTFOLIO_ID = 65
|
||||
|
||||
# We also create a second portfolio for a subset of properties that do not meet the install requirements
|
||||
# We drop these uprns from the first plan
|
||||
second_portfolio_uprns = [
|
||||
10070056840, 10070056846, 10070056847, 10070056843, 10070056848, 10070056844, 10070056849,
|
||||
10070056829, 10070056920, 10023345463
|
||||
]
|
||||
|
||||
|
||||
def app():
    """
    This application will read in the Urban Splash data, in the dev AWS account, and pre-process it. There are a
    few issues with the file, including incorrect postcodes.

    The customer is interested in the following:
    - Getting properties to an EPC C
    - Doing so within a budget of £5,000

    Two input files are written to S3: one for the main portfolio (PORTFOLIO_ID) and one for the properties listed
    in second_portfolio_uprns (SECOND_SCENARIO_PORTFOLIO_ID). The request body for each is printed.

    :raises LookupError: if no EPC can be found for an asset under any of the candidate postcodes
    :return:
    """

    potential_postcodes = ["BD9 5BQ", "BD9 5BR", "BD9 5BN"]

    raw_asset_list = read_excel_from_s3(
        bucket_name="retrofit-datalake-dev",
        file_key="customers/urban_splash/raw_asset_list/USRF - Velvet Mill EPC.xlsx",
        header_row=2
    )

    # We have a series of apartment numbers that are "Apartment 001", "Apartment 002", etc. We need to convert these
    # to "Apartment 1", "Apartment 2", etc.
    raw_asset_list["address1"] = raw_asset_list["Unit Number"].str.replace(
        "Apartment 00", "Apartment ", regex=True
    )
    raw_asset_list["address1"] = raw_asset_list["address1"].str.replace(
        "Apartment 0", "Apartment ", regex=True
    )

    # For each entry in the asset list, we make an api call to the EPC database to get the EPC data. We'll retrieve the
    # uprn for the property, as well as a nice address and postcode that we can use. We'll also try and deduce the
    # likely wall construction, since many of the homes are new builds, based on their newest EPC

    epc_data = []
    processed_asset_list = []
    for _, row in tqdm(raw_asset_list.iterrows(), total=len(raw_asset_list)):

        # The postcodes in the file are unreliable, so try each candidate until one yields an EPC
        newest_epc = None
        for postcode in potential_postcodes:
            searcher = SearchEpc(
                address1=row.address1, postcode=postcode, auth_token=EPC_AUTH_TOKEN, os_api_key=""
            )
            searcher.find_property(skip_os=True)

            if searcher.newest_epc is not None:
                newest_epc = searcher.newest_epc
                break

        if newest_epc is None:
            raise LookupError(
                f"No EPC found for {row.address1!r} under any of the postcodes {potential_postcodes}"
            )

        if row["Beds"] == "Studio":
            number_heated_rooms = 2
            number_habitable_rooms = 2
        else:
            # Assume one room for communal space, one room for bathroom
            number_heated_rooms = row["Beds"] + 2
            number_habitable_rooms = row["Beds"] + 2

        to_append = {
            **row.to_dict(),
            "uprn": newest_epc["uprn"],
            "address": newest_epc["address1"],
            "postcode": newest_epc["postcode"],
            # "walls-description": newest_epc["walls-description"],
            # "roof-description": newest_epc["roof-description"],
            # "floor-description": newest_epc["floor-description"],
            # "total-floor-area": newest_epc["total-floor-area"],
            "full-address": newest_epc["address"],
            "number-heated-rooms": number_heated_rooms,
            "number-habitable-rooms": number_habitable_rooms,
        }

        processed_asset_list.append(to_append)
        epc_data.append(newest_epc)

    processed_asset_list_df = pd.DataFrame(processed_asset_list)

    epc_data_df = pd.DataFrame(epc_data)

    # Properties listed in second_portfolio_uprns go to the second portfolio, everything else to the first
    in_second_portfolio = processed_asset_list_df["uprn"].astype(int).isin(second_portfolio_uprns)

    # We store this data
    # Store the data in s3
    filename = f"{USER_ID}/{PORTFOLIO_ID}/test_inputs.csv"
    save_csv_to_s3(
        dataframe=processed_asset_list_df[~in_second_portfolio],
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename
    )

    body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Private",
        "goal": "Increase EPC",
        "goal_value": "C",
        "trigger_file_path": filename,
        "budget": None,
    }
    print(body)

    subset = processed_asset_list_df[in_second_portfolio]

    filename2 = f"{USER_ID}/{SECOND_SCENARIO_PORTFOLIO_ID}/test_inputs.csv"
    save_csv_to_s3(
        dataframe=subset,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename2
    )

    body = {
        "portfolio_id": str(SECOND_SCENARIO_PORTFOLIO_ID),
        "housing_type": "Private",
        "goal": "Increase EPC",
        "goal_value": "C",
        # The second portfolio must be triggered from its own input file, not the first portfolio's
        "trigger_file_path": filename2,
        "budget": None,
    }
    print(body)

    # Some basic analysis on the heating, heating controls and hot water systems

    # All of the heating systems are rated very poor, poor or average. When it's average, they are all also
    # "Room heaters, electric", but the house has "Programmer and appliance thermostats" for the heating controls.
    # which is more efficient
    pd.set_option('display.max_rows', 500)
    pd.set_option('display.max_columns', 500)
    pd.set_option('display.width', 1000)

    # Heating
    print(epc_data_df[["mainheat-description", "mainheatcont-description", "mainheat-energy-eff"]].drop_duplicates())
    #                    mainheat-description              mainheatcont-description mainheat-energy-eff
    # 0                Room heaters, electric        Programmer and room thermostat           Very Poor
    # 12               Room heaters, electric  Programmer and appliance thermostats             Average
    # 20  Electric storage heaters, radiators                  Celect-type controls                Poor

    # Hot water
    print(epc_data_df[["hotwater-description", "hot-water-energy-eff"]].drop_duplicates())
    #                    hotwater-description hot-water-energy-eff
    # 0   Electric immersion, standard tariff            Very Poor
    # 12         Electric immersion, off-peak              Average

    # We now retrieve EPCS for all of the properties that are in these postcodes very obviously for the velvet mill
    # We'll use this information to get a sense of the likely wall/roof/floor construction for the properties

    # client = EpcClient(auth_token=EPC_AUTH_TOKEN)
    #
    # neighbouring_epcs = []
    # for pc in potential_postcodes:
    #     response = client.domestic.search(params={"postcode": pc}, size=1000)
    #     data = response["rows"]
    #
    #     # keep just rows that are clearly for the velvet mill
    #     data = [x for x in data if "velvet" in x["address1"].lower()]
    #
    #     neighbouring_epcs.extend(data)
    #
    # neighbouring_epcs_df = pd.DataFrame(neighbouring_epcs)
    # neighbouring_epcs_df["walls-description"].value_counts()
    # neighbouring_epcs_df["roof-description"].value_counts()
    # neighbouring_epcs_df["floor-description"].value_counts()
|
||||
352
etl/customers/urban_splash/slides.py
Normal file
352
etl/customers/urban_splash/slides.py
Normal file
|
|
@ -0,0 +1,352 @@
|
|||
"""
|
||||
This script contains the code to generate the data required to populate the slides
|
||||
We connect to the database and extract the data for the portfolio needed so it is recommended to use
|
||||
an environment akin to the backend to run this script
|
||||
"""
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from backend.app.db.connection import db_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from etl.customers.slide_utils import (
|
||||
plot_epc_distribution,
|
||||
get_property_details_by_portfolio_id,
|
||||
get_plan_by_portfolio_id,
|
||||
get_properties_with_default_recommendations,
|
||||
create_powerpoint,
|
||||
create_recommendations_summary
|
||||
)
|
||||
|
||||
PORTFOLIO_ID = 66
|
||||
SECOND_SCENARIO_PORTFOLIO_ID = 65
|
||||
EPC_TARGET = "C"
|
||||
SAP_TARGET = 69
|
||||
CUSTOMER_KEY = "urban_splash"
|
||||
|
||||
|
||||
def app():
    """
    Generate the Urban Splash technical slide deck.

    Pulls the properties, property details and plans for PORTFOLIO_ID and SECOND_SCENARIO_PORTFOLIO_ID from the
    database, derives the figures quoted on each slide and saves the deck to
    etl/customers/{CUSTOMER_KEY}/{CUSTOMER_KEY}_tech_slides.pptx via create_powerpoint.
    """
    # Connect to database
    session = sessionmaker(bind=db_engine)()

    ########################################################################
    # Get the data we need
    ########################################################################

    # Get the properties for the portfolio
    properties = get_properties_with_default_recommendations(session, PORTFOLIO_ID)
    properties_df = pd.DataFrame(properties)

    # We now pull the data for the property details
    property_details = get_property_details_by_portfolio_id(session, PORTFOLIO_ID)
    property_details_df = pd.DataFrame(property_details)
    # Merge on uprn
    property_details_df = property_details_df.merge(
        properties_df[["uprn", "id"]].rename(columns={"id": "property_id"}),
        on="property_id"
    )

    plans = get_plan_by_portfolio_id(session, PORTFOLIO_ID)
    plans_df = pd.DataFrame(plans)

    # Unnest the recommendations. Each recommendation is a list of dictionaries
    recommendations_exploded = properties_df["recommendations"].explode().tolist()
    recommendations_df = pd.DataFrame([r for r in recommendations_exploded if not pd.isnull(r)])

    # create_recommendations_summary takes (recommendations_df, properties_df, property_details_df, sap_target).
    # None of the current_* detail columns are used below, so the optional details frame is passed as None.
    recommendations_summary = create_recommendations_summary(
        recommendations_df, properties_df, property_details_df=None, sap_target=SAP_TARGET
    )

    # Get the data for the second scenario portfolio
    properties_second_scenario = get_properties_with_default_recommendations(session, SECOND_SCENARIO_PORTFOLIO_ID)
    properties_second_scenario_df = pd.DataFrame(properties_second_scenario)

    propert_details_second_scenario = get_property_details_by_portfolio_id(session, SECOND_SCENARIO_PORTFOLIO_ID)
    property_details_second_scenario_df = pd.DataFrame(propert_details_second_scenario)
    # Merge on uprn
    property_details_second_scenario_df = property_details_second_scenario_df.merge(
        properties_second_scenario_df[["uprn", "id"]].rename(columns={"id": "property_id"}),
        on="property_id"
    )

    plans_second_scenario = get_plan_by_portfolio_id(session, SECOND_SCENARIO_PORTFOLIO_ID)
    plans_second_scenario_df = pd.DataFrame(plans_second_scenario)
    # Merge on uprn so we can compare properties across portfolios
    plans_second_scenario_df = plans_second_scenario_df.merge(
        properties_second_scenario_df[["uprn", "id"]].rename(columns={"id": "property_id"}), on="property_id"
    )

    recommendations_exploded_second_scenario = properties_second_scenario_df["recommendations"].explode().tolist()
    recommendations_second_scenario_df = pd.DataFrame(
        [r for r in recommendations_exploded_second_scenario if not pd.isnull(r)]
    )

    recommendations_summary_second_scenario = create_recommendations_summary(
        recommendations_second_scenario_df,
        properties_second_scenario_df,
        property_details_df=None,
        sap_target=SAP_TARGET
    )

    # Combine the data for both scenarios
    full_property_details = pd.concat([property_details_df, property_details_second_scenario_df])
    full_properties = pd.concat([properties_df, properties_second_scenario_df])

    epc_rating_summary = full_properties.groupby("current_epc_rating").size().reset_index(name="count")
    epc_rating_summary["percentage"] = epc_rating_summary["count"] / epc_rating_summary["count"].sum() * 100

    ########################################################################
    # We pull out the data for the slides
    ########################################################################

    ############
    # Slide 1:
    ############
    # visual
    epc_plot, figure_path = plot_epc_distribution(
        epc_rating_summary, CUSTOMER_KEY, title="", background_color="white", bar_height=0.75, font_size=15
    )

    # floor area - upper and lower bounds

    # Take just properties that are below EPC C
    properties_needing_work = full_properties[
        full_properties["current_sap_points"] < SAP_TARGET
    ]
    property_details_needing_work = full_property_details[
        full_property_details["uprn"].isin(properties_needing_work["uprn"])
    ]

    min_area, max_area, average_area = (
        full_property_details["total_floor_area"].min(),
        full_property_details["total_floor_area"].max(),
        full_property_details["total_floor_area"].mean()
    )

    # Annual energy consumption - upper and lower bounds
    min_energy_consumption, max_energy_consumption, average_consumption, total_consumption = (
        property_details_needing_work["adjusted_energy_consumption"].min(),
        property_details_needing_work["adjusted_energy_consumption"].max(),
        property_details_needing_work["adjusted_energy_consumption"].mean(),
        property_details_needing_work["adjusted_energy_consumption"].sum()
    )

    # Co2 emissions - upper and lower bounds
    min_co2, max_co2, average_co2, total_co2 = (
        property_details_needing_work["co2_emissions"].min(),
        property_details_needing_work["co2_emissions"].max(),
        property_details_needing_work["co2_emissions"].mean(),
        property_details_needing_work["co2_emissions"].sum()
    )

    # Valuation: upper and lower bounds and average - take positive values in case we have just a sample
    # Note the quoted "average" valuation is the median
    valuation_df = properties_df[properties_df["current_valuation"] > 0]
    min_valuation, max_valuation, average_valuation = (
        valuation_df["current_valuation"].min(),
        valuation_df["current_valuation"].max(),
        valuation_df["current_valuation"].median()
    )

    slide_1_commentary = (
        f"Floor areas range from {min_area} to {max_area} square meters, with an average of {average_area} square "
        f"meters. \n"
        f"Annual energy consumption ranges from {min_energy_consumption} to {max_energy_consumption} kWh, with an "
        f"average of {average_consumption} kWh. \n"
        f"CO2 emissions range from {min_co2} to {max_co2} tonnes, with an average of {average_co2} tonnes. \n"
        f"Valuations range from £{min_valuation} to £{max_valuation}, with an average of £"
        f"{average_valuation}.\n"
    )

    ############
    # Slide 2:
    ############
    # What it would take to hit EPC C

    # We calculate the number of units that will make it to an EPC C

    units_hitting_target = recommendations_summary[
        recommendations_summary["expected_epc_rating"] == EPC_TARGET
    ]

    n_units_to_target = units_hitting_target.shape[0]

    measures = "Electrical heating system upgrades & heating controls and Hot water system improvements"

    # Costs
    (
        expected_cost_per_unit_lower,
        expected_cost_per_unit_upper,
        expected_project_cost,
    ) = (
        units_hitting_target["total_cost"].min(),
        units_hitting_target["total_cost"].max(),
        units_hitting_target["total_cost"].sum()
    )

    # Per property
    # Take positive entries just in case we have a sample
    valuation_impact_df = plans_df[plans_df["property_id"].isin(units_hitting_target["property_id"])]
    valuation_impact_df = valuation_impact_df[valuation_impact_df["valuation_increase_lower_bound"] > 0]
    min_valuation_impact, max_valuation_impact, average_valuation_impact = (
        valuation_impact_df["valuation_increase_lower_bound"].median(),
        valuation_impact_df["valuation_increase_upper_bound"].median(),
        valuation_impact_df["valuation_increase_average"].median()
    )

    # Bill savings per property
    min_bill_savings, max_bill_savings, average_bill_savings = (
        units_hitting_target["total_bill_savings"].min(),
        units_hitting_target["total_bill_savings"].max(),
        units_hitting_target["total_bill_savings"].mean()
    )

    # Total CO2 reduction of portfolio
    min_co2_reduction, max_co2_reduction, average_co2_reduction, total_co2_reduction = (
        units_hitting_target["total_carbon"].min(),
        units_hitting_target["total_carbon"].max(),
        units_hitting_target["total_carbon"].mean(),
        units_hitting_target["total_carbon"].sum()
    )

    slide_2_commentary = (
        f"{n_units_to_target} units expected to achieve EPC {EPC_TARGET} \n"
        f"Expected cost: {expected_cost_per_unit_lower} - {expected_cost_per_unit_upper}, total project: £"
        f"{expected_project_cost}\n"
        f"Measures include: {measures}\n"
        f"Valuation increase per property: £{min_valuation_impact}-{max_valuation_impact}, average: £"
        f"{average_valuation_impact}\n"
        f"Bill savings per property: £{min_bill_savings}-{max_bill_savings}, average: £{average_bill_savings}\n"
        f"Total CO2 reduction: {min_co2_reduction}-{max_co2_reduction} tonnes, average: {average_co2_reduction}\n"
        f"tonnes, total for the {n_units_to_target} properties: {total_co2_reduction} tonnes\n"
    )

    ############
    # Slide 3:
    ############

    units_missed_target = recommendations_summary_second_scenario.copy()

    n_units_missed_target = units_missed_target.shape[0]

    # How close were the properties that missed the target
    # We calculate the difference between the expected sap points and the lower bound sap points for the target
    # These are referenced by slide_3_text below, so they must be defined.
    # NOTE(review): sap_difference here comes from the second scenario summary - confirm this is the gap to quote
    min_difference, max_difference = (
        np.ceil(units_missed_target["sap_difference"].min()),
        np.ceil(units_missed_target["sap_difference"].max()),
    )

    second_scenario_measures = ("Electrical heating system upgrades & heating controls, Hot water system improvements "
                                "and internal wall insulation")

    # Just take all of the units in the second scenario, since they're borderline
    units_hitting_target_second_scenario = recommendations_summary_second_scenario[
        # (recommendations_summary_second_scenario["expected_epc_rating"] == EPC_TARGET) &
        (recommendations_summary_second_scenario["uprn"].isin(units_missed_target["uprn"].values))
    ]

    n_units_hitting_second_scenario = units_hitting_target_second_scenario[
        units_hitting_target_second_scenario["expected_epc_rating"] == EPC_TARGET
    ].shape[0]

    # Impact on second scenario
    # Costs
    (
        expected_cost_per_unit_lower_second_scenario,
        expected_cost_per_unit_upper_second_scenario,
        expected_project_cost_second_scenario,
    ) = (
        recommendations_summary_second_scenario["total_cost"].min(),
        recommendations_summary_second_scenario["total_cost"].max(),
        recommendations_summary_second_scenario["total_cost"].sum()
    )

    valuation_impact_df_second_scenario = plans_second_scenario_df[
        plans_second_scenario_df["uprn"].isin(units_hitting_target_second_scenario["uprn"])
    ]
    valuation_impact_df_second_scenario = valuation_impact_df_second_scenario[
        valuation_impact_df_second_scenario["valuation_increase_lower_bound"] > 0
    ]
    (
        min_valuation_impact_second_scenario,
        max_valuation_impact_second_scenario,
        average_valuation_impact_second_scenario
    ) = (
        valuation_impact_df_second_scenario["valuation_increase_lower_bound"].median(),
        valuation_impact_df_second_scenario["valuation_increase_upper_bound"].median(),
        valuation_impact_df_second_scenario["valuation_increase_average"].median()
    )

    # Bill savings per property
    min_bill_savings_second_scenario, max_bill_savings_second_scenario, average_bill_savings_second_scenario = (
        units_hitting_target_second_scenario["total_bill_savings"].min(),
        units_hitting_target_second_scenario["total_bill_savings"].max(),
        units_hitting_target_second_scenario["total_bill_savings"].mean()
    )

    # Total CO2 reduction of portfolio
    (
        min_co2_reduction_second_scenario,
        max_co2_reduction_second_scenario,
        average_co2_reduction_second_scenario,
        total_co2_reduction_second_scenario
    ) = (
        units_hitting_target_second_scenario["total_carbon"].min(),
        units_hitting_target_second_scenario["total_carbon"].max(),
        units_hitting_target_second_scenario["total_carbon"].mean(),
        units_hitting_target_second_scenario["total_carbon"].sum()
    )

    # Values for the leftovers
    units_missing_second_scenario = recommendations_summary_second_scenario[
        (recommendations_summary_second_scenario["expected_epc_rating"] != EPC_TARGET) &
        (recommendations_summary_second_scenario["uprn"].isin(units_missed_target["uprn"].values))
    ]

    min_difference_second_scenario, max_difference_second_scenario, average_difference_second_scenario = (
        np.ceil(units_missing_second_scenario["sap_difference"].min()),
        np.ceil(units_missing_second_scenario["sap_difference"].max()),
        np.ceil(units_missing_second_scenario["sap_difference"].mean())
    )

    # NOTE(review): "a further ..." quotes every second scenario unit rather than n_units_hitting_second_scenario,
    # and the closing "on average" sentence quotes the minimum difference - confirm both are intended
    slide_3_text = (
        f"{n_units_missed_target} units look like they would miss the EPC {EPC_TARGET} by {min_difference}-"
        f"{max_difference} points \n"
        "When on site, an assessor may be able to identify further improvements to bring the properties up to an EPC "
        f"{EPC_TARGET}.\n"
        f"We have looked at a more extensive package for these properties, including: {second_scenario_measures}\n"
        f"Of the {n_units_missed_target} properties, a further {units_hitting_target_second_scenario.shape[0]} are "
        f"expected to achieve EPC {EPC_TARGET} with these measures.\n"
        f"Expected cost: {expected_cost_per_unit_lower_second_scenario} - "
        f"{expected_cost_per_unit_upper_second_scenario}, "
        f"total project: £"
        f"{expected_project_cost_second_scenario}\n"
        f"Valuation increase per property: £{min_valuation_impact_second_scenario}-"
        f"{max_valuation_impact_second_scenario}, average: £"
        f"{average_valuation_impact_second_scenario}\n"
        f"Bill savings per property: £{min_bill_savings_second_scenario}-{max_bill_savings_second_scenario}, "
        f"average: £{average_bill_savings_second_scenario}\n"
        f"Total CO2 reduction: {min_co2_reduction_second_scenario}-{max_co2_reduction_second_scenario} tonnes, "
        f"average: "
        f"{average_co2_reduction_second_scenario}\n"
        f"tonnes, total for the {n_units_hitting_second_scenario} properties: {total_co2_reduction_second_scenario} "
        f"tonnes\n"
        f"Even in the second scenario, the remaining {units_missing_second_scenario.shape[0]} properties are expected "
        f"to miss EPC {EPC_TARGET} by {min_difference_second_scenario} point on average - they should be visited by "
        f"an assessor"
    )

    slide_data = {
        'slide_1': {
            "title": "EPC Rating Distribution",
            'image_path': figure_path,  # Pass the path to the saved image
            "text": slide_1_commentary
        },
        "slide_2": {
            "title": f"Properties that achieve EPC {EPC_TARGET}",
            "text": slide_2_commentary,
        },
        "slide 3": {
            "title": f"Properties that miss EPC {EPC_TARGET}",
            "text": slide_3_text
        }
    }

    save_location = f"etl/customers/{CUSTOMER_KEY}/{CUSTOMER_KEY}_tech_slides.pptx"
    create_powerpoint(slide_data, save_location)
|
||||
787
etl/eligibility/Eligibility.py
Normal file
787
etl/eligibility/Eligibility.py
Normal file
|
|
@ -0,0 +1,787 @@
|
|||
from recommendations.recommendation_utils import convert_thickness_to_numeric
|
||||
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
|
||||
from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
|
||||
from etl.epc_clean.epc_attributes.FloorAttributes import FloorAttributes
|
||||
|
||||
|
||||
class Eligibility:
|
||||
"""
|
||||
Given the epc data about a property, this class holds the logic for determining if the home
|
||||
is eligible for a specific retrofit measure.
|
||||
|
||||
For example, this could be whether the loft has insulation below a standardised threshold, or
|
||||
if it has an empty cavity
|
||||
|
||||
Further to this, this class is responsible for determining if the property is suitable for specific funding
|
||||
schemes
|
||||
"""
|
||||
|
||||
loft = None
|
||||
cavity = None
|
||||
solid_wall = None
|
||||
room_roof = None
|
||||
flat_roof = None
|
||||
suspended_floor = None
|
||||
solid_floor = None
|
||||
|
||||
# schemes based on Warmfront now
|
||||
gbis_warmfront = None
|
||||
eco4_warmfront = None
|
||||
# Schemes based on full eligibility
|
||||
gbis = None
|
||||
eco4 = None
|
||||
|
||||
# If the loft has less than 100mm of insulation, we classify the home as needing loft insulation
|
||||
LOFT_INSULATION_THRESHOLD = 100
|
||||
HIGH_LOFT_INSULATION_THRESHOLD = 269
|
||||
|
||||
# Because EPCS have different values for tenure, we need to remap them to a common set of values
|
||||
tenure_remap = {
|
||||
'NO DATA!': "unknown",
|
||||
'Not defined - use in the case of a new dwelling for which the intended tenure in not known. It is no':
|
||||
"unknown",
|
||||
'Owner-occupied': 'Owner-occupied',
|
||||
'Rented (private)': 'Rented (private)',
|
||||
'Rented (social)': 'Rented (social)',
|
||||
'owner-occupied': 'Owner-occupied',
|
||||
'rental (private)': 'Rented (private)',
|
||||
'rental (social)': 'Rented (social)',
|
||||
'unknown': "unknown",
|
||||
}
|
||||
|
||||
def __init__(self, epc, cleaned):
|
||||
self.epc = epc
|
||||
self.cleaned = cleaned
|
||||
|
||||
self.walls = self.parse_fabric("walls-description")
|
||||
self.roof = self.parse_fabric("roof-description")
|
||||
self.floor = self.parse_fabric("floor-description")
|
||||
|
||||
self.tenure = self.tenure_remap.get(self.epc["tenure"], None)
|
||||
|
||||
def parse_fabric(self, key):
    """
    Resolve the fabric description stored under `key` into its parsed attributes.

    An entry of self.cleaned[key] whose original_description equals the EPC value is returned as-is. Otherwise the
    description is parsed with the attribute class for that fabric element and a clean_description is added.

    :param key: One of "walls-description", "roof-description" or "floor-description".
    :raises ValueError: if no cleaned entry matches and `key` is not one of the three supported keys.
    """
    description = self.epc[key]

    # Prefer the already cleaned version of the description
    matches = [entry for entry in self.cleaned[key] if entry["original_description"] == description]
    if matches:
        return matches[0]

    if "SAP05:" in description:
        # Placeholder handling: this occurs on very old EPCs, so the description is overwritten
        # on the EPC record and treated as assumed
        self.epc[key] = "(assumed)"

    attribute_classes = {
        "walls-description": WallAttributes,
        "roof-description": RoofAttributes,
        "floor-description": FloorAttributes,
    }
    attribute_class = attribute_classes.get(key)
    if attribute_class is None:
        raise ValueError("Invalid key")

    parser = attribute_class(self.epc[key])
    parsed = parser.process()
    parsed["clean_description"] = parser.description.replace("(assumed)", "").rstrip().capitalize()

    return parsed
|
||||
|
||||
def loft_insulation(self, loft_thickness_threshold: int = None):
    """
    Decide from the roof description whether the property is suitable for loft insulation and store the
    outcome on self.loft.

    Only a pitched roof that is not a roof room counts as a loft. A loft whose existing insulation is at or
    below the threshold is deemed suitable.

    :param loft_thickness_threshold: Integer, Optional. Insulation thickness at or below which the loft is
                                     deemed suitable. Defaults to LOFT_INSULATION_THRESHOLD when not provided.
    """
    threshold = self.LOFT_INSULATION_THRESHOLD if loft_thickness_threshold is None else loft_thickness_threshold
    upper_band_limit = self.HIGH_LOFT_INSULATION_THRESHOLD

    # Anything that is not pitched, or is a room in the roof, is not a loft
    if not self.roof["is_pitched"] or self.roof["is_roof_room"]:
        self.loft = {
            "suitability": False,
            "thickness": None,
            "reason": "roof not loft",
            "thickness_classification": None
        }
        return

    # Turn the textual thickness into a number we can compare
    thickness = convert_thickness_to_numeric(
        string_thickness=self.roof["insulation_thickness"],
        is_pitched=self.roof["is_pitched"],
        is_flat=self.roof["is_flat"]
    )

    # Band the thickness: 0 - 100mm, 100 - 270mm or 270mm+
    if insulation_band_is_low := thickness <= 100:
        band = "0-100mm"
    elif thickness <= upper_band_limit:
        band = "100-270mm"
    else:
        band = "270mm+"

    if thickness <= threshold:
        suitable, reason = True, None
    else:
        # Insulation is already thick enough
        suitable, reason = False, "existing insulation"

    self.loft = {
        "suitability": suitable,
        "thickness": thickness,
        "reason": reason,
        "thickness_classification": band
    }
|
||||
|
||||
def cavity_insulation(self):
    """
    Works out, from the wall flags held in self.walls, whether the property is a candidate for cavity wall
    insulation. The verdict is written to self.cavity as a dict with the keys "suitability" and "type".

    The outcomes are tried in this order and the first one that applies wins:
        1) internal or external wall insulation is present -> not suitable
        2) cavity wall which is not filled and not described as partially filled -> suitable, "empty"
        3) cavity wall which is as built, assumed and without average/above average insulation
           -> suitable, "as built assumed"
        4) cavity wall described as partially filled -> suitable, "partial"
        5) cavity wall which is as built, assumed and has average insulation -> suitable, "underperforming"
        6) anything else -> not suitable, reported with the type "full"
    :return:
    """
    walls = self.walls

    # Every flag is derived up front, before any decision is taken
    cavity_wall = walls["is_cavity_wall"]
    unfilled = not walls["is_filled_cavity"]
    assumed_as_built = (
        walls["is_as_built"]
        and walls["insulation_thickness"] not in ["average", "above average"]
        and walls["is_assumed"]
    )
    partially_filled = "partial" in walls["clean_description"].lower()
    # Potentially under performing cavities - anything that is assumed, as built and insulated
    underperforming = (
        walls["is_as_built"] and walls["insulation_thickness"] in ["average"] and walls["is_assumed"]
    )

    insulated_internally = walls["internal_insulation"]
    insulated_externally = walls["external_insulation"]

    # Walls that already carry internal or external insulation are ruled out before anything else
    if insulated_internally or insulated_externally:
        self.cavity = {
            "suitability": False,
            "type": "internal or external wall insulation"
        }
        return

    # (does the outcome apply?, type to report) - order matters, the first match is used
    ranked_outcomes = (
        (cavity_wall and (unfilled and not partially_filled), "empty"),
        (cavity_wall and assumed_as_built, "as built assumed"),
        (cavity_wall and partially_filled, "partial"),
        (cavity_wall and underperforming, "underperforming"),
    )

    for applies, cavity_type in ranked_outcomes:
        if applies:
            self.cavity = {
                "suitability": True,
                "type": cavity_type,
            }
            return

    # Nothing matched, which includes walls that are not cavity walls at all
    self.cavity = {
        "suitability": False,
        "type": "full"
    }
|
||||
|
||||
def solid_wall_insulation(self):
    """
    Given the description of the walls, this function determines if the property is suitable for solid wall
    insulation

    The walls are suitable when they are solid brick and are NOT already recorded with "average" or
    "above average" insulation. The previous condition was inverted (it only flagged walls that were already
    insulated), which disagreed with suspended_floor_insulation and solid_floor_insulation, both of which
    require the element to be un-insulated.

    The result is stored in self.solid_wall as {"suitability": bool}
    :return:
    """

    is_solid = self.walls["is_solid_brick"]
    is_insulated = self.walls["insulation_thickness"] in ["average", "above average"]

    # Only un-insulated solid walls are candidates. bool() keeps the stored value a strict True/False, as before
    self.solid_wall = {
        "suitability": bool(is_solid and not is_insulated),
    }
|
||||
|
||||
def room_roof_insulation(self):
    """
    Determines whether the property is a candidate for room-in-roof insulation and stores the outcome in
    self.room_roof, a dict with the keys "suitability" and "thickness"

    A roof that is not flagged as a roof room is never suitable and is given a thickness of None. For a roof
    room, the textual insulation thickness is converted with convert_thickness_to_numeric and the roof is
    suitable only when the converted thickness equals 0
    :return:
    """
    roof = self.roof

    if roof["is_roof_room"]:
        thickness = convert_thickness_to_numeric(
            roof["insulation_thickness"],
            roof["is_pitched"],
            roof["is_flat"]
        )
        # The roof room flag is known to be truthy on this path, so suitability rests on the thickness alone
        outcome = {
            "suitability": thickness == 0,
            "thickness": thickness
        }
    else:
        outcome = {
            "suitability": False,
            "thickness": None
        }

    self.room_roof = outcome
|
||||
|
||||
def flat_roof_insulation(self):
    """
    Determines whether the property is a candidate for flat roof insulation and stores the outcome in
    self.flat_roof, a dict with the keys "suitability" and "thickness"

    A roof that is not flagged as flat is never suitable and is given a thickness of None. For a flat roof, the
    textual insulation thickness is converted with convert_thickness_to_numeric and the roof is suitable when
    the converted thickness is 100 or less
    :return:
    """
    roof = self.roof

    if roof["is_flat"]:
        thickness = convert_thickness_to_numeric(
            roof["insulation_thickness"],
            roof["is_pitched"],
            roof["is_flat"]
        )
        # The flat flag is known to be truthy on this path, so suitability rests on the thickness alone
        outcome = {
            "suitability": thickness <= 100,
            "thickness": thickness
        }
    else:
        outcome = {
            "suitability": False,
            "thickness": None
        }

    self.flat_roof = outcome
|
||||
|
||||
def suspended_floor_insulation(self):
    """
    Determines whether the property is a candidate for suspended (under-floor) insulation and stores the
    outcome in self.suspended_floor as a dict with the single key "suitability"

    When self.floor carries a truthy "no_data" flag the floor is treated as not suitable. Otherwise the floor is
    suitable when it is suspended and its insulation thickness is neither "average" nor "above average"
    :return:
    """
    floor = self.floor

    # Nothing is known about the floor, so no recommendation is made
    if "no_data" in floor and floor["no_data"]:
        self.suspended_floor = {"suitability": False}
        return

    suspended = floor["is_suspended"]
    uninsulated = floor["insulation_thickness"] not in ["average", "above average"]

    self.suspended_floor = {"suitability": suspended and uninsulated}
|
||||
|
||||
def solid_floor_insulation(self):
    """
    Determines whether the property is a candidate for solid floor insulation and stores the outcome in
    self.solid_floor as a dict with the single key "suitability"

    When self.floor carries a truthy "no_data" flag the floor is treated as not suitable. Otherwise the floor is
    suitable when it is solid and its insulation thickness is neither "average" nor "above average"
    :return:
    """
    floor = self.floor

    # Nothing is known about the floor, so no recommendation is made
    if "no_data" in floor and floor["no_data"]:
        self.solid_floor = {"suitability": False}
        return

    solid = floor["is_solid"]
    uninsulated = floor["insulation_thickness"] not in ["average", "above average"]

    self.solid_floor = {"suitability": solid and uninsulated}
|
||||
|
||||
def check_gbis_warmfront(self):
    """
    Checks eligibility for the Great British Insulation Scheme (GBIS), tailored to what the Warmfront team
    can deliver (both practically and commercially). The scheme's eligibility criteria can be found here:
    https://www.ofgem.gov.uk/environmental-and-social-schemes/great-british-insulation-scheme/homeowners-and-tenants

    At a high level, the scheme criteria are:
        - The home must be within council tax bands A-D in England, A-E in Scotland, A-E in Wales
        - It must have an EPC rating of D or below

    The council tax band is not checked for the moment. There is likely to be public data for this, since a
    government website allows you to search for properties:
    https://www.gov.uk/council-tax-bands
    The data is possibly contained in the council tax valuation list, but it remains to be seen (it seems
    unlikely) whether or not that data is openly accessible:
    https://www.gov.uk/government/statistics/quality-assurance-of-administrative-data-in-the-uk-house-price-index
    /valuation-office-agency-council-tax-valuation-lists

    What this method does check:
        1) Whether the property is an EPC D or below (current sap score below 69)
        2) Whether the property is suitable for cavity wall insulation

    GBIS applies to many more insulation measures, which can be seen in the Ofgem document. GBIS does not have
    a minimum upgrade requirement, so the post retrofit sap score does not need to be simulated with the
    machine learning model.

    The result is stored in self.gbis_warmfront with the keys "eligible", "strict" and "message"
    :return:
    """

    # Refresh the cavity wall verdict before reading it
    self.cavity_insulation()

    current_sap = int(self.epc["current-energy-efficiency"])

    # Unless one of the checks below passes, the property is not eligible
    verdict = {
        "eligible": False,
        "strict": False,
        "message": "All conditions fail",
    }

    if current_sap < 69:
        if self.cavity["type"] == "empty":
            # Perfect strictness - an empty cavity
            verdict = {
                "eligible": True,
                "strict": True,
                "message": "Perfect suitability",
            }
        elif self.cavity["suitability"]:
            # Near perfect - the cavity is suitable, but is not an empty one
            verdict = {
                "eligible": True,
                "strict": True,
                "message": "Near perfect suitability",
            }

    self.gbis_warmfront = verdict
|
||||
|
||||
def check_eco4_warmfront(self):
    """
    Checks whether the property looks eligible for funding under the ECO4 scheme

    For the moment only the measures that can be implemented by the Warmfront team are considered, so the
    check is limited to the cavity wall and the loft.

    Ofgem's V1.1 ECO4 guidance document is used for the conditions under which a property is eligible:
    https://www.ofgem.gov.uk/sites/default/files/2023-02/ECO4%20Delivery%20Guidance%20v1.1%20%281%29.pdf

    The conditions (to be reviewed) to be eligible for retrofit, under ECO4, are the following:
        1) The property is a social home (this is assumed prior to this method, as this code will often be run
           on property lists provided by a HA)
        2) The property is an EPC E or below (current sap score below 55)
        3) The property has an unfilled cavity and uninsulated loft
        4) After retrofit, the property will hit an EPC C

    Note: this criteria will likely be adjusted depending on the properties that can be served right now.
    Note: this method takes no post retrofit sap score, so condition 4 is not evaluated here.

    The result is stored in self.eco4_warmfront with the keys "eligible", "strict", "message", "cavity_type"
    and "loft_type". When the roof is not a loft, the thickness held in self.loft is overwritten with the
    placeholder 999.

    :return:
    :raises ValueError: if none of the rules below matches
    """

    current_sap = int(self.epc["current-energy-efficiency"])
    self.cavity_insulation()
    self.loft_insulation()

    cavity = self.cavity
    loft = self.loft

    # We put in a placeholder when the roof is not a loft
    if loft["reason"] == "roof not loft":
        loft["thickness"] = 999

    cavity_ok = cavity["suitability"]
    depth = loft["thickness"]

    # Ordered rules: (predicate, eligible, strict, message). The predicates are evaluated lazily, one at a
    # time and in this order, and the first rule whose predicate holds decides the outcome
    rules = (
        # Case 1 - no conditions met
        (lambda: not cavity_ok and (depth > 100) and current_sap >= 55,
         False, False, "All conditions fail"),
        # Case 2 - perfect match
        (lambda: (cavity["type"] == "empty") and (depth <= 100) and (current_sap < 55),
         True, True, "Perfect suitability"),
        # Case 2.5 - near perfect match - but we would not recommend this using the model
        (lambda: cavity_ok and (depth <= 100) and (current_sap < 55),
         True, True, "Near perfect suitability"),
        # Case 3 - cavity is suitable, loft is within 150mm, sap is low enough
        (lambda: cavity_ok and (depth <= 150) and (current_sap < 55),
         True, False, "Meets cavity, loft borderline, meets sap"),
        # Case 3 - cavity is suitable, loft is not, sap is low enough
        (lambda: cavity_ok and (depth > 150) and (current_sap < 55),
         True, False, "Meets cavity and sap"),
        # Case 4 - cavity is not suitable, loft is, sap is low enough - we say this is not eligible
        (lambda: not cavity_ok and (depth <= 100) and (current_sap < 55),
         False, False, "failed fabric check"),
        # Case 5 - cavity and loft suitable, sap too high
        (lambda: cavity_ok and (depth <= 150) and (current_sap >= 55),
         True, False, "Meets fabric, fails SAP check"),
        # Case 6 - meets just cavity
        (lambda: cavity_ok and (depth > 100) and (current_sap >= 55),
         True, False, "Meets just cavity"),
        # Case 7 - fails cavity and loft, but meets sap
        (lambda: not cavity_ok and (depth > 100) and (current_sap < 55),
         False, False, "Fails cavity and loft, meets SAP"),
        # Case 8 - fails cavity, meets loft, fails sap
        (lambda: not cavity_ok and (depth <= 100) and (current_sap >= 55),
         False, False, "Fails cavity, meets loft, fails SAP"),
    )

    for applies, eligible, strict, message in rules:
        if applies():
            self.eco4_warmfront = {
                "eligible": eligible,
                "strict": strict,
                "message": message,
                "cavity_type": cavity["type"],
                "loft_type": loft["thickness_classification"]
            }
            return

    raise ValueError("Implement me")
|
||||
|
||||
def check_gbis(self):
    """
    Checks eligibility for the Great British Insulation Scheme (GBIS) across all of the insulation measures.
    The eligibility criteria can be found here:
    https://www.ofgem.gov.uk/environmental-and-social-schemes/great-british-insulation-scheme/homeowners-and-tenants

    Full delivery guidance can be downloaded here:
    https://www.ofgem.gov.uk/sites/default/files/2023-08/Great%20British%20Insulation%20Scheme%20Delivery
    %20Guidance%20V101693416860968.pdf

    For social housing, the criteria is the following:

    If the property is currently an EPC D:
        - It's valid for innovation measures only but not a heating control measure
        - The property must be rented at below the market rate. All eligible social housing is treated based on
          the low income group, therefore the tenant must be in receipt of one of the eligible benefits

    If the property is currently an EPC E or below:
        - It's valid for all eligible insulation measures
        - The property must be rented at below the market rate. All eligible social housing is treated based on
          the low income group, therefore the tenant must be in receipt of one of the eligible benefits

    From the GBIS guidance document:
        Determining whether the premises are let below market rate

        3.101 Social housing under this provision will only be eligible where the housing is let below
        the market rate. The supplier must produce a declaration signed by a social landlord
        providing confirmation that the social housing premises are let below the market rate,
        or where the premises are currently void, have previously and will be let below the
        market rate. The declaration to be signed by a social landlord is included within the
        Eligibility and Pre-Retrofit Declaration form. This declaration form must be retained by
        suppliers and be available on request for audit purposes.

        3.102 Where social housing is let at or above the market rate, the property can be treated as
        a private domestic premises, where the occupant meets the eligibility requirements.
        See section on PRS from paragraph 1.13 for more information.

    The measures that can be implemented under GBIS include:
        - cavity wall (including party wall)
        - loft
        - solid wall
        - pitched roof
        - flat roof
        - under-floor
        - solid floor
        - park home
        - room-in-roof

    This method runs the cavity, loft, solid wall, room-in-roof, flat roof, suspended floor and solid floor
    checks, then stores the outcome in self.gbis with the keys "eligible" and "message".

    :return:
    :raises ValueError: if self.tenure is not one of the handled tenure types
    """

    # Refresh every fabric measure verdict before combining them
    self.cavity_insulation()
    self.loft_insulation()
    self.solid_wall_insulation()
    self.room_roof_insulation()
    self.flat_roof_insulation()
    self.suspended_floor_insulation()
    self.solid_floor_insulation()

    current_sap = int(self.epc["current-energy-efficiency"])
    band_e_or_worse = current_sap <= 54
    band_d_or_worse = current_sap <= 68

    # At least one of the fabric measures is worth installing
    needs_measure = (
        self.cavity["suitability"]
        or self.loft["suitability"]
        or self.solid_wall["suitability"]
        or self.room_roof["suitability"]
        or self.flat_roof["suitability"]
        or self.suspended_floor["suitability"]
        or self.solid_floor["suitability"]
    )

    tenure = self.tenure

    if tenure == "Rented (social)":
        if band_e_or_worse:
            outcome = {
                "eligible": needs_measure,
                "message": "eligible under fabric measure"
            }
        elif band_d_or_worse:
            # EPC D - this is a placeholder methodology
            outcome = {
                "eligible": int(self.epc["potential-energy-efficiency"]) > 68,
                "message": "contingent on innovation measure delivery"
            }
        else:
            outcome = {
                "eligible": False,
                "message": "not eligible"
            }
        self.gbis = outcome
        return

    if tenure == "Rented (private)":
        self.gbis = {
            "eligible": band_d_or_worse and needs_measure,
            "message": "eligible under fabric measure"
        }
        return

    if tenure == "Owner-occupied":
        self.gbis = {
            "eligible": False,
            "message": "Out-of-scope"
        }
        return

    if (tenure is None) or tenure == "unknown":
        self.gbis = {
            "eligible": needs_measure,
            "message": "unknown tenure"
        }
        return

    raise ValueError("Implement me other tenure types")
|
||||
|
||||
def check_eco4(self):
    """
    Checks whether the property looks eligible for ECO4 funding and stores the outcome in self.eco4, a dict
    with the keys "eligible" and "message".

    Because ECO4 supports nearly all measures, if we have commercial agreements in place then a large number
    of homes would be eligible for eco funding, if identified.

    These are the eligibility criteria we consider for this process:
    Privately rented, Help to heat group
        - Sap E-G
        - Must receive one of solid wall insulation, first time central heating or district heating control
        - The property must already have cavity walls and roof insulated

    Social Housing, SAP D
        - Innovation measures and insulation measures to meet the minimum insulation requirement
        - Improvement to at least band C
        - Fabric measures
        - If receiving any heating measures, must have at least one insulation measure first

    Social Housing, SAP E-G
        - Insulation measures, first time central heating, renewable heating, district heating connection,
          innovation measures
        - Improvement to D (F & G properties) or C (E properties)
        - If receiving any heating measure, must already have cavity and roof insulation

    Privately rented, ECO4 Flex route 1, 2, 3, 4
        - Must have SAP E-G
        - Most measures eligible, but must receive one of solid wall insulation, first time central heating,
          renewable heating and district heating control
        - Improvement to D (F & G properties) or C (E properties)
        - All homes receiving heating measures must first have insulated cavity/roof

    The flex routes are given here:
    https://so-eco.co.uk/what-is-eco4-flex/#:~:text=One%20way%20to%20gain%20ECO4,
    including%20elderly%20residents%20and%20lodgers.

    Only the "Rented (social)" and "Rented (private)" tenures are assessed; every other tenure is recorded as
    "Out of scope".

    :return:
    :raises ValueError: if the social housing branch reaches an inconsistent state (sap of 68 or below that is
        expected to meet the upgrade target, yet matched neither the band D nor the band E-G rule)
    """

    # Refresh every fabric measure verdict before combining them
    self.cavity_insulation()
    self.loft_insulation()
    self.solid_wall_insulation()
    self.room_roof_insulation()
    self.flat_roof_insulation()
    self.suspended_floor_insulation()
    self.solid_floor_insulation()

    current_sap = int(self.epc["current-energy-efficiency"])
    is_below_e = current_sap <= 54
    is_below_c = current_sap <= 68
    sap_potential = int(self.epc["potential-energy-efficiency"])

    # Any main heating description that does not mention a boiler is treated as a candidate for first time
    # central heating
    first_time_central_heating = "boiler" not in self.epc["mainheat-description"].lower()

    needs_fabric_measure = (
        self.cavity["suitability"] or
        self.loft["suitability"] or
        self.solid_wall["suitability"] or
        self.room_roof["suitability"] or
        self.flat_roof["suitability"] or
        self.suspended_floor["suitability"] or
        self.solid_floor["suitability"]
    )

    # A sap of 38 or below needs to get to at least a D (55), anything else up to 68 needs to get to at least
    # a C (69). The potential sap score on the EPC is used to judge whether that is achievable
    expected_to_meet_upgrades = (
        (current_sap <= 38 and sap_potential >= 55) or
        (current_sap <= 68 and sap_potential >= 69)
    )

    if self.tenure == "Rented (social)":
        if is_below_c and (not is_below_e) and expected_to_meet_upgrades:
            # If the property is a D, then it's eligible under innovation measures but requires improvement
            # to a band C
            self.eco4 = {
                "eligible": True,
                "message": "eligible under innovation measure and improvement to band C"
            }
        elif is_below_e and expected_to_meet_upgrades:
            # If the property is an E or below, then it's eligible under fabric measures or
            # heating/innovation measures
            message = "eligible under fabric measures, with sufficient post retrofit sap improvement" if (
                needs_fabric_measure) else (
                "eligible under heating and innovation measures, with sufficient post retrofit sap improvement"
            )

            self.eco4 = {"eligible": True, "message": message}
        else:
            # Sanity check: a property at 68 or below that is expected to meet the upgrade target should
            # have been caught by one of the two branches above
            if (current_sap <= 68) and expected_to_meet_upgrades:
                raise ValueError("something is wrong")
            self.eco4 = {
                "eligible": False,
                "message": "not eligible, above EPC C"
            }

        return

    if self.tenure == 'Rented (private)':
        # For private homes, the property needs to be an E or below, the cavity must be filled and the roof
        # insulated
        cavity_filled = not self.cavity["suitability"]
        roof_insulated = (not self.loft["suitability"]) and (not self.room_roof["suitability"]) and (
            not self.flat_roof["suitability"])

        if is_below_e and cavity_filled and roof_insulated and expected_to_meet_upgrades:
            if self.solid_wall["suitability"]:
                self.eco4 = {
                    "eligible": True,
                    "message": "eligible under solid wall insulation, conditional on post retrofit sap and help "
                               "to heat/ECO flex route"
                }
            elif first_time_central_heating:
                self.eco4 = {
                    "eligible": True,
                    "message": "eligible under first time central heating, conditional on post retrofit sap and "
                               "help to heat/ECO flex route"
                }
            else:
                self.eco4 = {
                    "eligible": False,
                    "message": "not eligible at this time"
                }
        else:
            self.eco4 = {
                "eligible": False,
                "message": "not eligible at this time, EPC too high"
            }

        # Always stop here for private rentals. Previously the branch directly above fell through to the
        # "Out of scope" assignment at the bottom, which silently replaced its message
        return

    # Every other tenure is not assessed
    self.eco4 = {
        "eligible": False,
        "message": "Out of scope"
    }
|
||||
6
etl/eligibility/README.md
Normal file
6
etl/eligibility/README.md
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
# Eligibility
|
||||
|
||||
This codebase is responsible for determining if properties look like they would be
|
||||
eligible for retrofit funding schemes. In order to do this, we use our SAP ML model, to score
|
||||
what the property would look like after a retrofit. We then compare this to the eligibility
|
||||
criteria of various schemes, to determine if the property looks likely to be eligible for funding.
|
||||
0
etl/eligibility/__init__.py
Normal file
0
etl/eligibility/__init__.py
Normal file
664
etl/eligibility/ha_15_32/HA 15 Identified addresses.csv
Normal file
664
etl/eligibility/ha_15_32/HA 15 Identified addresses.csv
Normal file
|
|
@ -0,0 +1,664 @@
|
|||
Housing Association,No.,Address,Postcode
|
||||
HA15,2,2 Lander Road,HP19 9TT
|
||||
HA15,4,4 Lander Road,HP19 9TT
|
||||
HA15,5,5 Lander Road,HP19 9TT
|
||||
HA15,12,12 Lander Road,HP19 9TT
|
||||
HA15,14,14 Lander Road,HP19 9TT
|
||||
HA15,18,18 Lander Road,HP19 9TT
|
||||
HA15,22,22 Lander Road,HP19 9TT
|
||||
HA15,1,1 Eeles Close,HP19 9TU
|
||||
HA15,2,2 Eeles Close,HP19 9TU
|
||||
HA15,3,3 Eeles Close,HP19 9TU
|
||||
HA15,12,12 Eeles Close,HP19 9TU
|
||||
HA15,15,15 Eeles Close,HP19 9TU
|
||||
HA15,2,2 Dicks Way,HP19 9UA
|
||||
HA15,4,4 Dicks Way,HP19 9UA
|
||||
HA15,5,5 Dicks Way,HP19 9UA
|
||||
HA15,6,6 Dicks Way,HP19 9UA
|
||||
HA15,8,8 Dicks Way,HP19 9UA
|
||||
HA15,9,9 Dicks Way,HP19 9UA
|
||||
HA15,14,14 Dicks Way,HP19 9UA
|
||||
HA15,15,15 Dicks Way,HP19 9UA
|
||||
HA15,17,17 Dicks Way,HP19 9UA
|
||||
HA15,20,20 Dicks Way,HP19 9UA
|
||||
HA15,26,26 Dicks Way,HP19 9UA
|
||||
HA15,28,28 Dicks Way,HP19 9UA
|
||||
HA15,4,4 Fletcher Close,HP19 9UB
|
||||
HA15,5,5 Fletcher Close,HP19 9UB
|
||||
HA15,24,24 Fletcher Close,HP19 9UB
|
||||
HA15,25,25 Fletcher Close,HP19 9UB
|
||||
HA15,27,27 Fletcher Close,HP19 9UB
|
||||
HA15,28,28 Fletcher Close,HP19 9UB
|
||||
HA15,29,29 Fletcher Close,HP19 9UB
|
||||
HA15,31,31 Fletcher Close,HP19 9UB
|
||||
HA15,32,32 Fletcher Close,HP19 9UB
|
||||
HA15,33,33 Fletcher Close,HP19 9UB
|
||||
HA15,34,"34 Fletcher Close,Aylesbury,Bucks",HP19 9UB
|
||||
HA15,1,1 Grimmer Close,HP19 9UD
|
||||
HA15,11,11 Grimmer Close,HP19 9UD
|
||||
HA15,14,14 Grimmer Close,HP19 9UD
|
||||
HA15,15,15 Grimmer Close,HP19 9UD
|
||||
HA15,17,17 Grimmer Close,HP19 9UD
|
||||
HA15,18,18 Grimmer Close,HP19 9UD
|
||||
HA15,21,21 Grimmer Close,HP19 9UD
|
||||
HA15,23,23 Grimmer Close,HP19 9UD
|
||||
HA15,24,24 Grimmer Close,HP19 9UD
|
||||
HA15,28,28 Grimmer Close,HP19 9UD
|
||||
HA15,30,30 Grimmer Close,HP19 9UD
|
||||
HA15,1,1 Vincent Road,HP19 9UN
|
||||
HA15,6,6 Vincent Road,HP19 9UN
|
||||
HA15,10,10 Vincent Road,HP19 9UN
|
||||
HA15,12,12 Vincent Road,HP19 9UN
|
||||
HA15,13,13 Vincent Road,HP19 9UN
|
||||
HA15,16,16 Vincent Road,HP19 9UN
|
||||
HA15,21,21 Vincent Road,HP19 9UN
|
||||
HA15,24,24 Vincent Road,HP19 9UN
|
||||
HA15,26,26 Vincent Road,HP19 9UN
|
||||
HA15,27,27 Vincent Road,HP19 9UN
|
||||
HA15,32,32 Vincent Road,HP19 9UN
|
||||
HA15,1,1 Reading Close,HP19 9UW
|
||||
HA15,2,2 Reading Close,HP19 9UW
|
||||
HA15,3,3 Reading Close,HP19 9UW
|
||||
HA15,4,4 Reading Close,HP19 9UW
|
||||
HA15,5,5 Reading Close,HP19 9UW
|
||||
HA15,6,6 Reading Close,HP19 9UW
|
||||
HA15,7,7 Reading Close,HP19 9UW
|
||||
HA15,9,9 Reading Close,HP19 9UW
|
||||
HA15,10,10 Reading Close,HP19 9UW
|
||||
HA15,6,6 Mary Mac Manus Drive,MK18 1UN
|
||||
HA15,8,8 Mary Mac Manus Drive,MK18 1UN
|
||||
HA15,10,10 Mary Mac Manus Drive,MK18 1UN
|
||||
HA15,2,"2 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
|
||||
HA15,7,"7 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
|
||||
HA15,9,"9 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
|
||||
HA15,11,"11 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
|
||||
HA15,12,"12 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
|
||||
HA15,16,"16 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
|
||||
HA15,17,"17 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
|
||||
HA15,26,"26 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
|
||||
HA15,38,"38 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
|
||||
HA15,41,"41 Rosebery Road Aston Clinton, Aylesbury",HP22 5JY
|
||||
HA15,25,"25 New Road Weston Turville, Aylesbury",HP22 5RA
|
||||
HA15,27,"27 New Road Weston Turville, Aylesbury",HP22 5RA
|
||||
HA15,29,"29 New Road Weston Turville, Aylesbury",HP22 5RA
|
||||
HA15,31,"31 New Road Weston Turville, Aylesbury",HP22 5RA
|
||||
HA15,37,"37 New Road Weston Turville, Aylesbury",HP22 5RA
|
||||
HA15,39,"39 New Road Weston Turville, Aylesbury",HP22 5RA
|
||||
HA15,5,"5 Walton Place Weston Turville, Aylesbury",HP22 5RB
|
||||
HA15,9,"9 Walton Place Weston Turville, Aylesbury",HP22 5RB
|
||||
HA15,18,"18 Walton Place Weston Turville, Aylesbury",HP22 5RB
|
||||
HA15,21,"21 Walton Place Weston Turville, Aylesbury",HP22 5RD
|
||||
HA15,36,"36 Walton Place Weston Turville, Aylesbury",HP22 5RD
|
||||
HA15,42,"42 Walton Place Weston Turville, Aylesbury",HP22 5RD
|
||||
HA15,46,"46 Walton Place Weston Turville, Aylesbury",HP22 5RD
|
||||
HA15,76,"76 Worlds End Lane Weston Turville, Aylesbury",HP22 5RX
|
||||
HA15,78,"78 Worlds End Lane Weston Turville, Aylesbury",HP22 5RX
|
||||
HA15,82,"82 Worlds End Lane Weston Turville, Aylesbury",HP22 5RX
|
||||
HA15,84,"84 Worlds End Lane Weston Turville, Aylesbury",HP22 5RX
|
||||
HA15,86,"86 Worlds End Lane Weston Turville, Aylesbury",HP22 5RX
|
||||
HA15,88,"88 Worlds End Lane Weston Turville, Aylesbury",HP22 5RX
|
||||
HA15,64,"64 Halton Lane Wendover, Aylesbury",HP22 6AZ
|
||||
HA15,66,"66 Halton Lane Wendover, Aylesbury",HP22 6AZ
|
||||
HA15,68,"68 Halton Lane Wendover, Aylesbury",HP22 6AZ
|
||||
HA15,70,"70 Halton Lane Wendover, Aylesbury",HP22 6AZ
|
||||
HA15,8,"8 South Street Wendover, Aylesbury",HP22 6EF
|
||||
HA15,2,"2 Barlow Road Wendover, Aylesbury",HP22 6HP
|
||||
HA15,4,"4 Barlow Road Wendover, Aylesbury",HP22 6HP
|
||||
HA15,14,"14 Barlow Road Wendover, Aylesbury",HP22 6HP
|
||||
HA15,15,"15 Barlow Road Wendover, Aylesbury",HP22 6HP
|
||||
HA15,16,"16 Barlow Road Wendover, Aylesbury",HP22 6HP
|
||||
HA15,28,"28 Barlow Road Wendover, Aylesbury",HP22 6HP
|
||||
HA15,1,"1 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
|
||||
HA15,5,"5 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
|
||||
HA15,7,"7 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
|
||||
HA15,8,"8 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
|
||||
HA15,9,"9 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
|
||||
HA15,13,"13 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
|
||||
HA15,16,"16 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
|
||||
HA15,20,"20 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
|
||||
HA15,24,"24 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
|
||||
HA15,26,"26 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
|
||||
HA15,28,"28 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
|
||||
HA15,38,"38 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
|
||||
HA15,44,"44 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
|
||||
HA15,50,"50 Woollerton Crescent Wendover, Aylesbury",HP22 6HT
|
||||
HA15,15,"15 Hampden Road Wendover, Aylesbury",HP22 6HU
|
||||
HA15,18,"18 Hampden Road Wendover, Aylesbury",HP22 6HU
|
||||
HA15,22,"22 Hampden Road Wendover, Aylesbury",HP22 6HU
|
||||
HA15,26,"26 Hampden Road Wendover, Aylesbury",HP22 6HU
|
||||
HA15,28,"28 Hampden Road Wendover, Aylesbury",HP22 6HU
|
||||
HA15,25,"25 Hampden Road Wendover, Aylesbury",HP22 6HX
|
||||
HA15,27,"27 Hampden Road Wendover, Aylesbury",HP22 6HX
|
||||
HA15,31,"31 Hampden Road Wendover, Aylesbury",HP22 6HX
|
||||
HA15,34,"34 Hampden Road Wendover, Aylesbury",HP22 6HX
|
||||
HA15,36,"36 Hampden Road Wendover, Aylesbury",HP22 6HX
|
||||
HA15,38,"38 Hampden Road Wendover, Aylesbury",HP22 6HX
|
||||
HA15,5,"5 Gainsborough Road, Aylesbury",HP21 9AZ
|
||||
HA15,1,"1 Dart Close, Aylesbury",HP21 9NP
|
||||
HA15,1,"1 Wingrave Road Aston Abbotts, Aylesbury",HP22 4LT
|
||||
HA15,3,"3 Wingrave Road Aston Abbotts, Aylesbury",HP22 4LT
|
||||
HA15,5,"5 Wingrave Road Aston Abbotts, Aylesbury",HP22 4LT
|
||||
HA15,82,"82 Winslow Road Wingrave, Aylesbury",HP22 4QB
|
||||
HA15,84,"84 Winslow Road Wingrave, Aylesbury",HP22 4QB
|
||||
HA15,106,"106 Winslow Road Wingrave, Aylesbury",HP22 4QB
|
||||
HA15,125,"125 Winslow Road Wingrave, Aylesbury",HP22 4QB
|
||||
HA15,19,"19 Abbotts Way Wingrave, Aylesbury",HP22 4QF
|
||||
HA15,37,"37 Abbotts Way Wingrave, Aylesbury",HP22 4QF
|
||||
HA15,41,"41 Abbotts Way Wingrave, Aylesbury",HP22 4QF
|
||||
HA15,43,"43 Abbotts Way Wingrave, Aylesbury",HP22 4QF
|
||||
HA15,2,"2 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
|
||||
HA15,5,"5 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
|
||||
HA15,10,"10 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
|
||||
HA15,12,"12 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
|
||||
HA15,19,"19 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
|
||||
HA15,21,"21 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
|
||||
HA15,22,"22 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
|
||||
HA15,31,"31 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
|
||||
HA15,32,"32 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
|
||||
HA15,33,"33 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
|
||||
HA15,34,"34 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
|
||||
HA15,35,"35 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
|
||||
HA15,37,"37 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
|
||||
HA15,38,"38 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
|
||||
HA15,40,"40 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
|
||||
HA15,42,"42 Chiltern Road Wingrave, Aylesbury",HP22 4QQ
|
||||
HA15,23,"23 Great Lane Bierton, Aylesbury",HP22 5DE
|
||||
HA15,25,"25 Great Lane Bierton, Aylesbury",HP22 5DE
|
||||
HA15,35,"35 Great Lane Bierton, Aylesbury",HP22 5DE
|
||||
HA15,37,"37 Great Lane Bierton, Aylesbury",HP22 5DE
|
||||
HA15,61,"61 Weston Road Aston Clinton, Aylesbury",HP22 5EJ
|
||||
HA15,65,"65 Weston Road Aston Clinton, Aylesbury",HP22 5EJ
|
||||
HA15,67,"67 Weston Road Aston Clinton, Aylesbury",HP22 5EJ
|
||||
HA15,69,"69 Weston Road Aston Clinton, Aylesbury",HP22 5EJ
|
||||
HA15,28,"28a Tring Road Wendover, Aylesbury",HP22 6NT
|
||||
HA15,38,"38a Tring Road Wendover, Aylesbury",HP22 6NT
|
||||
HA15,14,"14 Tring Road Wendover, Aylesbury",HP22 6NT
|
||||
HA15,34,"34 Tring Road Wendover, Aylesbury",HP22 6NT
|
||||
HA15,36,"36 Tring Road Wendover, Aylesbury",HP22 6NT
|
||||
HA15,64,"64 Tring Road Wendover, Aylesbury",HP22 6NX
|
||||
HA15,68,"68 Tring Road Wendover, Aylesbury",HP22 6NX
|
||||
HA15,70,"70 Tring Road Wendover, Aylesbury",HP22 6NX
|
||||
HA15,74,"74 Tring Road Wendover, Aylesbury",HP22 6NX
|
||||
HA15,76,"76 Tring Road Wendover, Aylesbury",HP22 6NX
|
||||
HA15,78,"78 Tring Road Wendover, Aylesbury",HP22 6NX
|
||||
HA15,80,"80 Tring Road Wendover, Aylesbury",HP22 6NX
|
||||
HA15,90,"90 Tring Road Wendover, Aylesbury",HP22 6NX
|
||||
HA15,92,"92 Tring Road Wendover, Aylesbury",HP22 6NX
|
||||
HA15,100,"100 Tring Road Wendover, Aylesbury",HP22 6NX
|
||||
HA15,104,"104 Tring Road Wendover, Aylesbury",HP22 6NX
|
||||
HA15,106,"106 Tring Road Wendover, Aylesbury",HP22 6NX
|
||||
HA15,108,"108 Tring Road Wendover, Aylesbury",HP22 6NX
|
||||
HA15,114,"114 Tring Road Wendover, Aylesbury",HP22 6NX
|
||||
HA15,38,"38 The Beeches Wendover, Aylesbury",HP22 6PB
|
||||
HA15,49,"49 The Beeches Wendover, Aylesbury",HP22 6PB
|
||||
HA15,54,"54 The Beeches Wendover, Aylesbury",HP22 6PB
|
||||
HA15,64,"64 The Beeches Wendover, Aylesbury",HP22 6PB
|
||||
HA15,1,"1 Church End Edlesborough, Dunstable",LU6 2EP
|
||||
HA15,2,"2 Church End Edlesborough, Dunstable",LU6 2EP
|
||||
HA15,5,"5 Church End Edlesborough, Dunstable",LU6 2EP
|
||||
HA15,6,"6 Church End Edlesborough, Dunstable",LU6 2EP
|
||||
HA15,7,"7 Church End Edlesborough, Dunstable",LU6 2EP
|
||||
HA15,9,"9 Church End Edlesborough, Dunstable",LU6 2EP
|
||||
HA15,125,"125 High Street Edlesborough, Dunstable",LU6 2ER
|
||||
HA15,6,"6 Dove Street Stewkley, Leighton Buzzard",LU7 0HT
|
||||
HA15,14,"14 Wantage Crescent Wing, Leighton Buzzard",LU7 0NH
|
||||
HA15,32,"32 Wantage Crescent Wing, Leighton Buzzard",LU7 0NH
|
||||
HA15,38,"38a Wantage Crescent Wing, Leighton Buzzard",LU7 0NH
|
||||
HA15,38,"38b Wantage Crescent Wing, Leighton Buzzard",LU7 0NH
|
||||
HA15,75,"75 High Street Cheddington, Leighton Buzzard",LU7 0RG
|
||||
HA15,12,"12 New Street Cheddington, Leighton Buzzard",LU7 0RL
|
||||
HA15,14,"14 New Street Cheddington, Leighton Buzzard",LU7 0RL
|
||||
HA15,16,"16 New Street Cheddington, Leighton Buzzard",LU7 0RL
|
||||
HA15,2,"2 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
|
||||
HA15,4,"4 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
|
||||
HA15,10,"10 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
|
||||
HA15,11,"11 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
|
||||
HA15,17,"17 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
|
||||
HA15,19,"19 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
|
||||
HA15,20,"20 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
|
||||
HA15,23,"23 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
|
||||
HA15,25,"25 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
|
||||
HA15,26,"26 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
|
||||
HA15,28,"28 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
|
||||
HA15,31,"31 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
|
||||
HA15,33,"33 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
|
||||
HA15,36,"36 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
|
||||
HA15,40,"40 Sunnybank Cheddington, Leighton Buzzard",LU7 0RN
|
||||
HA15,4,"4 Barkham Close Cheddington, Leighton Buzzard",LU7 0RT
|
||||
HA15,4,"4 Manor Road Cheddington, Leighton Buzzard",LU7 0RW
|
||||
HA15,7,"7 Manor Road Cheddington, Leighton Buzzard",LU7 0RW
|
||||
HA15,8,"8 Manor Road Cheddington, Leighton Buzzard",LU7 0RW
|
||||
HA15,10,"10 Manor Road Cheddington, Leighton Buzzard",LU7 0RW
|
||||
HA15,11,"11 Manor Road Cheddington, Leighton Buzzard",LU7 0RW
|
||||
HA15,61,"61 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
|
||||
HA15,69,"69 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
|
||||
HA15,71,"71 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
|
||||
HA15,75,"75 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
|
||||
HA15,85,"85 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
|
||||
HA15,87,"87 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
|
||||
HA15,89,"89 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
|
||||
HA15,95,"95 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
|
||||
HA15,101,"101 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
|
||||
HA15,103,"103 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
|
||||
HA15,125,"125 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
|
||||
HA15,129,"129 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
|
||||
HA15,133,"133 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
|
||||
HA15,141,"141 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
|
||||
HA15,151,"151 Yardley Avenue Pitstone, Leighton Buzzard",LU7 9BD
|
||||
HA15,48,"48 Station Road Ivinghoe, Leighton Buzzard",LU7 9EB
|
||||
HA15,52,"52 Station Road Ivinghoe, Leighton Buzzard",LU7 9EB
|
||||
HA15,54,"54 Station Road Ivinghoe, Leighton Buzzard",LU7 9EB
|
||||
HA15,58,"58 Station Road Ivinghoe, Leighton Buzzard",LU7 9EB
|
||||
HA15,1,"1 Maud Janes Close Ivinghoe, Leighton Buzzard",LU7 9ED
|
||||
HA15,3,"3 Maud Janes Close Ivinghoe, Leighton Buzzard",LU7 9ED
|
||||
HA15,12,"12 Maud Janes Close Ivinghoe, Leighton Buzzard",LU7 9ED
|
||||
HA15,26,"26 Ladysmith Road Ivinghoe, Leighton Buzzard",LU7 9EE
|
||||
HA15,24,"24 High Street Ivinghoe, Leighton Buzzard",LU7 9EX
|
||||
HA15,26,"26 High Street Ivinghoe, Leighton Buzzard",LU7 9EX
|
||||
HA15,28,"28 High Street Ivinghoe, Leighton Buzzard",LU7 9EX
|
||||
HA15,30,"30 High Street Ivinghoe, Leighton Buzzard",LU7 9EX
|
||||
HA15,32,"32 High Street Ivinghoe, Leighton Buzzard",LU7 9EX
|
||||
HA15,3,"3 Stonebridge Road, Aylesbury",HP19 9LX
|
||||
HA15,102,"102 Coventon Road, Aylesbury",HP19 9ND
|
||||
HA15,83,"83 Priory Crescent, Aylesbury",HP19 9NY
|
||||
HA15,103,"103 Priory Crescent, Aylesbury",HP19 9NY
|
||||
HA15,83,"83 Weedon Road, Aylesbury",HP19 9PA
|
||||
HA15,7,"7 Haines Close, Aylesbury",HP19 9TS
|
||||
HA15,8,"8 Haines Close, Aylesbury",HP19 9TS
|
||||
HA15,9,"9 Haines Close, Aylesbury",HP19 9TS
|
||||
HA15,13,"13 Haines Close, Aylesbury",HP19 9TS
|
||||
HA15,22,"22 Haines Close, Aylesbury",HP19 9TS
|
||||
HA15,39,"39 Haines Close, Aylesbury",HP19 9TS
|
||||
HA15,45,"45 Haines Close, Aylesbury",HP19 9TS
|
||||
HA15,27,"27 Oakfield Road, Aylesbury",HP20 1LH
|
||||
HA15,11,"11 Wingate Walk, Aylesbury",HP20 1LN
|
||||
HA15,9,"9 Stanhope Road, Aylesbury",HP20 1LP
|
||||
HA15,28,"28 Stanhope Road, Aylesbury",HP20 1LR
|
||||
HA15,12,"12 Cleveland Road, Aylesbury",HP20 2AZ
|
||||
HA15,20,"20 Cleveland Road, Aylesbury",HP20 2AZ
|
||||
HA15,22,"22 Cleveland Road, Aylesbury",HP20 2AZ
|
||||
HA15,7,"7 Bryanston Avenue, Aylesbury",HP20 2BA
|
||||
HA15,17,"17 Bryanston Avenue, Aylesbury",HP20 2BA
|
||||
HA15,36,"36 Bryanston Avenue, Aylesbury",HP20 2BA
|
||||
HA15,38,"38 Bryanston Avenue, Aylesbury",HP20 2BA
|
||||
HA15,6,"6 Matlock Road, Aylesbury",HP20 2BE
|
||||
HA15,9,"9 Lisburn Path, Aylesbury",HP20 2BQ
|
||||
HA15,15,"15 Lisburn Path, Aylesbury",HP20 2BQ
|
||||
HA15,3,"3 Lansdowne Road, Aylesbury",HP20 2DJ
|
||||
HA15,15,"15 Lansdowne Road, Aylesbury",HP20 2DJ
|
||||
HA15,4,"4 Caversham Green, Aylesbury",HP20 2DL
|
||||
HA15,1,"1 Davies Close, Aylesbury",HP20 2SH
|
||||
HA15,62,"62 Stoke Road, Aylesbury",HP21 8BX
|
||||
HA15,64,"64 Stoke Road, Aylesbury",HP21 8BX
|
||||
HA15,78,"78 Stoke Road, Aylesbury",HP21 8BX
|
||||
HA15,4,"4 Court Close, Aylesbury",HP21 8BY
|
||||
HA15,7,"7 Clover Lane, Aylesbury",HP21 8DQ
|
||||
HA15,25,"25 Clover Lane, Aylesbury",HP21 8DQ
|
||||
HA15,31,"31 Clover Lane, Aylesbury",HP21 8DQ
|
||||
HA15,53,"53 Birch Court, Aylesbury",HP21 8DS
|
||||
HA15,59,"59 Birch Court, Aylesbury",HP21 8DS
|
||||
HA15,74,"74 Thrasher Road, Aylesbury",HP21 8DX
|
||||
HA15,2,"2 Vicarage Road, Aylesbury",HP21 8EU
|
||||
HA15,8,"8 Vicarage Road, Aylesbury",HP21 8EU
|
||||
HA15,126,"126 Penn Road, Aylesbury",HP21 8JS
|
||||
HA15,128,"128 Penn Road, Aylesbury",HP21 8JS
|
||||
HA15,140,"140 Penn Road, Aylesbury",HP21 8JS
|
||||
HA15,144,"144 Penn Road, Aylesbury",HP21 8JS
|
||||
HA15,146,"146 Penn Road, Aylesbury",HP21 8JS
|
||||
HA15,4,"4 Montague Road, Aylesbury",HP21 8JT
|
||||
HA15,132,"132 Prebendal Avenue, Aylesbury",HP21 8LF
|
||||
HA15,134,"134 Prebendal Avenue, Aylesbury",HP21 8LF
|
||||
HA15,138,"138 Prebendal Avenue, Aylesbury",HP21 8LF
|
||||
HA15,140,"140 Prebendal Avenue, Aylesbury",HP21 8LF
|
||||
HA15,144,"144 Prebendal Avenue, Aylesbury",HP21 8LF
|
||||
HA15,15,"15 Oak Green, Aylesbury",HP21 8LJ
|
||||
HA15,59,"59 Paterson Road, Aylesbury",HP21 8LW
|
||||
HA15,37,"37 Thame Road, Aylesbury",HP21 8LX
|
||||
HA15,95,"95 Thame Road, Aylesbury",HP21 8LY
|
||||
HA15,3,"3 Edinburgh Place, Aylesbury",HP21 8NG
|
||||
HA15,52,"52 Carrington Road, Aylesbury",HP21 8NL
|
||||
HA15,9,"9 Hartwell End, Aylesbury",HP21 8NZ
|
||||
HA15,12,"12 Hartwell End, Aylesbury",HP21 8NZ
|
||||
HA15,21,"21 Hartwell End, Aylesbury",HP21 8PA
|
||||
HA15,64,"64 Lavric Road, Aylesbury",HP21 8PF
|
||||
HA15,8,"8 Cooks Lane Mursley, Milton Keynes",MK17 0RU
|
||||
HA15,47,"47 Green End Great Brickhill, Milton Keynes",MK17 9AT
|
||||
HA15,14,"14 Green End Great Brickhill, Milton Keynes",MK17 9AU
|
||||
HA15,63,"63 Bourtonville, Buckingham",MK18 1AY
|
||||
HA15,2,"2 Bath Lane Terrace, Buckingham",MK18 1DY
|
||||
HA15,3,"3 Bath Lane Terrace, Buckingham",MK18 1DY
|
||||
HA15,4,"4 Bath Lane Terrace, Buckingham",MK18 1DY
|
||||
HA15,3,"3 Westfields, Buckingham",MK18 1DZ
|
||||
HA15,5,"5 Westfields, Buckingham",MK18 1DZ
|
||||
HA15,6,"6 Westfields, Buckingham",MK18 1DZ
|
||||
HA15,8,"8 Westfields, Buckingham",MK18 1DZ
|
||||
HA15,10,"10 Westfields, Buckingham",MK18 1DZ
|
||||
HA15,13,"13 Westfields, Buckingham",MK18 1DZ
|
||||
HA15,14,"14 Westfields, Buckingham",MK18 1DZ
|
||||
HA15,15,"15 Westfields, Buckingham",MK18 1DZ
|
||||
HA15,18,"18 Westfields, Buckingham",MK18 1DZ
|
||||
HA15,19,"19 Westfields, Buckingham",MK18 1DZ
|
||||
HA15,20,"20 Westfields, Buckingham",MK18 1DZ
|
||||
HA15,21,"21 Westfields, Buckingham",MK18 1DZ
|
||||
HA15,24,"24 Westfields, Buckingham",MK18 1DZ
|
||||
HA15,27,"27 Westfields, Buckingham",MK18 1DZ
|
||||
HA15,28,"28 Westfields, Buckingham",MK18 1DZ
|
||||
HA15,29,"29 Westfields, Buckingham",MK18 1DZ
|
||||
HA15,31,"31 Westfields, Buckingham",MK18 1DZ
|
||||
HA15,32,"32 Westfields, Buckingham",MK18 1DZ
|
||||
HA15,35,"35 Westfields, Buckingham",MK18 1DZ
|
||||
HA15,49,"49 Westfields, Buckingham",MK18 1DZ
|
||||
HA15,51,"51 Westfields, Buckingham",MK18 1DZ
|
||||
HA15,53,"53 Westfields, Buckingham",MK18 1DZ
|
||||
HA15,55,"55 Westfields, Buckingham",MK18 1DZ
|
||||
HA15,57,"57 Westfields, Buckingham",MK18 1DZ
|
||||
HA15,60,"60 Westfields, Buckingham",MK18 1DZ
|
||||
HA15,2,"2 Grenville Road, Buckingham",MK18 1LR
|
||||
HA15,118,"118 Western Avenue, Buckingham",MK18 1LS
|
||||
HA15,5,"5 South Hall Maids Moreton, Buckingham",MK18 1QB
|
||||
HA15,2,"2 Church Close Maids Moreton, Buckingham",MK18 1QG
|
||||
HA15,5,"5 Church Close Maids Moreton, Buckingham",MK18 1QG
|
||||
HA15,7,"7 Church Close Maids Moreton, Buckingham",MK18 1QG
|
||||
HA15,1,"1 The Leys Main Street, Buckingham",MK18 1QT
|
||||
HA15,31a,"31a Springfields Padbury, Buckingham",MK18 2AT
|
||||
HA15,31b,"31b Springfields Padbury, Buckingham",MK18 2AT
|
||||
HA15,1,"1 Arnolds Close Padbury, Buckingham",MK18 2BG
|
||||
HA15,42,"42 Victory Road Steeple Claydon, Buckingham",MK18 2NY
|
||||
HA15,50,"50 Victory Road Steeple Claydon, Buckingham",MK18 2NY
|
||||
HA15,4,"4 Falklands Close Steeple Claydon, Buckingham",MK18 2PN
|
||||
HA15,8,"8 Falklands Close Steeple Claydon, Buckingham",MK18 2PN
|
||||
HA15,10,"10 Falklands Close Steeple Claydon, Buckingham",MK18 2PN
|
||||
HA15,12,"12 Falklands Close Steeple Claydon, Buckingham",MK18 2PN
|
||||
HA15,11,"11 Vicarage Lane Steeple Claydon, Buckingham",MK18 2PR
|
||||
HA15,62,"62 Vicarage Lane Steeple Claydon, Buckingham",MK18 2PR
|
||||
HA15,64,"64 Vicarage Lane Steeple Claydon, Buckingham",MK18 2PR
|
||||
HA15,3,"3 Pound Close Steeple Claydon, Buckingham",MK18 2QL
|
||||
HA15,4,"4 Pound Close Steeple Claydon, Buckingham",MK18 2QL
|
||||
HA15,6,"6 Oak Leys Steeple Claydon, Buckingham",MK18 2RQ
|
||||
HA15,8,"8 Oak Leys Steeple Claydon, Buckingham",MK18 2RQ
|
||||
HA15,8,"8 Old Mill Furlong Winslow, Buckingham",MK18 3EX
|
||||
HA15,23,"23 Old Mill Furlong Winslow, Buckingham",MK18 3EX
|
||||
HA15,24,"24 Old Mill Furlong Winslow, Buckingham",MK18 3EX
|
||||
HA15,25,"25 Old Mill Furlong Winslow, Buckingham",MK18 3EX
|
||||
HA15,30,"30 Old Mill Furlong Winslow, Buckingham",MK18 3EX
|
||||
HA15,32,"32 Old Mill Furlong Winslow, Buckingham",MK18 3EX
|
||||
HA15,34,"34 Old Mill Furlong Winslow, Buckingham",MK18 3EX
|
||||
HA15,1,"1 Roberts Road Haddenham, Aylesbury",HP17 8HH
|
||||
HA15,6,"6 Roberts Road Haddenham, Aylesbury",HP17 8HH
|
||||
HA15,11,"11 Roberts Road Haddenham, Aylesbury",HP17 8HH
|
||||
HA15,15,"15 Roberts Road Haddenham, Aylesbury",HP17 8HH
|
||||
HA15,17,"17 Roberts Road Haddenham, Aylesbury",HP17 8HH
|
||||
HA15,18,"18 Roberts Road Haddenham, Aylesbury",HP17 8HH
|
||||
HA15,38,"38 Roberts Road Haddenham, Aylesbury",HP17 8HH
|
||||
HA15,3,"3 Harts Road Haddenham, Aylesbury",HP17 8HJ
|
||||
HA15,9,"9 Harts Road Haddenham, Aylesbury",HP17 8HJ
|
||||
HA15,11,"11 Harts Road Haddenham, Aylesbury",HP17 8HJ
|
||||
HA15,16,"16 Harts Road Haddenham, Aylesbury",HP17 8HJ
|
||||
HA15,18,"18 Harts Road Haddenham, Aylesbury",HP17 8HJ
|
||||
HA15,22,"22 Harts Road Haddenham, Aylesbury",HP17 8HJ
|
||||
HA15,2,"2 Willis Road Haddenham, Aylesbury",HP17 8HL
|
||||
HA15,4,"4 Willis Road Haddenham, Aylesbury",HP17 8HL
|
||||
HA15,5,"5 Willis Road Haddenham, Aylesbury",HP17 8HL
|
||||
HA15,8,"8 Willis Road Haddenham, Aylesbury",HP17 8HL
|
||||
HA15,20,"20 Willis Road Haddenham, Aylesbury",HP17 8HL
|
||||
HA15,21,"21 Willis Road Haddenham, Aylesbury",HP17 8HL
|
||||
HA15,22,"22 Willis Road Haddenham, Aylesbury",HP17 8HL
|
||||
HA15,26,"26 Willis Road Haddenham, Aylesbury",HP17 8HL
|
||||
HA15,29,"29 Willis Road Haddenham, Aylesbury",HP17 8HL
|
||||
HA15,31,"31 Willis Road Haddenham, Aylesbury",HP17 8HL
|
||||
HA15,33,"33 Willis Road Haddenham, Aylesbury",HP17 8HL
|
||||
HA15,35,"35 Willis Road Haddenham, Aylesbury",HP17 8HL
|
||||
HA15,37,"37 Willis Road Haddenham, Aylesbury",HP17 8HL
|
||||
HA15,39,"39 Willis Road Haddenham, Aylesbury",HP17 8HL
|
||||
HA15,5,"5 Woodways Haddenham, Aylesbury",HP17 8HW
|
||||
HA15,7,"7 Woodways Haddenham, Aylesbury",HP17 8HW
|
||||
HA15,13,"13 Woodways Haddenham, Aylesbury",HP17 8HW
|
||||
HA15,19,"19 Woodways Haddenham, Aylesbury",HP17 8HW
|
||||
HA15,1,"1 Woodlands Butte Furlong, Aylesbury",HP17 8JE
|
||||
HA15,2,"2 Franklin Road Haddenham, Aylesbury",HP17 8LE
|
||||
HA15,8,"8 Franklin Road Haddenham, Aylesbury",HP17 8LE
|
||||
HA15,129,"129 Churchway Haddenham, Aylesbury",HP17 8LG
|
||||
HA15,133,"133 Churchway Haddenham, Aylesbury",HP17 8LG
|
||||
HA15,135,"135 Churchway Haddenham, Aylesbury",HP17 8LG
|
||||
HA15,147,"147 Churchway Haddenham, Aylesbury",HP17 8LG
|
||||
HA15,7,"7 Bishopstone Road Stone, Aylesbury",HP17 8QX
|
||||
HA15,33,"33 Bishopstone Road Stone, Aylesbury",HP17 8QX
|
||||
HA15,8,"8 Chiltern Avenue Stone, Aylesbury",HP17 8QY
|
||||
HA15,20,"20 Chiltern Avenue Stone, Aylesbury",HP17 8QY
|
||||
HA15,28,"28 Chiltern Avenue Stone, Aylesbury",HP17 8QY
|
||||
HA15,32,"32 Chiltern Avenue Stone, Aylesbury",HP17 8QY
|
||||
HA15,34,"34 Chiltern Avenue Stone, Aylesbury",HP17 8QY
|
||||
HA15,46,"46 Chiltern Avenue Stone, Aylesbury",HP17 8QY
|
||||
HA15,60,"60 Chiltern Avenue Stone, Aylesbury",HP17 8QY
|
||||
HA15,62,"62 Chiltern Avenue Stone, Aylesbury",HP17 8QY
|
||||
HA15,7,"7 Chiltern Avenue Stone, Aylesbury",HP17 8QZ
|
||||
HA15,13,"13 Chiltern Avenue Stone, Aylesbury",HP17 8QZ
|
||||
HA15,33,"33 Chiltern Avenue Stone, Aylesbury",HP17 8QZ
|
||||
HA15,41,"41 Chiltern Avenue Stone, Aylesbury",HP17 8QZ
|
||||
HA15,14,"14 Chiltern Close Stone, Aylesbury",HP17 8RA
|
||||
HA15,17,"17 Chiltern Close Stone, Aylesbury",HP17 8RA
|
||||
HA15,10,"10 Round Hill Stone, Aylesbury",HP17 8RD
|
||||
HA15,16,"16 Round Hill Stone, Aylesbury",HP17 8RD
|
||||
HA15,7,"7 Round Hill Stone, Aylesbury",HP17 8RE
|
||||
HA15,17,"17 Round Hill Stone, Aylesbury",HP17 8RE
|
||||
HA15,23,"23 Round Hill Stone, Aylesbury",HP17 8RE
|
||||
HA15,59,"59 Bishopstone Road Stone, Aylesbury",HP17 8RX
|
||||
HA15,1,"1 Bittenham Close Stone, Aylesbury",HP17 8RY
|
||||
HA15,7,"7 Bittenham Close Stone, Aylesbury",HP17 8RY
|
||||
HA15,1,"1 New Road Dinton, Aylesbury",HP17 8UU
|
||||
HA15,3,"3 New Road Dinton, Aylesbury",HP17 8UU
|
||||
HA15,8,"8 New Road Dinton, Aylesbury",HP17 8UU
|
||||
HA15,1,"1 Bernard Close Cuddington, Aylesbury",HP18 0AJ
|
||||
HA15,4,"4 Bernard Close Cuddington, Aylesbury",HP18 0AJ
|
||||
HA15,7,"7 Bernard Close Cuddington, Aylesbury",HP18 0AJ
|
||||
HA15,12,"12 Bernard Close Cuddington, Aylesbury",HP18 0AJ
|
||||
HA15,19,"19 Bernard Close Cuddington, Aylesbury",HP18 0AJ
|
||||
HA15,22,"22 Bernard Close Cuddington, Aylesbury",HP18 0AJ
|
||||
HA15,34,"34 Bernard Close Cuddington, Aylesbury",HP18 0AJ
|
||||
HA15,39,"39 Bernard Close Cuddington, Aylesbury",HP18 0AJ
|
||||
HA15,41,"41 Bernard Close Cuddington, Aylesbury",HP18 0AJ
|
||||
HA15,7,"7 Hillside Cottages Dadbrook, Aylesbury",HP18 0AQ
|
||||
HA15,10,"10 Hillside Cottages Dadbrook, Aylesbury",HP18 0AQ
|
||||
HA15,11,"11 Hillside Cottages Dadbrook, Aylesbury",HP18 0AQ
|
||||
HA15,7,"7 Swan Hill Aylesbury Road, Aylesbury",HP18 0BE
|
||||
HA15,10,"10 Swan Hill Aylesbury Road, Aylesbury",HP18 0BE
|
||||
HA15,1,"1 Grove Way Waddesdon, Aylesbury",HP18 0LH
|
||||
HA15,6,"6 Grove Way Waddesdon, Aylesbury",HP18 0LH
|
||||
HA15,7,"7 Grove Way Waddesdon, Aylesbury",HP18 0LH
|
||||
HA15,1,"1 Sheriff Cottages Quainton Road, Aylesbury",HP18 0LT
|
||||
HA15,2,"2 Sheriff Cottages Quainton Road, Aylesbury",HP18 0LT
|
||||
HA15,3,"3 Sheriff Cottages Quainton Road, Aylesbury",HP18 0LT
|
||||
HA15,5,"5 Sheriff Cottages Quainton Road, Aylesbury",HP18 0LT
|
||||
HA15,6,"6 Sheriff Cottages Quainton Road, Aylesbury",HP18 0LT
|
||||
HA15,7,"7 Sheriff Cottages Quainton Road, Aylesbury",HP18 0LT
|
||||
HA15,9,"9 Sheriff Cottages Quainton Road, Aylesbury",HP18 0LT
|
||||
HA15,21,"21 Goss Avenue Waddesdon, Aylesbury",HP18 0LY
|
||||
HA15,86,"86 Sharps Close Waddesdon, Aylesbury",HP18 0LZ
|
||||
HA15,88,"88 Sharps Close Waddesdon, Aylesbury",HP18 0LZ
|
||||
HA15,3,"3 Hilltop Long Crendon, Aylesbury",HP18 9AT
|
||||
HA15,4,"4 Hilltop Long Crendon, Aylesbury",HP18 9AT
|
||||
HA15,1A,"1a Hilltop Long Crendon, Aylesbury",HP18 9AT
|
||||
HA15,3A,"3a Hilltop Long Crendon, Aylesbury",HP18 9AT
|
||||
HA15,26,"26 Peascroft Long Crendon, Aylesbury",HP18 9AU
|
||||
HA15,30,"30 Peascroft Long Crendon, Aylesbury",HP18 9AU
|
||||
HA15,52,"52 Peascroft Long Crendon, Aylesbury",HP18 9AU
|
||||
HA15,11,"11 Harroell Long Crendon, Aylesbury",HP18 9AY
|
||||
HA15,13,"13 Harroell Long Crendon, Aylesbury",HP18 9AY
|
||||
HA15,14,"14 Harroell Long Crendon, Aylesbury",HP18 9AY
|
||||
HA15,2,"2 Abbot Ridge Long Crendon, Aylesbury",HP18 9AZ
|
||||
HA15,14,"14 Abbot Ridge Long Crendon, Aylesbury",HP18 9AZ
|
||||
HA15,18,"18 Abbot Ridge Long Crendon, Aylesbury",HP18 9AZ
|
||||
HA15,26,"26 Abbot Ridge Long Crendon, Aylesbury",HP18 9AZ
|
||||
HA15,5,"5 Meadowbank Close Long Crendon, Aylesbury",HP18 9DH
|
||||
HA15,11,"11 Bonnersfield Long Crendon, Aylesbury",HP18 9DJ
|
||||
HA15,14,"14 Bonnersfield Long Crendon, Aylesbury",HP18 9DJ
|
||||
HA15,16,"16 Bonnersfield Long Crendon, Aylesbury",HP18 9DJ
|
||||
HA15,26,"26 Bonnersfield Long Crendon, Aylesbury",HP18 9DJ
|
||||
HA15,28,"28 Bonnersfield Long Crendon, Aylesbury",HP18 9DJ
|
||||
HA15,29,"29 Bonnersfield Long Crendon, Aylesbury",HP18 9DJ
|
||||
HA15,30,"30 Bonnersfield Long Crendon, Aylesbury",HP18 9DJ
|
||||
HA15,32,"32 Bonnersfield Long Crendon, Aylesbury",HP18 9DJ
|
||||
HA15,36,"36 Giffard Way Long Crendon, Aylesbury",HP18 9DN
|
||||
HA15,45,"45 Giffard Way Long Crendon, Aylesbury",HP18 9DN
|
||||
HA15,52,"52 Giffard Way Long Crendon, Aylesbury",HP18 9DN
|
||||
HA15,10,"10 Coltman Avenue Long Crendon, Aylesbury",HP18 9DP
|
||||
HA15,11,"11 Coltman Avenue Long Crendon, Aylesbury",HP18 9DP
|
||||
HA15,12,"12 Coltman Avenue Long Crendon, Aylesbury",HP18 9DP
|
||||
HA15,14,"14 Coltman Avenue Long Crendon, Aylesbury",HP18 9DP
|
||||
HA15,16,"16 Coltman Avenue Long Crendon, Aylesbury",HP18 9DP
|
||||
HA15,22,"22 Coltman Avenue Long Crendon, Aylesbury",HP18 9DP
|
||||
HA15,25,"25 Coltman Avenue Long Crendon, Aylesbury",HP18 9DP
|
||||
HA15,26,"26 Coltman Avenue Long Crendon, Aylesbury",HP18 9DP
|
||||
HA15,27,"27 Coltman Avenue Long Crendon, Aylesbury",HP18 9DP
|
||||
HA15,32,"32 Friars Furlong Long Crendon, Aylesbury",HP18 9DQ
|
||||
HA15,4,"4 Highfield Long Crendon, Aylesbury",HP18 9DR
|
||||
HA15,5,"5 Highfield Long Crendon, Aylesbury",HP18 9DR
|
||||
HA15,8,"8 Highfield Long Crendon, Aylesbury",HP18 9DR
|
||||
HA15,9,"9 Highfield Long Crendon, Aylesbury",HP18 9DR
|
||||
HA15,10,"10 Highfield Long Crendon, Aylesbury",HP18 9DR
|
||||
HA15,11,"11 Highfield Long Crendon, Aylesbury",HP18 9DR
|
||||
HA15,14,"14 Highfield Long Crendon, Aylesbury",HP18 9DR
|
||||
HA15,17,"17 Highfield Long Crendon, Aylesbury",HP18 9DR
|
||||
HA15,18,"18 Highfield Long Crendon, Aylesbury",HP18 9DR
|
||||
HA15,20,"20 Highfield Long Crendon, Aylesbury",HP18 9DR
|
||||
HA15,23,"23 Highfield Long Crendon, Aylesbury",HP18 9DR
|
||||
HA15,24,"24 Highfield Long Crendon, Aylesbury",HP18 9DR
|
||||
HA15,14b,"14b Highfield Long Crendon, Aylesbury",HP18 9DR
|
||||
HA15,4,"4 Giffard Way Long Crendon, Aylesbury",HP18 9DW
|
||||
HA15,13,"13 Giffard Way Long Crendon, Aylesbury",HP18 9DW
|
||||
HA15,14,"14 Giffard Way Long Crendon, Aylesbury",HP18 9DW
|
||||
HA15,24,"24 St. Annes Road, Aylesbury",HP19 7RB
|
||||
HA15,55,"55 St. Annes Road, Aylesbury",HP19 7RB
|
||||
HA15,6,"6 Palmer Avenue, Aylesbury",HP19 8EF
|
||||
HA15,18,"18 Palmer Avenue, Aylesbury",HP19 8EF
|
||||
HA15,20,"20 Palmer Avenue, Aylesbury",HP19 8EF
|
||||
HA15,24,"24 Palmer Avenue, Aylesbury",HP19 8EF
|
||||
HA15,25,"25 Palmer Avenue, Aylesbury",HP19 8EF
|
||||
HA15,1,"1 Gatehouse Road, Aylesbury",HP19 8EH
|
||||
HA15,10,"10 Gatehouse Road, Aylesbury",HP19 8EH
|
||||
HA15,12,"12 Gatehouse Road, Aylesbury",HP19 8EH
|
||||
HA15,53,"53 Oxford Road, Aylesbury",HP19 8EQ
|
||||
HA15,59,"59 Oxford Road, Aylesbury",HP19 8EQ
|
||||
HA15,2,"2 Lander Road,Aylesbury,Bucks",HP19 9TT
|
||||
HA15,30,"30 Lander Road,Aylesbury,Bucks",HP19 9TT
|
||||
HA15,31,"31 Lander Road,Aylesbury,Bucks",HP19 9TT
|
||||
HA15,32,"32 Lander Road,Aylesbury,Bucks",HP19 9TT
|
||||
HA15,3,"3 Eeles Close,Aylesbury,Bucks",HP19 9TU
|
||||
HA15,5,"5 Eeles Close,Aylesbury,Bucks",HP19 9TU
|
||||
HA15,6,"6 Eeles Close,Aylesbury,Bucks",HP19 9TU
|
||||
HA15,7,"7 Eeles Close,Aylesbury,Bucks",HP19 9TU
|
||||
HA15,8,"8 Eeles Close,Aylesbury,Bucks",HP19 9TU
|
||||
HA15,9,"9 Eeles Close,Aylesbury,Bucks",HP19 9TU
|
||||
HA15,10,"10 Eeles Close,Aylesbury,Bucks",HP19 9TU
|
||||
HA15,15,"15 Eeles Close,Aylesbury,Bucks",HP19 9TU
|
||||
HA15,17,"17 Dicks Way,Aylesbury,Bucks",HP19 9UA
|
||||
HA15,20,"20 Dicks Way,Aylesbury,Bucks",HP19 9UA
|
||||
HA15,28,"28 Dicks Way,Aylesbury,Bucks",HP19 9UA
|
||||
HA15,30,"30 Dicks Way,Aylesbury,Bucks",HP19 9UA
|
||||
HA15,32,"32 Dicks Way,Aylesbury,Bucks",HP19 9UA
|
||||
HA15,34,"34 Dicks Way,Aylesbury,Bucks",HP19 9UA
|
||||
HA15,36,"36 Dicks Way,Aylesbury,Bucks",HP19 9UA
|
||||
HA15,7,"7 Fletcher Close,Aylesbury,Bucks",HP19 9UB
|
||||
HA15,8,"8 Fletcher Close,Aylesbury,Bucks",HP19 9UB
|
||||
HA15,10,"10 Fletcher Close,Aylesbury,Bucks",HP19 9UB
|
||||
HA15,11,"11 Fletcher Close,Aylesbury,Bucks",HP19 9UB
|
||||
HA15,12,"12 Fletcher Close,Aylesbury,Bucks",HP19 9UB
|
||||
HA15,25,"25 Fletcher Close,Aylesbury,Bucks",HP19 9UB
|
||||
HA15,33,"33 Fletcher Close,Aylesbury,Bucks",HP19 9UB
|
||||
HA15,34,"34 Fletcher Close,Aylesbury,Bucks",HP19 9UB
|
||||
HA15,11,"11 Grimmer Close,Aylesbury,Bucks",HP19 9UD
|
||||
HA15,14,"14 Grimmer Close,Aylesbury,Bucks",HP19 9UD
|
||||
HA15,15,"15 Grimmer Close,Aylesbury,Bucks",HP19 9UD
|
||||
HA15,23,"23 Grimmer Close,Aylesbury,Bucks",HP19 9UD
|
||||
HA15,12,"12 Vincent Road,Aylesbury,Bucks",HP19 9UN
|
||||
HA15,4,"4 Reading Close,Aylesbury,Bucks",HP19 9UW
|
||||
HA15,7,"7 Reading Close,Aylesbury,Bucks",HP19 9UW
|
||||
HA15,10,"10 Reading Close,Aylesbury,Bucks",HP19 9UW
|
||||
HA15,2,"2 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
|
||||
HA15,4,"4 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
|
||||
HA15,6,"6 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
|
||||
HA15,8,"8 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
|
||||
HA15,10,"10 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
|
||||
HA15,14,"14 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
|
||||
HA15,16,"16 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
|
||||
HA15,18,"18 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
|
||||
HA15,20,"20 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
|
||||
HA15,22,"22 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
|
||||
HA15,24,"24 Mary Mac Manus Drive, Milton Keynes",MK18 1UN
|
||||
HA15,1,"1 Mary Mac Manus Drive, Milton Keynes",MK18 1UW
|
||||
HA15,3,"3 Mary Mac Manus Drive, Milton Keynes",MK18 1UW
|
||||
HA15,5,"5 Mary Mac Manus Drive, Milton Keynes",MK18 1UW
|
||||
HA15,7,"7 Mary Mac Manus Drive, Milton Keynes",MK18 1UW
|
||||
HA15,9,"9 Mary Mac Manus Drive, Milton Keynes",MK18 1UW
|
||||
HA15,11,"11 Mary Mac Manus Drive, Milton Keynes",MK18 1UW
|
||||
HA15,13,"13 Mary Mac Manus Drive, Milton Keynes",MK18 1UW
|
||||
HA15,15,"15 Mary Mac Manus Drive, Milton Keynes",MK18 1UW
|
||||
HA15,17,"17 Mary Mac Manus Drive, Milton Keynes",MK18 1UW
|
||||
HA15,24,"24 St. Annes Road, Aylesbury",HP19 7RB
|
||||
HA15,55,"55 St. Annes Road, Aylesbury",HP19 7RB
|
||||
HA15,3,"3 Lansdowne Road, Aylesbury",HP20 2DJ
|
||||
HA15,15,"15 Lansdowne Road, Aylesbury",HP20 2DJ
|
||||
HA15,28,"28 Beechwood Way Aston Clinton, Aylesbury",HP22 5JP
|
||||
HA15,11,"11 Lower Icknield Way Aston Clinton, Aylesbury",HP22 5JS
|
||||
HA15,17,"17 Lower Icknield Way Aston Clinton, Aylesbury",HP22 5JS
|
||||
HA15,5,"5 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
|
||||
HA15,6,"6 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
|
||||
HA15,8,"8 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
|
||||
HA15,12,"12 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
|
||||
HA15,13,"13 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
|
||||
HA15,15,"15 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
|
||||
HA15,16,"16 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
|
||||
HA15,19,"19 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
|
||||
HA15,21,"21 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
|
||||
HA15,23,"23 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JU
|
||||
HA15,13,"13 Beechwood Way Aston Clinton, Aylesbury",HP22 5JW
|
||||
HA15,24,"24 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JX
|
||||
HA15,26,"26 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JX
|
||||
HA15,34,"34 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JX
|
||||
HA15,39,"39 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JX
|
||||
HA15,42,"42 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JX
|
||||
HA15,44,"44 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JX
|
||||
HA15,45,"45 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JX
|
||||
HA15,89,"89 Beaconsfield Road Aston Clinton, Aylesbury",HP22 5JX
|
||||
HA15,9,"9 Longcroft Aston Clinton, Aylesbury",HP22 5JZ
|
||||
HA15,14,"14 Longcroft Aston Clinton, Aylesbury",HP22 5JZ
|
||||
HA15,55,"55 Grenville Avenue Wendover, Aylesbury",HP22 6AJ
|
||||
HA15,67,"67 Grenville Avenue Wendover, Aylesbury",HP22 6AJ
|
||||
HA15,75,"75 Grenville Avenue Wendover, Aylesbury",HP22 6AJ
|
||||
HA15,35,"35 Grenville Avenue Wendover, Aylesbury",HP22 6AQ
|
||||
HA15,12,"12 Boddington Road Wendover, Aylesbury",HP22 6HY
|
||||
HA15,16,"16 Boddington Road Wendover, Aylesbury",HP22 6HY
|
||||
HA15,21,"21 Boddington Road Wendover, Aylesbury",HP22 6HY
|
||||
HA15,35,"35 Boddington Road Wendover, Aylesbury",HP22 6HY
|
||||
HA15,39,"39 Boddington Road Wendover, Aylesbury",HP22 6HY
|
||||
HA15,5,"5 Boddington Road Wendover, Aylesbury",HP22 6HZ
|
||||
HA15,1,"1a Lionel Avenue Wendover, Aylesbury",HP22 6LL
|
||||
HA15,22,"22 Barley Close Weston Turville, Aylesbury",HP22 5SF
|
||||
HA15,24,"24 Barley Close Weston Turville, Aylesbury",HP22 5SF
|
||||
HA15,31,"31 Barley Close Weston Turville, Aylesbury",HP22 5SF
|
||||
HA15,39,"39 Barley Close Weston Turville, Aylesbury",HP22 5SF
|
||||
HA15,41,"41 Barley Close Weston Turville, Aylesbury",HP22 5SF
|
||||
HA15,43,"43 Barley Close Weston Turville, Aylesbury",HP22 5SF
|
||||
HA15,46,"46 Hampden Road Stoke Mandeville, Aylesbury",HP22 5TW
|
||||
HA15,6,"6 Hampden Road Stoke Mandeville, Aylesbury",HP22 5UF
|
||||
HA15,7,"7 Hampden Road Stoke Mandeville, Aylesbury",HP22 5UF
|
||||
HA15,21,"21 Hampden Road Stoke Mandeville, Aylesbury",HP22 5UF
|
||||
HA15,14,"14 Eskdale Road Stoke Mandeville, Aylesbury",HP22 5UJ
|
||||
HA15,15,"15 Eskdale Road Stoke Mandeville, Aylesbury",HP22 5UJ
|
||||
HA15,18,"18 Eskdale Road Stoke Mandeville, Aylesbury",HP22 5UJ
|
||||
HA15,20,"20 Eskdale Road Stoke Mandeville, Aylesbury",HP22 5UJ
|
||||
HA15,23,"23 Eskdale Road Stoke Mandeville, Aylesbury",HP22 5UJ
|
||||
HA15,43,"43 Eskdale Road Stoke Mandeville, Aylesbury",HP22 5UJ
|
||||
HA15,44,"44 Eskdale Road Stoke Mandeville, Aylesbury",HP22 5UJ
|
||||
HA15,27,"27 Station Road Stoke Mandeville, Aylesbury",HP22 5UL
|
||||
HA15,29,"29 Station Road Stoke Mandeville, Aylesbury",HP22 5UL
|
||||
HA15,3,"3 Moor Park Wendover, Aylesbury",HP22 6AX
|
||||
HA15,9,"9 Moor Park Wendover, Aylesbury",HP22 6AX
|
||||
HA15,21,"21 Moor Park Wendover, Aylesbury",HP22 6AX
|
||||
HA15,35,"35 Moor Park Wendover, Aylesbury",HP22 6AX
|
||||
HA15,40,"40 Moor Park Wendover, Aylesbury",HP22 6AX
|
||||
HA15,42,"42 Moor Park Wendover, Aylesbury",HP22 6AX
|
||||
HA15,45,"45 Moor Park Wendover, Aylesbury",HP22 6AX
|
||||
HA15,48,"48 Moor Park Wendover, Aylesbury",HP22 6AX
|
||||
HA15,54,"54 Moor Park Wendover, Aylesbury",HP22 6AX
|
||||
HA15,58,"58 Moor Park Wendover, Aylesbury",HP22 6AX
|
||||
|
499
etl/eligibility/ha_15_32/HA 32 Identified addresses.csv
Normal file
499
etl/eligibility/ha_15_32/HA 32 Identified addresses.csv
Normal file
|
|
@ -0,0 +1,499 @@
|
|||
Housing Association,No.,Address,Postcode
|
||||
HA 32,1,SHERWOOD COURT,HU114DF
|
||||
HA 32,2,SHERWOOD COURT,HU114DF
|
||||
HA 32,3,SHERWOOD COURT,HU114DF
|
||||
HA 32,4,SHERWOOD COURT,HU114DF
|
||||
HA 32,5,SHERWOOD COURT,HU114DF
|
||||
HA 32,7,SHERWOOD COURT,HU114DF
|
||||
HA 32,8,SHERWOOD COURT,HU114DF
|
||||
HA 32,9,SHERWOOD COURT,HU114DF
|
||||
HA 32,10,SHERWOOD COURT,HU114DF
|
||||
HA 32,27,Seaton Grove,HU4 6HF
|
||||
HA 32,29,Seaton Grove,HU4 6HF
|
||||
HA 32,31,Seaton Grove,HU4 6HF
|
||||
HA 32,33,Seaton Grove,HU4 6HF
|
||||
HA 32,35,Seaton Grove,HU4 6HF
|
||||
HA 32,39,Seaton Grove,HU4 6HF
|
||||
HA 32,41,Seaton Grove,HU4 6HF
|
||||
HA 32,43,Seaton Grove,HU4 6HF
|
||||
HA 32,7,Norton Grove,HU4 6HG
|
||||
HA 32,9,Norton Grove,HU4 6HG
|
||||
HA 32,11,Norton Grove,HU4 6HG
|
||||
HA 32,15,Norton Grove,HU4 6HG
|
||||
HA 32,17,Norton Grove,HU4 6HG
|
||||
HA 32,19,Norton Grove,HU4 6HG
|
||||
HA 32,21,Norton Grove,HU4 6HG
|
||||
HA 32,28,Coxwold,HU4 6HH
|
||||
HA 32,30,Coxwold,HU4 6HH
|
||||
HA 32,32,Coxwold,HU4 6HH
|
||||
HA 32,34,Coxwold,HU4 6HH
|
||||
HA 32,36,Coxwold,HU4 6HH
|
||||
HA 32,38,Coxwold,HU4 6HH
|
||||
HA 32,40,Coxwold,HU4 6HH
|
||||
HA 32,42,Coxwold,HU4 6HH
|
||||
HA 32,44,Coxwold,HU4 6HH
|
||||
HA 32,971,HESSLE ROAD,HU4 6QG
|
||||
HA 32,973,HESSLE ROAD,HU4 6QG
|
||||
HA 32,975,HESSLE ROAD,HU4 6QG
|
||||
HA 32,977,HESSLE ROAD,HU4 6QG
|
||||
HA 32,981,HESSLE ROAD,HU4 6QG
|
||||
HA 32,983,HESSLE ROAD,HU4 6QG
|
||||
HA 32,1,Hessle Road,HU4 6RS
|
||||
HA 32,2,Hessle Road,HU4 6RS
|
||||
HA 32,3,Hessle Road,HU4 6RS
|
||||
HA 32,4,Hessle Road,HU4 6RS
|
||||
HA 32,5,Hessle Road,HU4 6RS
|
||||
HA 32,6,Hessle Road,HU4 6RS
|
||||
HA 32,7,Hessle Road,HU4 6RS
|
||||
HA 32,8,Hessle Road,HU4 6RS
|
||||
HA 32,9,Hessle Road,HU4 6RS
|
||||
HA 32,10,Hessle Road,HU4 6RS
|
||||
HA 32,11,Hessle Road,HU4 6RS
|
||||
HA 32,12,Hessle Road,HU4 6RS
|
||||
HA 32,14,Hessle Road,HU4 6RS
|
||||
HA 32,15,Hessle Road,HU4 6RS
|
||||
HA 32,16,Hessle Road,HU4 6RS
|
||||
HA 32,17,Hessle Road,HU4 6RS
|
||||
HA 32,18,Hessle Road,HU4 6RS
|
||||
HA 32,19,Hessle Road,HU4 6RS
|
||||
HA 32,20,Hessle Road,HU4 6RS
|
||||
HA 32,21,Hessle Road,HU4 6RS
|
||||
HA 32,22,Hessle Road,HU4 6RS
|
||||
HA 32,23,Hessle Road,HU4 6RS
|
||||
HA 32,24,Hessle Road,HU4 6RS
|
||||
HA 32,25,Hessle Road,HU4 6RS
|
||||
HA 32,26,Hessle Road,HU4 6RS
|
||||
HA 32,27,Hessle Road,HU4 6RS
|
||||
HA 32,28,Hessle Road,HU4 6RS
|
||||
HA 32,29,Hessle Road,HU4 6RS
|
||||
HA 32,30,Hessle Road,HU4 6RS
|
||||
HA 32,31,Hessle Road,HU4 6RS
|
||||
HA 32,32,Hessle Road,HU4 6RS
|
||||
HA 32,33,Hessle Road,HU4 6RS
|
||||
HA 32,34,Hessle Road,HU4 6RS
|
||||
HA 32,35,Hessle Road,HU4 6RS
|
||||
HA 32,36,Hessle Road,HU4 6RS
|
||||
HA 32,37,Hessle Road,HU4 6RS
|
||||
HA 32,46,FORESTER WAY,HU4 6SR
|
||||
HA 32,48,FORESTER WAY,HU4 6SR
|
||||
HA 32,50,FORESTER WAY,HU4 6SR
|
||||
HA 32,54,FORESTER WAY,HU4 6SR
|
||||
HA 32,56,FORESTER WAY,HU4 6SR
|
||||
HA 32,62,FORESTER WAY,HU4 6SR
|
||||
HA 32,64,FORESTER WAY,HU4 6SR
|
||||
HA 32,66,FORESTER WAY,HU4 6SR
|
||||
HA 32,68,FORESTER WAY,HU4 6SR
|
||||
HA 32,70,FORESTER WAY,HU4 6SR
|
||||
HA 32,15,SUMMERGROVES WAY,HU4 6SZ
|
||||
HA 32,1,WALNUT TREE WAY,HU4 6TG
|
||||
HA 32,2,WALNUT TREE WAY,HU4 6TG
|
||||
HA 32,3,WALNUT TREE WAY,HU4 6TG
|
||||
HA 32,4,WALNUT TREE WAY,HU4 6TG
|
||||
HA 32,7,WALNUT TREE WAY,HU4 6TG
|
||||
HA 32,8,WALNUT TREE WAY,HU4 6TG
|
||||
HA 32,9,WALNUT TREE WAY,HU4 6TG
|
||||
HA 32,291,Cottingham Road,HU5 4AT
|
||||
HA 32,293,Cottingham Road,HU5 4AT
|
||||
HA 32,295,Cottingham Road,HU5 4AT
|
||||
HA 32,297,Cottingham Road,HU5 4AT
|
||||
HA 32,299,Cottingham Road,HU5 4AT
|
||||
HA 32,301,Cottingham Road,HU5 4AT
|
||||
HA 32,303,Cottingham Road,HU5 4AT
|
||||
HA 32,305,Cottingham Road,HU5 4AT
|
||||
HA 32,307,Cottingham Road,HU5 4AT
|
||||
HA 32,309,Cottingham Road,HU5 4AT
|
||||
HA 32,1,Edith Cavell Court,HU5 4BA
|
||||
HA 32,2,Edith Cavell Court,HU5 4BA
|
||||
HA 32,3,Edith Cavell Court,HU5 4BA
|
||||
HA 32,4,Edith Cavell Court,HU5 4BA
|
||||
HA 32,5,Edith Cavell Court,HU5 4BA
|
||||
HA 32,6,Edith Cavell Court,HU5 4BA
|
||||
HA 32,7,Edith Cavell Court,HU5 4BA
|
||||
HA 32,8,Edith Cavell Court,HU5 4BA
|
||||
HA 32,9,Edith Cavell Court,HU5 4BA
|
||||
HA 32,10,Edith Cavell Court,HU5 4BA
|
||||
HA 32,11,Edith Cavell Court,HU5 4BA
|
||||
HA 32,12,Edith Cavell Court,HU5 4BA
|
||||
HA 32,106,Barringhton Avenue,HU5 4BE
|
||||
HA 32,112,Barringhton Avenue,HU5 4BE
|
||||
HA 32,114,Barringhton Avenue,HU5 4BE
|
||||
HA 32,116,Barringhton Avenue,HU5 4BE
|
||||
HA 32,118,Barringhton Avenue,HU5 4BE
|
||||
HA 32,120,Barringhton Avenue,HU5 4BE
|
||||
HA 32,122,Barringhton Avenue,HU5 4BE
|
||||
HA 32,124,Barringhton Avenue,HU5 4BE
|
||||
HA 32,126,Barringhton Avenue,HU5 4BE
|
||||
HA 32,1,Florence Nightingale Court,HU5 4BW
|
||||
HA 32,2,Florence Nightingale Court,HU5 4BW
|
||||
HA 32,3,Florence Nightingale Court,HU5 4BW
|
||||
HA 32,4,Florence Nightingale Court,HU5 4BW
|
||||
HA 32,5,Florence Nightingale Court,HU5 4BW
|
||||
HA 32,6,Florence Nightingale Court,HU5 4BW
|
||||
HA 32,7,Florence Nightingale Court,HU5 4BW
|
||||
HA 32,8,Florence Nightingale Court,HU5 4BW
|
||||
HA 32,9,Florence Nightingale Court,HU5 4BW
|
||||
HA 32,10,Florence Nightingale Court,HU5 4BW
|
||||
HA 32,11,Florence Nightingale Court,HU5 4BW
|
||||
HA 32,12,Florence Nightingale Court,HU5 4BW
|
||||
HA 32,14,Florence Nightingale Court,HU5 4BW
|
||||
HA 32,15,Florence Nightingale Court,HU5 4BW
|
||||
HA 32,17,Florence Nightingale Court,HU5 4BW
|
||||
HA 32,19,Florence Nightingale Court,HU5 4BW
|
||||
HA 32,12,Green Close,HU6 8DA
|
||||
HA 32,44,Green Close,HU6 8DA
|
||||
HA 32,49,Green Close,HU6 8DA
|
||||
HA 32,50,Green Close,HU6 8DA
|
||||
HA 32,14,Ashbury Court,HU6 8DY
|
||||
HA 32,38,Westgarth Avenue,HU6 8LS
|
||||
HA 32,46,WESTGARTH AVENUE,HU6 8LS
|
||||
HA 32,48,WESTGARTH AVENUE,HU6 8LS
|
||||
HA 32,54,Westgarth Avenue,HU6 8LS
|
||||
HA 32,10,BEAUTIMAN COURT,HU6 8LX
|
||||
HA 32,1,Rosey Row,HU9 1HF
|
||||
HA 32,2,Rosey Row,HU9 1HF
|
||||
HA 32,3,Rosey Row,HU9 1HF
|
||||
HA 32,4,Rosey Row,HU9 1HF
|
||||
HA 32,5,Rosey Row,HU9 1HF
|
||||
HA 32,6,Rosey Row,HU9 1HF
|
||||
HA 32,7,Rosey Row,HU9 1HF
|
||||
HA 32,8,Rosey Row,HU9 1HF
|
||||
HA 32,9,Rosey Row,HU9 1HF
|
||||
HA 32,10,Rosey Row,HU9 1HF
|
||||
HA 32,11,Rosey Row,HU9 1HF
|
||||
HA 32,12,Rosey Row,HU9 1HF
|
||||
HA 32,14,Rosey Row,HU9 1HF
|
||||
HA 32,15,Rosey Row,HU9 1HF
|
||||
HA 32,16,Rosey Row,HU9 1HF
|
||||
HA 32,17,Rosey Row,HU9 1HF
|
||||
HA 32,18,Rosey Row,HU9 1HF
|
||||
HA 32,19,Rosey Row,HU9 1HF
|
||||
HA 32,20,Rosey Row,HU9 1HF
|
||||
HA 32,21,Rosey Row,HU9 1HF
|
||||
HA 32,24,Steynburg Street,HU9 2PF
|
||||
HA 32,26,Steynburg Street,HU9 2PF
|
||||
HA 32,28,Steynburg Street,HU9 2PF
|
||||
HA 32,30,Steynburg Street,HU9 2PF
|
||||
HA 32,36,Steynburg Street,HU9 2PF
|
||||
HA 32,38,Steynburg Street,HU9 2PF
|
||||
HA 32,40,Steynburg Street,HU9 2PF
|
||||
HA 32,42,Steynburg Street,HU9 2PF
|
||||
HA 32,19,Rustenburg,HU9 2PT
|
||||
HA 32,21,Rustenburg,HU9 2PT
|
||||
HA 32,23,Rustenburg,HU9 2PT
|
||||
HA 32,25,Rustenburg,HU9 2PT
|
||||
HA 32,27,Rustenburg,HU9 2PT
|
||||
HA 32,29,Rustenburg,HU9 2PT
|
||||
HA 32,31,Rustenburg,HU9 2PT
|
||||
HA 32,33,Rustenburg,HU9 2PT
|
||||
HA 32,35,Rustenburg,HU9 2PT
|
||||
HA 32,37,Rustenburg,HU9 2PT
|
||||
HA 32,55,Rustenburg,HU9 2PT
|
||||
HA 32,57,Rustenburg,HU9 2PT
|
||||
HA 32,59,Rustenburg,HU9 2PT
|
||||
HA 32,61,Rustenburg,HU9 2PT
|
||||
HA 32,3,The Broadway,HU9 3JH
|
||||
HA 32,5,THE BROADWAY,HU9 3JH
|
||||
HA 32,7,The Broadway,HU9 3JH
|
||||
HA 32,9,The Broadway,HU9 3JH
|
||||
HA 32,11,The Broadway,HU9 3JH
|
||||
HA 32,1,BOWLING CIRCLE,HU9 3JL
|
||||
HA 32,3,BOWLING CIRCLE,HU9 3JL
|
||||
HA 32,5,BOWLING CIRCLE,HU9 3JL
|
||||
HA 32,7,BOWLING CIRCLE,HU9 3JL
|
||||
HA 32,9,BOWLING CIRCLE,HU9 3JL
|
||||
HA 32,1,MAJESTIC COURT,HU9 3JY
|
||||
HA 32,2,MAJESTIC COURT,HU9 3JY
|
||||
HA 32,3,MAJESTIC COURT,HU9 3JY
|
||||
HA 32,4,MAJESTIC COURT,HU9 3JY
|
||||
HA 32,5,MAJESTIC COURT,HU9 3JY
|
||||
HA 32,6,MAJESTIC COURT,HU9 3JY
|
||||
HA 32,7,MAJESTIC COURT,HU9 3JY
|
||||
HA 32,8,MAJESTIC COURT,HU9 3JY
|
||||
HA 32,9,MAJESTIC COURT,HU9 3JY
|
||||
HA 32,10,MAJESTIC COURT,HU9 3JY
|
||||
HA 32,11,MAJESTIC COURT,HU9 3JY
|
||||
HA 32,12,MAJESTIC COURT,HU9 3JY
|
||||
HA 32,14,MAJESTIC COURT,HU9 3JY
|
||||
HA 32,15,Majestic Court,HU9 3JY
|
||||
HA 32,16,MAJESTIC COURT,HU9 3JY
|
||||
HA 32,1,ROYALE COURT,HU9 3JZ
|
||||
HA 32,2,ROYALE COURT,HU9 3JZ
|
||||
HA 32,3,ROYALE COURT,HU9 3JZ
|
||||
HA 32,4,ROYALE COURT,HU9 3JZ
|
||||
HA 32,5,ROYALE COURT,HU9 3JZ
|
||||
HA 32,6,ROYALE COURT,HU9 3JZ
|
||||
HA 32,7,ROYALE COURT,HU9 3JZ
|
||||
HA 32,8,ROYALE COURT,HU9 3JZ
|
||||
HA 32,9,ROYALE COURT,HU9 3JZ
|
||||
HA 32,10,ROYALE COURT,HU9 3JZ
|
||||
HA 32,11,ROYALE COURT,HU9 3JZ
|
||||
HA 32,12,ROYALE COURT,HU9 3JZ
|
||||
HA 32,14,ROYALE COURT,HU9 3JZ
|
||||
HA 32,16,ROYALE COURT,HU9 3JZ
|
||||
HA 32,17,ROYALE COURT,HU9 3JZ
|
||||
HA 32,18,ROYALE COURT,HU9 3JZ
|
||||
HA 32,19,ROYALE COURT,HU9 3JZ
|
||||
HA 32,20,ROYALE COURT,HU9 3JZ
|
||||
HA 32,21,ROYALE COURT,HU9 3JZ
|
||||
HA 32,22,ROYALE COURT,HU9 3JZ
|
||||
HA 32,23,ROYALE COURT,HU9 3JZ
|
||||
HA 32,24,ROYALE COURT,HU9 3JZ
|
||||
HA 32,25,ROYALE COURT,HU9 3JZ
|
||||
HA 32,26,ROYALE COURT,HU9 3JZ
|
||||
HA 32,12A,ROYALE COURT,HU9 3JZ
|
||||
HA 32,79,MAYBURY ROAD,HU9 3LB
|
||||
HA 32,1,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,2,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,3,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,4,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,5,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,6,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,7,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,8,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,9,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,10,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,11,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,14,Hebrides Close,HU9 3LF
|
||||
HA 32,15,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,16,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,17,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,18,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,19,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,20,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,21,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,22,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,23,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,24,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,25,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,27,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,28,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,29,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,30,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,31,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,32,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,33,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,34,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,35,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,36,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,39,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,40,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,41,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,42,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,2,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,4,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,6,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,8,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,10,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,12,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,14,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,16,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,18,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,20,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,22,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,24,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,26,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,28,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,30,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,32,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,34,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,36,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,40,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,42,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,44,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,46,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,48,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,48,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,50,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,52,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,54,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,56,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,58,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,60,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,62,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,64,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,66,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,68,CROMARTY CLOSE,HU9 3LG
|
||||
HA 32,1,RONALDSWAY CLOSE,HU9 3LH
|
||||
HA 32,2,RONALDSWAY CLOSE,HU9 3LH
|
||||
HA 32,3,RONALDSWAY CLOSE,HU9 3LH
|
||||
HA 32,3,"MALIN LODGE, RONALDSWAY CLOSE",HU9 3LH
|
||||
HA 32,4,"MALIN LODGE, RONALDSWAY CLOSE",HU9 3LH
|
||||
HA 32,6,"MALIN LODGE, RONALDSWAY CLOSE",HU9 3LH
|
||||
HA 32,9,"MALIN LODGE, RONALDSWAY CLOSE",HU9 3LH
|
||||
HA 32,10,"MALIN LODGE, RONALDSWAY CLOSE",HU9 3LH
|
||||
HA 32,15,"MALIN LODGE, RONALDSWAY CLOSE",HU9 3LH
|
||||
HA 32,17,"MALIN LODGE, RONALDSWAY CLOSE",HU9 3LH
|
||||
HA 32,18,"MALIN LODGE, RONALDSWAY CLOSE",HU9 3LH
|
||||
HA 32,7,BROADWAY DRIVE,HU9 3PA
|
||||
HA 32,9,BROADWAY DRIVE,HU9 3PA
|
||||
HA 32,11,BROADWAY DRIVE,HU9 3PA
|
||||
HA 32,15,Broadway Drive,HU9 3PA
|
||||
HA 32,17,Broadway Drive,HU9 3PA
|
||||
HA 32,19,Broadway Drive,HU9 3PA
|
||||
HA 32,21,Broadway Drive,HU9 3PA
|
||||
HA 32,23,Broadway Drive,HU9 3PA
|
||||
HA 32,25,Broadway Drive,HU9 3PA
|
||||
HA 32,27,Broadway Drive,HU9 3PA
|
||||
HA 32,29,Broadway Drive,HU9 3PA
|
||||
HA 32,31,Broadway Drive,HU9 3PA
|
||||
HA 32,33,Broadway Drive,HU9 3PA
|
||||
HA 32,35,Broadway Drive,HU9 3PA
|
||||
HA 32,37,BROADWAY DRIVE,HU9 3PA
|
||||
HA 32,39,BROADWAY DRIVE,HU9 3PA
|
||||
HA 32,41,Broadway Drive,HU9 3PA
|
||||
HA 32,43,BROADWAY DRIVE,HU9 3PA
|
||||
HA 32,45,BROADWAY DRIVE,HU9 3PA
|
||||
HA 32,47,BROADWAY DRIVE,HU9 3PA
|
||||
HA 32,49,BROADWAY DRIVE,HU9 3PA
|
||||
HA 32,2,Broadway Drive,HU9 3PB
|
||||
HA 32,4,Broadway Drive,HU9 3PB
|
||||
HA 32,6,Broadway Drive,HU9 3PB
|
||||
HA 32,8,Broadway Drive,HU9 3PB
|
||||
HA 32,10,Broadway Drive,HU9 3PB
|
||||
HA 32,12,Broadway Drive,HU9 3PB
|
||||
HA 32,14,Broadway Drive,HU9 3PB
|
||||
HA 32,16,Broadway Drive,HU9 3PB
|
||||
HA 32,18,Broadway Drive,HU9 3PB
|
||||
HA 32,20,Broadway Drive,HU9 3PB
|
||||
HA 32,22,Broadway Drive,HU9 3PB
|
||||
HA 32,26,Broadway Drive,HU9 3PB
|
||||
HA 32,28,Broadway Drive,HU9 3PB
|
||||
HA 32,28,ADA HOLMES CIRCLE,HU9 3PB
|
||||
HA 32,30,Broadway Drive,HU9 3PB
|
||||
HA 32,32,Broadway Drive,HU9 3PB
|
||||
HA 32,34,Broadway Drive,HU9 3PB
|
||||
HA 32,36,Broadway Drive,HU9 3PB
|
||||
HA 32,38,Broadway Drive,HU9 3PB
|
||||
HA 32,40,Broadway Drive,HU9 3PB
|
||||
HA 32,42,Broadway Drive,HU9 3PB
|
||||
HA 32,44,Broadway Drive,HU9 3PB
|
||||
HA 32,46,Broadway Drive,HU9 3PB
|
||||
HA 32,48,Broadway Drive,HU9 3PB
|
||||
HA 32,52,Broadway Drive,HU9 3PB
|
||||
HA 32,56,Broadway Drive,HU9 3PB
|
||||
HA 32,58,Broadway Drive,HU9 3PB
|
||||
HA 32,60,Broadway Drive,HU9 3PB
|
||||
HA 32,55,RUTHERGLEN DRIVE,HU9 3PF
|
||||
HA 32,57,RUTHERGLEN DRIVE,HU9 3PF
|
||||
HA 32,59,RUTHERGLEN DRIVE,HU9 3PF
|
||||
HA 32,1,IMPERIAL COURT,HU9 3PG
|
||||
HA 32,3,IMPERIAL COURT,HU9 3PG
|
||||
HA 32,4,IMPERIAL COURT,HU9 3PG
|
||||
HA 32,5,IMPERIAL COURT,HU9 3PG
|
||||
HA 32,6,IMPERIAL COURT,HU9 3PG
|
||||
HA 32,7,IMPERIAL COURT,HU9 3PG
|
||||
HA 32,8,IMPERIAL COURT,HU9 3PG
|
||||
HA 32,9,IMPERIAL COURT,HU9 3PG
|
||||
HA 32,10,IMPERIAL COURT,HU9 3PG
|
||||
HA 32,10,SCHUBERT CLOSE,HU9 3PL
|
||||
HA 32,27,SCHUBERT CLOSE,HU9 3PL
|
||||
HA 32,28,SCHUBERT CLOSE,HU9 3PL
|
||||
HA 32,32,SCHUBERT CLOSE,HU9 3PL
|
||||
HA 32,1,Broadway Manor,HU9 3PN
|
||||
HA 32,1,Broadway Cottages,HU9 3PN
|
||||
HA 32,2,Broadway Manor,HU9 3PN
|
||||
HA 32,2,Broadway Cottages,HU9 3PN
|
||||
HA 32,3,Broadway Cottages,HU9 3PN
|
||||
HA 32,6,Broadway Manor,HU9 3PN
|
||||
HA 32,8,Broadway Manor,HU9 3PN
|
||||
HA 32,17,Broadway Manor,HU9 3PN
|
||||
HA 32,18,Broadway Manor,HU9 3PN
|
||||
HA 32,19,Broadway Manor,HU9 3PN
|
||||
HA 32,20,Broadway Manor,HU9 3PN
|
||||
HA 32,24,Broadway Manor,HU9 3PN
|
||||
HA 32,31,Broadway Manor,HU9 3PN
|
||||
HA 32,35,Broadway Manor,HU9 3PN
|
||||
HA 32,36,Broadway Manor,HU9 3PN
|
||||
HA 32,12A,Broadway Manor,HU9 3PN
|
||||
HA 32,1,FAROES CLOSE,HU9 4AN
|
||||
HA 32,2,Feroes Close,HU9 4AN
|
||||
HA 32,3,FAROES CLOSE,HU9 4AN
|
||||
HA 32,4,FAROES CLOSE,HU9 4AN
|
||||
HA 32,5,FAROES CLOSE,HU9 4AN
|
||||
HA 32,6,FAROES CLOSE,HU9 4AN
|
||||
HA 32,7,FAROES CLOSE,HU9 4AN
|
||||
HA 32,9,FAROES CLOSE,HU9 4AN
|
||||
HA 32,10,FAROES CLOSE,HU9 4AN
|
||||
HA 32,11,FAROES CLOSE,HU9 4AN
|
||||
HA 32,12,FAROES CLOSE,HU9 4AN
|
||||
HA 32,14,FAROES CLOSE,HU9 4AN
|
||||
HA 32,15,FAROES CLOSE,HU9 4AN
|
||||
HA 32,16,FAROES CLOSE,HU9 4AN
|
||||
HA 32,17,FAROES CLOSE,HU9 4AN
|
||||
HA 32,18,FAROES CLOSE,HU9 4AN
|
||||
HA 32,19,FAROES CLOSE,HU9 4AN
|
||||
HA 32,81,MAYBURY ROAD,HU93LB
|
||||
HA 32,1,ZIEGFELD COURT,HU93PH
|
||||
HA 32,2,ZIEGFELD COURT,HU93PH
|
||||
HA 32,3,ZIEGFELD COURT,HU93PH
|
||||
HA 32,4,ZIEGFELD COURT,HU93PH
|
||||
HA 32,5,ZIEGFELD COURT,HU93PH
|
||||
HA 32,6,ZIEGFELD COURT,HU93PH
|
||||
HA 32,7,ZIEGFELD COURT,HU93PH
|
||||
HA 32,8,ZIEGFELD COURT,HU93PH
|
||||
HA 32,9,ZIEGFELD COURT,HU93PH
|
||||
HA 32,1,GOLDEN COURT,HU93PJ
|
||||
HA 32,2,GOLDEN COURT,HU93PJ
|
||||
HA 32,3,GOLDEN COURT,HU93PJ
|
||||
HA 32,4,GOLDEN COURT,HU93PJ
|
||||
HA 32,5,GOLDEN COURT,HU93PJ
|
||||
HA 32,6,GOLDEN COURT,HU93PJ
|
||||
HA 32,7,GOLDEN COURT,HU93PJ
|
||||
HA 32,8,GOLDEN COURT,HU93PJ
|
||||
HA 32,10,GOLDEN COURT,HU93PJ
|
||||
HA 32,11,GOLDEN COURT,HU93PJ
|
||||
HA 32,12,GOLDEN COURT,HU93PJ
|
||||
HA 32,14,GOLDEN COURT,HU93PJ
|
||||
HA 32,15,GOLDEN COURT,HU93PJ
|
||||
HA 32,16,GOLDEN COURT,HU93PJ
|
||||
HA 32,17,GOLDEN COURT,HU93PJ
|
||||
HA 32,18,GOLDEN COURT,HU93PJ
|
||||
HA 32,19,GOLDEN COURT,HU93PJ
|
||||
HA 32,20,GOLDEN COURT,HU93PJ
|
||||
HA 32,22,GOLDEN COURT,HU93PJ
|
||||
HA 32,23,GOLDEN COURT,HU93PJ
|
||||
HA 32,24,GOLDEN COURT,HU93PJ
|
||||
HA 32,15,ROYALE COURT,HU9 3JZ
|
||||
HA 32,6,SHERWOOD COURT,HU114DF
|
||||
HA 32,979,HESSLE ROAD,HU4 6QG
|
||||
HA 32,985,HESSLE ROAD,HU4 6QG
|
||||
HA 32,2,BUSH CLOSE,HU4 6SP
|
||||
HA 32,11,BUSH CLOSE,HU4 6SP
|
||||
HA 32,16,BUSH CLOSE,HU4 6SP
|
||||
HA 32,52,FORESTER WAY,HU4 6SR
|
||||
HA 32,72,FORESTER WAY,HU4 6SR
|
||||
HA 32,74,FORESTER WAY,HU4 6SR
|
||||
HA 32,3,SUMMERGROVES WAY,HU4 6SZ
|
||||
HA 32,5,WALNUT TREE WAY,HU4 6TG
|
||||
HA 32,6,WALNUT TREE WAY,HU4 6TG
|
||||
HA 32,417,Endike Lane,HU6 8AG
|
||||
HA 32,5,Ashbury Court,HU6 8DA
|
||||
HA 32,9,Ashbury Court,HU6 8DA
|
||||
HA 32,12,Ashbury Court,HU6 8DA
|
||||
HA 32,28,Green Close,HU6 8DA
|
||||
HA 32,34,Green Close,HU6 8DA
|
||||
HA 32,51,Green Close,HU6 8DA
|
||||
HA 32,259,Endike Lane,HU6 8DX
|
||||
HA 32,261,Endike Lane,HU6 8DX
|
||||
HA 32,17,Ashbury Court,HU6 8DY
|
||||
HA 32,20,Ashbury Court,HU6 8DY
|
||||
HA 32,30,Westgarth Avenue,HU6 8LS
|
||||
HA 32,45,Westgarth Avenue,HU6 8LS
|
||||
HA 32,65,Westgarth Avenue,HU6 8LS
|
||||
HA 32,12,BEAUTIMAN COURT,HU6 8LX
|
||||
HA 32,1,THE BROADWAY,HU9 3JH
|
||||
HA 32,12,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,26,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,37,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,38,HEBRIDES CLOSE,HU9 3LF
|
||||
HA 32,24,Broadway Drive,HU9 3PB
|
||||
HA 32,50,Broadway Drive,HU9 3PB
|
||||
HA 32,54,Broadway Drive,HU9 3PB
|
||||
HA 32,2,IMPERIAL COURT,HU9 3PG
|
||||
HA 32,5,SCHUBERT CLOSE,HU9 3PL
|
||||
HA 32,8,SCHUBERT CLOSE,HU9 3PL
|
||||
HA 32,19,SCHUBERT CLOSE,HU9 3PL
|
||||
HA 32,34,SCHUBERT CLOSE,HU9 3PL
|
||||
HA 32,8,FAROES CLOSE,HU9 4AN
|
||||
HA 32,9,GOLDEN COURT,HU93PJ
|
||||
HA 32,21,GOLDEN COURT,HU93PJ
|
||||
|
7667
etl/eligibility/ha_15_32/HA15 - ASSET LIST.csv
Normal file
7667
etl/eligibility/ha_15_32/HA15 - ASSET LIST.csv
Normal file
File diff suppressed because it is too large
Load diff
1419
etl/eligibility/ha_15_32/HA32 - ASSET LIST.csv
Normal file
1419
etl/eligibility/ha_15_32/HA32 - ASSET LIST.csv
Normal file
File diff suppressed because it is too large
Load diff
665
etl/eligibility/ha_15_32/WFT Sales data analysis.py
Normal file
665
etl/eligibility/ha_15_32/WFT Sales data analysis.py
Normal file
|
|
@ -0,0 +1,665 @@
|
|||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
ECO4_NEW_RATES = 1710
|
||||
GBIS_NEW_RATES = 600
|
||||
|
||||
|
||||
def app():
|
||||
# Load in the excel
|
||||
nov_ha_data = pd.read_excel(
|
||||
'etl/eligibility/ha_15_32/ALL HA FIGURES AND ASSIGNED INSTALLERS 21.11.2023 with sales data.xlsx',
|
||||
)
|
||||
# Drop rows where HA name is null
|
||||
nov_ha_data = nov_ha_data.dropna(subset=["HA Name"])
|
||||
nov_ha_data["ha_number"] = nov_ha_data["HA Name"].str.extract(r"(\d+)").astype(int)
|
||||
nov_ha_data = nov_ha_data.sort_values("ha_number", ascending=True)
|
||||
|
||||
variance_explanations = pd.read_excel(
|
||||
'etl/eligibility/ha_15_32/ALL HA FIGURES AND ASSIGNED INSTALLERS 21.11.2023 with sales data.xlsx',
|
||||
sheet_name="Variance explanations"
|
||||
)
|
||||
|
||||
september_figures = pd.read_excel(
|
||||
"etl/eligibility/ha_15_32/ALL HA FIGURES AND ASSIGNED INSTALLERS SEP 23 UPDATE (2).xlsx",
|
||||
sheet_name="HA Stats"
|
||||
)
|
||||
|
||||
historical_invoices = pd.read_excel(
|
||||
"etl/eligibility/ha_15_32/ALL HA FIGURES AND ASSIGNED INSTALLERS 21.11.2023 with sales data.xlsx",
|
||||
sheet_name="Jul 22 to Oct 23"
|
||||
)
|
||||
# Drop rows where installer rates is null
|
||||
historical_invoices = historical_invoices[~pd.isnull(historical_invoices["INSTALLER RATES"])]
|
||||
historical_invoices = historical_invoices[historical_invoices["INSTALLER RATES"] != "NA "]
|
||||
# By Scheme, take a weighted mean of the INSTALLER RATES, weighted on the number of rows
|
||||
n_invoices = historical_invoices.groupby(["Scheme", "INSTALLER RATES"])["Invoice number"].count().reset_index()
|
||||
n_invoices = n_invoices[n_invoices["Scheme"].isin(["Eco 4", "GBIS"])]
|
||||
historical_scheme_rates = n_invoices.groupby("Scheme").apply(
|
||||
lambda x: np.average(x["INSTALLER RATES"], weights=x["Invoice number"])
|
||||
).reset_index().rename(columns={0: "Historical rates"})
|
||||
|
||||
# We keep only the rows of the HA data that have sales > 0
|
||||
sales_data = nov_ha_data[nov_ha_data["Sales"] > 0]
|
||||
|
||||
# We now need to adjust sales data depending on the variance explanations
|
||||
sales_data = sales_data.merge(
|
||||
variance_explanations[["HA", 'Which figure is correct']],
|
||||
how="left",
|
||||
left_on="ha_number",
|
||||
right_on="HA"
|
||||
)
|
||||
|
||||
def adjust_sales(row):
    """
    Pick the sales figure to use for one HA row, based on its variance explanation.

    :param row: mapping-like row (e.g. a pandas Series) exposing "Which figure is correct", "Sales" and
        'No. of Tech surveys complete'
    :return: the tech-survey count when the explanation is "HA facts & figures"; otherwise the "Sales" figure
        (which is also used when no explanation is recorded)
    :raises ValueError: if the explanation holds a value this function does not recognise
    """
    verdict = row["Which figure is correct"]

    # No explanation recorded for this HA, so the sales figure is kept as it is
    if pd.isnull(verdict):
        return row["Sales"]

    if verdict == "HA facts & figures":
        return row['No. of Tech surveys complete']

    # Every one of these explanations resolves to the sales figure
    keep_sales = ("Billed amount", "Both correct, HA facts and figures includes November", "Both correct")
    if verdict in keep_sales:
        return row["Sales"]

    raise ValueError(f"Unknown value for 'Which figure is correct': {verdict}")
|
||||
|
||||
# We now need to adjust sales data depending on the variance explanations
|
||||
sales_data["adjusted_sales"] = sales_data.apply(lambda row: adjust_sales(row), axis=1)
|
||||
|
||||
# We therefore adjust GBIS and ECO4 sales data based on adjusted sales
|
||||
sales_data["adjusted_eco4_sales"] = sales_data["No. of Tech surveys complete - Eco 4"] / sales_data["Sales"] * \
|
||||
sales_data["adjusted_sales"]
|
||||
|
||||
sales_data["adjusted_gbis_sales"] = sales_data["No. of Tech surveys complete - GBIS"] / sales_data["Sales"] * \
|
||||
sales_data["adjusted_sales"]
|
||||
|
||||
sales_data["cancellation_rate"] = (sales_data["Sales"] - sales_data["adjusted_sales"]) / sales_data["Sales"]
|
||||
|
||||
# The difference between the adjusted sales and the actual sales is the cancellation
|
||||
cancellations = (sales_data["adjusted_sales"].sum() - sales_data["Sales"].sum()) / sales_data["Sales"].sum()
|
||||
|
||||
# Given the cancellations, we can now adjust the expected remaining surveys
|
||||
sales_data["No. of Tech surveys remaining"] = sales_data["No. of Tech surveys remaining"] * (
|
||||
1 - sales_data["cancellation_rate"]
|
||||
)
|
||||
|
||||
# We now merge on the expected values for September
|
||||
sales_data = sales_data.merge(
|
||||
september_figures[["Redacted HA", "ECO4", "GBIS"]].rename(
|
||||
columns={"Redacted HA": "HA Name", "ECO4": "Sept Expected ECO4", "GBIS": "Sept Expected GBIS"}
|
||||
),
|
||||
how="left",
|
||||
on="HA Name",
|
||||
)
|
||||
|
||||
sales_data["Sept Expected ECO4"] = sales_data["Sept Expected ECO4"].fillna(0)
|
||||
sales_data["Sept Expected GBIS"] = sales_data["Sept Expected GBIS"].fillna(0)
|
||||
|
||||
# We calculate the ECO4 and GBIS conversion rates with the adjusted numbers
|
||||
sales_data["ECO4 Conversion"] = sales_data["adjusted_eco4_sales"] / sales_data["adjusted_sales"]
|
||||
sales_data["GBIS Conversion"] = sales_data["adjusted_gbis_sales"] / sales_data["adjusted_sales"]
|
||||
|
||||
# We now calculate the expected remaining ECO4 and GBIS sales
|
||||
# We take the number of remaining surveys and multiply by the conversion rate for each scheme, which tells us
|
||||
# how many more we should expect to see
|
||||
sales_data["Expected Remaining ECO4"] = sales_data["No. of Tech surveys remaining"] * sales_data["ECO4 Conversion"]
|
||||
sales_data["Expected Remaining GBIS"] = sales_data["No. of Tech surveys remaining"] * sales_data["GBIS Conversion"]
|
||||
|
||||
# We now produce a forecasted ECO4 and GBIS sales figure
|
||||
sales_data["Forecasted ECO4 Sales"] = sales_data["adjusted_eco4_sales"] + sales_data["Expected Remaining ECO4"]
|
||||
sales_data["Forecasted GBIS Sales"] = sales_data["adjusted_gbis_sales"] + sales_data["Expected Remaining GBIS"]
|
||||
|
||||
# Take the columns we're interested in
|
||||
# HA # Properties Sept ECO4 Figures Sept GBIS Figures Nov Total Sales Nov ECO4 Sales Nov GBIS Sales
|
||||
# Remaining Surveys ECO4 conversion GBIS conversion Forecasted ECO4 Sales Forecasted GBIS sales ECO4 Change
|
||||
# GBIS Change
|
||||
sales_data_formatted = sales_data[[
|
||||
"HA Name",
|
||||
"ASSET LIST no.",
|
||||
"Sept Expected ECO4",
|
||||
"Sept Expected GBIS",
|
||||
"adjusted_sales",
|
||||
"adjusted_eco4_sales",
|
||||
"adjusted_gbis_sales",
|
||||
"No. of Tech surveys remaining",
|
||||
"ECO4 Conversion",
|
||||
"GBIS Conversion",
|
||||
"Forecasted ECO4 Sales",
|
||||
"Forecasted GBIS Sales"
|
||||
]].rename(
|
||||
columns={
|
||||
"adjusted_sales": "Oct Total Sales (adjusted for variance)",
|
||||
"adjusted_eco4_sales": "Oct ECO4 Sales (adjusted for variance)",
|
||||
"adjusted_gbis_sales": "Oct GBIS Sales (adjusted for variance)",
|
||||
"No. of Tech surveys remaining": "Remaining Surveys",
|
||||
}
|
||||
)
|
||||
|
||||
# Convert columns which should be integers to integers
|
||||
for col in ["ASSET LIST no.", "Remaining Surveys", "Sept Expected ECO4", "Sept Expected GBIS",
|
||||
"Oct Total Sales (adjusted for variance)", "Oct ECO4 Sales (adjusted for variance)",
|
||||
"Oct GBIS Sales (adjusted for variance)", "Forecasted ECO4 Sales", "Forecasted GBIS Sales"]:
|
||||
sales_data_formatted[col] = sales_data_formatted[col].fillna(0)
|
||||
sales_data_formatted[col] = sales_data_formatted[col].astype(int)
|
||||
|
||||
# Remove HA 17 because this was EPCs only. We also remove HA 33 because they do not have access to the full portfolio.
# We take an explicit copy because new columns are assigned onto this frame further down in the script; without it
# pandas treats the filtered frame as a slice of the unfiltered one and emits SettingWithCopyWarning on those
# assignments.
sales_data_formatted = sales_data_formatted[
    ~sales_data_formatted["HA Name"].isin(["HA 17", "HA 33"])
].copy()
|
||||
|
||||
# September expected ECO4 and GBIS
|
||||
sept_expected_eco4 = sales_data_formatted["Sept Expected ECO4"].sum()
|
||||
sept_expected_gbis = sales_data_formatted["Sept Expected GBIS"].sum()
|
||||
|
||||
# Completed so far
|
||||
oct_eco4_sales = sales_data_formatted["Oct ECO4 Sales (adjusted for variance)"].sum()
|
||||
oct_gbis_sales = sales_data_formatted["Oct GBIS Sales (adjusted for variance)"].sum()
|
||||
|
||||
# Forecasted figures
|
||||
forecasted_eco4_sales = sales_data_formatted["Forecasted ECO4 Sales"].sum()
|
||||
forecasted_gbis_sales = sales_data_formatted["Forecasted GBIS Sales"].sum()
|
||||
|
||||
# Expected remaining sales
|
||||
expected_remaining_eco4_sales = forecasted_eco4_sales - oct_eco4_sales
|
||||
expected_remaining_gbis_sales = forecasted_gbis_sales - oct_gbis_sales
|
||||
|
||||
# Forecast change vs September
|
||||
forecasted_eco4_change = 100 * (forecasted_eco4_sales - sept_expected_eco4) / sept_expected_eco4
|
||||
forecasted_gbis_change = 100 * (forecasted_gbis_sales - sept_expected_gbis) / sept_expected_gbis
|
||||
|
||||
aggregates = pd.DataFrame(
|
||||
columns=["Scheme", "Sept Expected", "Oct Completed", "Forecasted Remaining Sales", "Forecasted Total Sales",
|
||||
"Forecasted Change vs Sept"],
|
||||
data=[
|
||||
["ECO4", sept_expected_eco4, oct_eco4_sales, expected_remaining_eco4_sales, forecasted_eco4_sales,
|
||||
forecasted_eco4_change],
|
||||
["GBIS", sept_expected_gbis, oct_gbis_sales, expected_remaining_gbis_sales, forecasted_gbis_sales,
|
||||
forecasted_gbis_change],
|
||||
]
|
||||
)
|
||||
|
||||
# Multiply by historical rates to get revenue
|
||||
# For ECO4, this is ~£1456 and for GBIS it's ~£432
|
||||
historical_gbis_price = historical_scheme_rates[
|
||||
historical_scheme_rates["Scheme"] == "GBIS"
|
||||
]["Historical rates"].iloc[0]
|
||||
|
||||
historical_eco4_price = historical_scheme_rates[
|
||||
historical_scheme_rates["Scheme"] == "Eco 4"
|
||||
]["Historical rates"].iloc[0]
|
||||
|
||||
aggregates["Sept Expected Revenue"] = np.where(
|
||||
aggregates["Scheme"] == "ECO4",
|
||||
aggregates["Sept Expected"] * historical_eco4_price,
|
||||
aggregates["Sept Expected"] * historical_gbis_price
|
||||
)
|
||||
|
||||
aggregates["Completed Revenue"] = np.where(
|
||||
aggregates["Scheme"] == "ECO4",
|
||||
aggregates["Oct Completed"] * historical_eco4_price,
|
||||
aggregates["Oct Completed"] * historical_gbis_price
|
||||
)
|
||||
|
||||
# We use the new rates for the forecasted revenue
|
||||
aggregates["Forecasted Remaining Revenue"] = np.where(
|
||||
aggregates["Scheme"] == "ECO4",
|
||||
aggregates["Forecasted Remaining Sales"] * ECO4_NEW_RATES,
|
||||
aggregates["Forecasted Remaining Sales"] * GBIS_NEW_RATES
|
||||
)
|
||||
|
||||
# We also calculate the forecasted remaining revenue at the original price
|
||||
aggregates["Forecasted Remaining Revenue (original price)"] = np.where(
|
||||
aggregates["Scheme"] == "ECO4",
|
||||
aggregates["Forecasted Remaining Sales"] * historical_eco4_price,
|
||||
aggregates["Forecasted Remaining Sales"] * historical_gbis_price
|
||||
)
|
||||
|
||||
aggregates["Forecasted Revenue"] = aggregates["Completed Revenue"] + aggregates["Forecasted Remaining Revenue"]
|
||||
|
||||
# Forecasted revenue with original price
|
||||
aggregates["Forecasted Revenue (original price)"] = (
|
||||
aggregates["Completed Revenue"] + aggregates["Forecasted Remaining Revenue (original price)"]
|
||||
)
|
||||
|
||||
# Create a totals row which sums up the two rows
|
||||
|
||||
forecasted_change_vs_sept = 100 * (
|
||||
aggregates["Forecasted Total Sales"].sum() - aggregates["Sept Expected"].sum()
|
||||
) / aggregates["Sept Expected"].sum()
|
||||
|
||||
aggregates = pd.concat(
|
||||
[
|
||||
aggregates,
|
||||
pd.DataFrame(
|
||||
[
|
||||
["Total", aggregates["Sept Expected"].sum(), aggregates["Oct Completed"].sum(),
|
||||
aggregates["Forecasted Remaining Sales"].sum(), aggregates["Forecasted Total Sales"].sum(),
|
||||
forecasted_change_vs_sept,
|
||||
aggregates["Sept Expected Revenue"].sum(), aggregates["Completed Revenue"].sum(),
|
||||
aggregates["Forecasted Remaining Revenue"].sum(),
|
||||
aggregates["Forecasted Remaining Revenue (original price)"].sum(),
|
||||
aggregates["Forecasted Revenue"].sum(),
|
||||
aggregates["Forecasted Revenue (original price)"].sum(),
|
||||
]
|
||||
],
|
||||
columns=aggregates.columns
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
# For each property in the asset list, we now calculate an average conversion rate to ECO4 and GBIS
|
||||
# We do this by taking the forecasted sales values for each schemes and dividing by the number of properties
|
||||
|
||||
number_properties = sales_data_formatted["ASSET LIST no."].sum()
|
||||
eco4_conversion_rate = forecasted_eco4_sales / number_properties
|
||||
gbis_conversion_rate = forecasted_gbis_sales / number_properties
|
||||
|
||||
# We also attribute a future value per property
|
||||
future_eco4_value = ECO4_NEW_RATES * eco4_conversion_rate
|
||||
future_gbis_value = GBIS_NEW_RATES * gbis_conversion_rate
|
||||
|
||||
# We also calculate a revenue figure for the old rates
|
||||
historical_eco4_value = historical_eco4_price * eco4_conversion_rate
|
||||
historical_gbis_value = historical_gbis_price * gbis_conversion_rate
|
||||
|
||||
# For the HAs that have not begun selling, we estimate the value of the projects
|
||||
# We start with some problem HAs
|
||||
|
||||
# HA 7, HA 24, HA 25
|
||||
# These HAs have no sales data, so we use the expected figures
|
||||
|
||||
problem_has_data = nov_ha_data[
|
||||
(nov_ha_data["HA Name"].isin(["HA 7", "HA 24", "HA 25"]))
|
||||
].copy()
|
||||
# Merge on the september expected figures
|
||||
problem_has_data = problem_has_data.merge(
|
||||
september_figures[["Redacted HA", "ECO4", "GBIS"]].rename(
|
||||
columns={"Redacted HA": "HA Name", "ECO4": "Sept Expected ECO4", "GBIS": "Sept Expected GBIS"}
|
||||
),
|
||||
how="left",
|
||||
on="HA Name",
|
||||
)
|
||||
# Fill NAs
|
||||
problem_has_data["Sept Expected ECO4"] = problem_has_data["Sept Expected ECO4"].fillna(0)
|
||||
problem_has_data["Sept Expected GBIS"] = problem_has_data["Sept Expected GBIS"].fillna(0)
|
||||
|
||||
# We now calculate the expected ECO4 and GBIS sales based on the average conversion rates
|
||||
problem_has_data["Expected ECO4 Sales"] = problem_has_data["ASSET LIST no."] * eco4_conversion_rate
|
||||
problem_has_data["Expected GBIS Sales"] = problem_has_data["ASSET LIST no."] * gbis_conversion_rate
|
||||
|
||||
# Filter just on columns we're interested in
|
||||
problem_has_data = problem_has_data[[
|
||||
"HA Name",
|
||||
"ASSET LIST no.",
|
||||
"Sept Expected ECO4",
|
||||
"Sept Expected GBIS",
|
||||
"ECO4",
|
||||
"GBIS",
|
||||
"Expected ECO4 Sales",
|
||||
"Expected GBIS Sales"
|
||||
]].rename(
|
||||
columns={
|
||||
"ECO4": "Nov Expected ECO4",
|
||||
"GBIS": "Nov Expected GBIS",
|
||||
}
|
||||
)
|
||||
|
||||
# Fill NAs
|
||||
problem_has_data["Nov Expected ECO4"] = problem_has_data["Nov Expected ECO4"].fillna(0)
|
||||
problem_has_data["Nov Expected GBIS"] = problem_has_data["Nov Expected GBIS"].fillna(0)
|
||||
|
||||
# We calculate HA level Sept, Nov expected revenue, based on historical rates and then forecasted revenue
|
||||
problem_has_data["Sept Expected ECO4 Value"] = problem_has_data["Sept Expected ECO4"] * historical_eco4_price
|
||||
problem_has_data["Sept Expected GBIS Value"] = problem_has_data["Sept Expected GBIS"] * historical_gbis_price
|
||||
|
||||
problem_has_data["Nov Expected ECO4 Value"] = problem_has_data["Nov Expected ECO4"] * historical_eco4_price
|
||||
problem_has_data["Nov Expected GBIS Value"] = problem_has_data["Nov Expected GBIS"] * historical_gbis_price
|
||||
|
||||
problem_has_data["Forecasted ECO4 Revenue"] = problem_has_data["ASSET LIST no."] * future_eco4_value
|
||||
problem_has_data["Forecasted GBIS Revenue"] = problem_has_data["ASSET LIST no."] * future_gbis_value
|
||||
|
||||
# Totals
|
||||
problem_has_data["Sept Expected Total Value"] = problem_has_data["Sept Expected ECO4 Value"] + \
|
||||
problem_has_data["Sept Expected GBIS Value"]
|
||||
problem_has_data["Nov Expected Total Value"] = problem_has_data["Nov Expected ECO4 Value"] + \
|
||||
problem_has_data["Nov Expected GBIS Value"]
|
||||
problem_has_data["Forecasted Total Revenue"] = problem_has_data["Forecasted ECO4 Revenue"] + \
|
||||
problem_has_data["Forecasted GBIS Revenue"]
|
||||
|
||||
# We calculate a total expected value for September, November and then forecasted
|
||||
problem_has_expected_eco4_value = problem_has_data["Sept Expected ECO4"].sum() * historical_eco4_price
|
||||
problem_has_expected_gbis_value = problem_has_data["Sept Expected GBIS"].sum() * historical_gbis_price
|
||||
problem_has_expected_total_value = problem_has_expected_eco4_value + problem_has_expected_gbis_value
|
||||
|
||||
problem_has_nov_eco4_value = problem_has_data["Nov Expected ECO4"].sum() * historical_eco4_price
|
||||
problem_has_nov_gbis_value = problem_has_data["Nov Expected GBIS"].sum() * historical_gbis_price
|
||||
problem_has_nov_total_value = problem_has_nov_eco4_value + problem_has_nov_gbis_value
|
||||
|
||||
forecasted_eco4_value = problem_has_data["ASSET LIST no."].sum() * future_eco4_value
|
||||
forecasted_gbis_value = problem_has_data["ASSET LIST no."].sum() * future_gbis_value
|
||||
problem_has_forecasted_total_value = forecasted_eco4_value + forecasted_gbis_value
|
||||
|
||||
problem_has_summary = pd.DataFrame(
|
||||
columns=["Scheme", "Sept Expected", "Nov Expected", "Forecasted"],
|
||||
data=[
|
||||
["ECO4", problem_has_expected_eco4_value, problem_has_nov_eco4_value, forecasted_eco4_value],
|
||||
["GBIS", problem_has_expected_gbis_value, problem_has_nov_gbis_value, forecasted_gbis_value],
|
||||
["Total", problem_has_expected_total_value, problem_has_nov_total_value, problem_has_forecasted_total_value]
|
||||
]
|
||||
)
|
||||
|
||||
# We now also estimate the value of the remaining HAs based on historical sales performance and new rates
|
||||
# We take the has that are not in the sales data
|
||||
remaining_has = nov_ha_data[
|
||||
~nov_ha_data["HA Name"].isin(sales_data_formatted["HA Name"])
|
||||
].copy()
|
||||
|
||||
# Merge on the september expected figures
|
||||
remaining_has = remaining_has.merge(
|
||||
september_figures[["Redacted HA", "ECO4", "GBIS"]].rename(
|
||||
columns={"Redacted HA": "HA Name", "ECO4": "Sept Expected ECO4", "GBIS": "Sept Expected GBIS"}
|
||||
),
|
||||
how="left",
|
||||
on="HA Name",
|
||||
)
|
||||
|
||||
# We update the asset list size for HA 33, because they do not have access to the full portfolio
|
||||
remaining_has.loc[remaining_has["HA Name"] == "HA 33", "ASSET LIST no."] = 20699
|
||||
# We also remove HA 17
|
||||
remaining_has = remaining_has[~remaining_has["HA Name"].isin(["HA 17"])]
|
||||
|
||||
# We now calculate the expected ECO4 and GBIS sales based on the average conversion rates
|
||||
remaining_has["Expected ECO4 Sales"] = remaining_has["ASSET LIST no."] * eco4_conversion_rate
|
||||
remaining_has["Expected GBIS Sales"] = remaining_has["ASSET LIST no."] * gbis_conversion_rate
|
||||
|
||||
# Filter just on columns we're interested in
|
||||
remaining_has = remaining_has[[
|
||||
"HA Name",
|
||||
"ASSET LIST no.",
|
||||
"Sept Expected ECO4",
|
||||
"Sept Expected GBIS",
|
||||
"ECO4",
|
||||
"GBIS",
|
||||
]].rename(
|
||||
columns={
|
||||
"ECO4": "Nov Expected ECO4",
|
||||
"GBIS": "Nov Expected GBIS",
|
||||
}
|
||||
)
|
||||
|
||||
remaining_has = remaining_has.fillna(0)
|
||||
|
||||
# We take just HAs that had an initial September expectation for ECO4 or GBIS, or that now have a Nov expectation.
# The explicit copy makes the filtered frame independent, so the forecast/value columns assigned onto it straight
# afterwards do not trigger pandas' SettingWithCopyWarning.
remaining_has = remaining_has[
    (remaining_has["Sept Expected ECO4"] > 0) | (remaining_has["Sept Expected GBIS"] > 0) |
    (remaining_has["Nov Expected ECO4"] > 0) | (remaining_has["Nov Expected GBIS"] > 0)
].copy()
|
||||
|
||||
# Expected sales based on asset list size and conversion rate
|
||||
remaining_has["Forecasted Sales ECO4"] = remaining_has["ASSET LIST no."] * eco4_conversion_rate
|
||||
remaining_has["Forecasted Sales GBIS"] = remaining_has["ASSET LIST no."] * gbis_conversion_rate
|
||||
|
||||
# Calculate the total expected value for September and November
|
||||
remaining_has["Sept Expected ECO4 Value"] = remaining_has["Sept Expected ECO4"] * historical_eco4_price
|
||||
remaining_has["Sept Expected GBIS Value"] = remaining_has["Sept Expected GBIS"] * historical_gbis_price
|
||||
|
||||
remaining_has["Nov Expected ECO4 Value"] = remaining_has["Nov Expected ECO4"] * historical_eco4_price
|
||||
remaining_has["Nov Expected GBIS Value"] = remaining_has["Nov Expected GBIS"] * historical_gbis_price
|
||||
|
||||
# Calculate forecasted revenue
|
||||
remaining_has["Forecasted ECO4 Revenue"] = remaining_has["ASSET LIST no."] * future_eco4_value
|
||||
remaining_has["Forecasted GBIS Revenue"] = remaining_has["ASSET LIST no."] * future_gbis_value
|
||||
|
||||
# We also calculate forecasted revenue with the original price
|
||||
remaining_has["Forecasted ECO4 Revenue (original price)"] = remaining_has["ASSET LIST no."] * historical_eco4_value
|
||||
remaining_has["Forecasted GBIS Revenue (original price)"] = remaining_has["ASSET LIST no."] * historical_gbis_value
|
||||
|
||||
# Calculate totals for each scheme
|
||||
remaining_has_september_eco4_sales = remaining_has["Sept Expected ECO4"].sum()
|
||||
remaining_has_september_gbis_sales = remaining_has["Sept Expected GBIS"].sum()
|
||||
|
||||
remaining_has_november_eco4_sales = remaining_has["Nov Expected ECO4"].sum()
|
||||
remaining_has_november_gbis_sales = remaining_has["Nov Expected GBIS"].sum()
|
||||
|
||||
remaining_has_forecasted_eco4_sales = remaining_has["Forecasted Sales ECO4"].sum()
|
||||
remaining_has_forecasted_gbis_sales = remaining_has["Forecasted Sales GBIS"].sum()
|
||||
|
||||
remaining_has_september_eco4_value = remaining_has["Sept Expected ECO4 Value"].sum()
|
||||
remaining_has_september_gbis_value = remaining_has["Sept Expected GBIS Value"].sum()
|
||||
|
||||
remaining_has_november_eco4_value = remaining_has["Nov Expected ECO4 Value"].sum()
|
||||
remaining_has_november_gbis_value = remaining_has["Nov Expected GBIS Value"].sum()
|
||||
|
||||
remaining_has_forecasted_eco4_value = remaining_has["Forecasted ECO4 Revenue"].sum()
|
||||
remaining_has_forecasted_gbis_value = remaining_has["Forecasted GBIS Revenue"].sum()
|
||||
|
||||
remaining_has_forecasted_eco4_value_original_price = remaining_has["Forecasted ECO4 Revenue (original price)"].sum()
|
||||
remaining_has_forecasted_gbis_value_original_price = remaining_has["Forecasted GBIS Revenue (original price)"].sum()
|
||||
|
||||
# Calculate the change in forecasted sales against the September expected sales
|
||||
remaining_has_foecast_change_eco4 = 100 * (
|
||||
remaining_has["Forecasted Sales ECO4"].sum() - remaining_has["Sept Expected ECO4"].sum()
|
||||
) / remaining_has["Sept Expected ECO4"].sum()
|
||||
|
||||
remaining_has_foecast_change_gbis = 100 * (
|
||||
remaining_has["Forecasted Sales GBIS"].sum() - remaining_has["Sept Expected GBIS"].sum()
|
||||
) / remaining_has["Sept Expected GBIS"].sum()
|
||||
|
||||
# Total change
|
||||
remaining_has_foecast_change_total = 100 * (
|
||||
remaining_has["Forecasted Sales ECO4"].sum() + remaining_has["Forecasted Sales GBIS"].sum() -
|
||||
remaining_has["Sept Expected ECO4"].sum() - remaining_has["Sept Expected GBIS"].sum()
|
||||
) / (remaining_has["Sept Expected ECO4"].sum() + remaining_has["Sept Expected GBIS"].sum())
|
||||
|
||||
asset_list_size = remaining_has["ASSET LIST no."].sum()
|
||||
|
||||
# Create a summary table of the rest with the totals for ECO4, GBIS and then a total row
|
||||
remaining_has_aggregate = pd.DataFrame(
|
||||
columns=["Scheme", "Asset List Size", "Sept Expected Sales", "Nov Expected Sales", "Forecasted Sales",
|
||||
"Forecasted Change vs Sept",
|
||||
"Sept Expected Value", "Nov Expected Value", "Forecasted Value", "Forecasted Value (original price)"],
|
||||
data=[
|
||||
[
|
||||
"ECO4", asset_list_size, remaining_has_september_eco4_sales, remaining_has_november_eco4_sales,
|
||||
remaining_has_forecasted_eco4_sales, remaining_has_foecast_change_eco4,
|
||||
remaining_has_september_eco4_value,
|
||||
remaining_has_november_eco4_value, remaining_has_forecasted_eco4_value,
|
||||
remaining_has_forecasted_eco4_value_original_price
|
||||
],
|
||||
[
|
||||
"GBIS", asset_list_size, remaining_has_september_gbis_sales, remaining_has_november_gbis_sales,
|
||||
remaining_has_forecasted_gbis_sales, remaining_has_foecast_change_gbis,
|
||||
remaining_has_september_gbis_value,
|
||||
remaining_has_november_gbis_value, remaining_has_forecasted_gbis_value,
|
||||
remaining_has_forecasted_gbis_value_original_price
|
||||
],
|
||||
[
|
||||
"Total", asset_list_size, remaining_has_september_eco4_sales + remaining_has_september_gbis_sales,
|
||||
remaining_has_november_eco4_sales + remaining_has_november_gbis_sales,
|
||||
remaining_has_forecasted_eco4_sales + remaining_has_forecasted_gbis_sales,
|
||||
remaining_has_foecast_change_total,
|
||||
remaining_has_september_eco4_value + remaining_has_september_gbis_value,
|
||||
remaining_has_november_eco4_value + remaining_has_november_gbis_value,
|
||||
remaining_has_forecasted_eco4_value + remaining_has_forecasted_gbis_value,
|
||||
remaining_has_forecasted_eco4_value_original_price +
|
||||
remaining_has_forecasted_gbis_value_original_price
|
||||
]
|
||||
]
|
||||
)
|
||||
|
||||
# Calculate pipeline value
|
||||
pipeline_value = aggregates[["Scheme", "Completed Revenue", "Forecasted Remaining Revenue"]].merge(
|
||||
remaining_has_aggregate[["Scheme", "Forecasted Value"]].rename(
|
||||
columns={"Forecasted Value": "Forecasted Revenue, Unconfirmed HAs"}
|
||||
), how="inner", on="Scheme"
|
||||
)
|
||||
|
||||
# Calculate the total
|
||||
pipeline_value["Total Value"] = (
|
||||
pipeline_value["Completed Revenue"] + pipeline_value["Forecasted Remaining Revenue"] + pipeline_value[
|
||||
"Forecasted Revenue, Unconfirmed HAs"]
|
||||
)
|
||||
|
||||
# TODO: Insert model figures
|
||||
model_results = pd.DataFrame(
|
||||
[
|
||||
{
|
||||
# This one, we don't have sales data
|
||||
"HA Name": "HA 15",
|
||||
"Model Expected Additional ECO4 (unit level)": None,
|
||||
"Model Expected Total ECO4 (unit level)": 296,
|
||||
"Model Expected Additional GBIS (unit level)": None,
|
||||
"Model Expected Total GBIS (unit level)": 209,
|
||||
},
|
||||
{
|
||||
"HA Name": "HA 16",
|
||||
# Old before re-run
|
||||
# "Model Expected Additional ECO4 (unit level)": 418,
|
||||
# "Model Expected Total ECO4 (unit level)": 1820,
|
||||
# "Model Expected Additional GBIS (unit level)": 576,
|
||||
# "Model Expected Total GBIS (unit level)": 612,
|
||||
|
||||
# IN the partial sales data, WFT have completed 1407 ECO4, 36 GBIS
|
||||
"Model Expected Additional ECO4 (unit level)": 411 + 342 + 235,
|
||||
"Model Expected Total ECO4 (unit level)": 1407 + 411 + 342 + 235,
|
||||
"Model Expected Additional GBIS (unit level)": 223,
|
||||
"Model Expected Total GBIS (unit level)": 36 + 223,
|
||||
},
|
||||
{
|
||||
"HA Name": "HA 24",
|
||||
"Model Expected Additional ECO4 (unit level)": 224,
|
||||
"Model Expected Total ECO4 (unit level)": 848,
|
||||
"Model Expected Additional GBIS (unit level)": 552,
|
||||
"Model Expected Total GBIS (unit level)": 552,
|
||||
},
|
||||
{
|
||||
"HA Name": "HA 25",
|
||||
"Model Expected Additional ECO4 (unit level)": None,
|
||||
"Model Expected Total ECO4 (unit level)": 1709 + 59,
|
||||
"Model Expected Additional GBIS (unit level)": None,
|
||||
"Model Expected Total GBIS (unit level)": 2004 + 107,
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
sales_data_formatted["Remaining ECO4 Sales"] = (
|
||||
sales_data_formatted["Forecasted ECO4 Sales"] - sales_data_formatted["Oct ECO4 Sales (adjusted for variance)"]
|
||||
)
|
||||
|
||||
sales_data_formatted["Remaining GBIS Sales"] = (
|
||||
sales_data_formatted["Forecasted GBIS Sales"] - sales_data_formatted["Oct GBIS Sales (adjusted for variance)"]
|
||||
)
|
||||
|
||||
sales_data_formatted["Completed ECO4 Revenue"] = (sales_data_formatted[
|
||||
"Oct ECO4 Sales (adjusted for variance)"] *
|
||||
historical_eco4_price)
|
||||
sales_data_formatted["Completed GBIS Revenue"] = (sales_data_formatted[
|
||||
"Oct GBIS Sales (adjusted for variance)"] *
|
||||
historical_gbis_price)
|
||||
|
||||
ha_subset_with_sales = ["HA 15", "HA 16", "HA 24"]
|
||||
|
||||
# Restrict to the HAs listed in ha_subset_with_sales and keep only the sales / completed-revenue columns.
# The copy is taken AFTER the row and column selection: copying first and then selecting columns still leaves the
# selected frame flagged as a slice, so the revenue columns assigned onto it afterwards would raise
# SettingWithCopyWarning.
has_subset_with_sales_value = sales_data_formatted.loc[
    sales_data_formatted["HA Name"].isin(ha_subset_with_sales),
    [
        "HA Name",
        "Oct ECO4 Sales (adjusted for variance)",
        "Oct GBIS Sales (adjusted for variance)",
        "Remaining ECO4 Sales",
        "Remaining GBIS Sales",
        "Forecasted ECO4 Sales",
        "Forecasted GBIS Sales",
        "Completed ECO4 Revenue",
        "Completed GBIS Revenue",
    ]
].copy()
|
||||
|
||||
has_subset_with_sales_value["Remaining ECO4 Revenue"] = has_subset_with_sales_value[
|
||||
"Remaining ECO4 Sales"] * ECO4_NEW_RATES
|
||||
has_subset_with_sales_value["Remaining GBIS Revenue"] = has_subset_with_sales_value[
|
||||
"Remaining GBIS Sales"] * GBIS_NEW_RATES
|
||||
|
||||
has_subset_with_sales_value["Remaining Total Revenue"] = (
|
||||
has_subset_with_sales_value["Remaining ECO4 Revenue"] + has_subset_with_sales_value["Remaining GBIS Revenue"]
|
||||
)
|
||||
|
||||
model_results["Model Expected Additional ECO4 Revenue"] = (
|
||||
model_results["Model Expected Additional ECO4 (unit level)"] * ECO4_NEW_RATES
|
||||
)
|
||||
|
||||
model_results["Model Expected Additional GBIS revenue"] = (
|
||||
model_results["Model Expected Additional GBIS (unit level)"] * GBIS_NEW_RATES
|
||||
)
|
||||
|
||||
model_results["Model Expected Additional Total Revenue"] = (
|
||||
model_results["Model Expected Additional ECO4 Revenue"] + model_results[
|
||||
"Model Expected Additional GBIS revenue"]
|
||||
)
|
||||
|
||||
# Show more columns with pandas
|
||||
pd.set_option('display.max_rows', 500)
|
||||
pd.set_option('display.max_columns', 500)
|
||||
pd.set_option('display.width', 1000)
|
||||
|
||||
# Look at HA 16
|
||||
ha16_model = model_results[model_results["HA Name"] == "HA 16"]
|
||||
has_subset_with_sales_value[has_subset_with_sales_value["HA Name"] == "HA 16"]
|
||||
|
||||
# WFT: For HA 16: 4,598,190 ECO4, 57,000 GBIS
|
||||
# Model:
|
||||
|
||||
# Look at HA 24
|
||||
ha24_model = model_results[model_results["HA Name"] == "HA 24"]
|
||||
has_subset_with_sales_value[has_subset_with_sales_value["HA Name"] == "HA 24"]
|
||||
|
||||
# Look at HA 15
|
||||
ha15_data = has_subset_with_sales_value[has_subset_with_sales_value["HA Name"] == "HA 15"]
|
||||
ha15_portfolio_value = ha15_data["Completed ECO4 Revenue"] + ha15_data[
|
||||
"Completed GBIS Revenue"] + ha15_data["Remaining Total Revenue"]
|
||||
# # This doesn't have sales data so in the model analysis, we just value the ha as a whole
|
||||
ha15_model = model_results[model_results["HA Name"] == "HA 15"]
|
||||
ha15_value = ha15_model["Model Expected Total ECO4 (unit level)"].iloc[0] * ECO4_NEW_RATES + \
|
||||
ha15_model["Model Expected Total GBIS (unit level)"].iloc[0] * GBIS_NEW_RATES
|
||||
|
||||
model_results["Expected ECO4 Revenue"] = model_results["Model Expected Total ECO4 (unit level)"] * ECO4_NEW_RATES
|
||||
model_results["Expected GBIS Revenue"] = model_results["Model Expected Total GBIS (unit level)"] * GBIS_NEW_RATES
|
||||
model_results["Expected Total Revenue"] = model_results["Expected ECO4 Revenue"] + model_results[
|
||||
"Expected GBIS Revenue"]
|
||||
model_results[model_results["HA Name"].isin(["HA 15"])]
|
||||
|
||||
# We now create a final excel with all of the data
|
||||
# We want:
|
||||
# 1) aggregates
|
||||
# 2) sales_data_formatted
|
||||
# 3) remaining_has_aggregate
|
||||
# 4) remaining_has
|
||||
# 5) problem_has_summary
|
||||
|
||||
# Function to get the maximum column width
|
||||
def get_col_widths(dataframe):
    """
    Work out a width (in characters) for the index and for every column of a dataframe.

    :param dataframe: the dataframe to measure
    :return: list of ints. The first entry is the width of the index (the longest of its stringified values and
        its stringified name); it is followed by one entry per column, in column order, each being the longest of
        the column's stringified values and its stringified header
    """
    # Width of the index: the longest stringified index value, or the index name if that is longer
    index_lengths = [len(str(value)) for value in dataframe.index.values]
    index_lengths.append(len(str(dataframe.index.name)))
    widths = [max(index_lengths)]

    for col in dataframe.columns:
        value_lengths = dataframe[col].astype(str).map(len)
        # An empty column has no longest value (max() would give NaN), so fall back to the header width alone
        longest_value = int(value_lengths.max()) if len(value_lengths) else 0
        # Column labels are not always strings (e.g. integer labels), so stringify before measuring
        widths.append(max(longest_value, len(str(col))))

    return widths
|
||||
|
||||
# Create a Pandas Excel writer using XlsxWriter as the engine
with pd.ExcelWriter('HA Pipeline Analysis.xlsx', engine='xlsxwriter') as writer:
    # Write each dataframe to a different worksheet without the index
    for df, sheet in [
        (aggregates, 'Forecasted Sales'),
        (sales_data_formatted, 'Sales Data'),
        (remaining_has_aggregate, 'Remaining HAs Value'),
        (remaining_has, 'Remaining HAs data'),
        (pipeline_value, 'Pipeline Value'),
        (problem_has_summary, 'Problem HAs Analysis'),
        (problem_has_data, 'Problem HAs Data'),
    ]:
        df.to_excel(writer, sheet_name=sheet, index=False)

        # Auto-adjust columns' width. get_col_widths returns the index width as its first entry, but the index
        # is not written (index=False), so we drop that entry; otherwise every width would be applied one
        # column to the right of the column it was measured from.
        for i, width in enumerate(get_col_widths(df)[1:]):
            writer.sheets[sheet].set_column(i, i, width)
|
||||
0
etl/eligibility/ha_15_32/__init__.py
Normal file
0
etl/eligibility/ha_15_32/__init__.py
Normal file
1146
etl/eligibility/ha_15_32/app.py
Normal file
1146
etl/eligibility/ha_15_32/app.py
Normal file
File diff suppressed because it is too large
Load diff
113
etl/eligibility/ha_15_32/cancellation.py
Normal file
113
etl/eligibility/ha_15_32/cancellation.py
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
import openpyxl
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
|
||||
def get_excel_survey_list(workbook_path, worksheet_name=None):
    """
    Read a survey worksheet into a dataframe, recording the fill colour of the first cell of each row.

    :param workbook_path: path of the Excel workbook to open
    :param worksheet_name: name of the worksheet to read; when None the workbook's active sheet is used
    :return: dataframe with one row per worksheet row from row 2 onwards, columns named after the values in the
        worksheet's first row, plus a "row_colour" column holding the fill colour index of the row's first cell
        (None when that index is '00000000')
    """
    workbook = openpyxl.load_workbook(workbook_path)
    sheet = workbook.active if worksheet_name is None else workbook[worksheet_name]

    # The first row is assumed to hold the headers
    header = [cell.value for cell in sheet[1]]

    records = []
    fills = []
    # values_only=False gives us cell objects, which we need in order to read the fill as well as the value
    for cells in sheet.iter_rows(min_row=2, values_only=False):
        records.append([cell.value for cell in cells])
        fill_index = cells[0].fill.start_color.index
        # '00000000' is recorded as None (presumably openpyxl's index for an unfilled cell - TODO confirm)
        fills.append(None if fill_index == '00000000' else fill_index)

    survey_list = pd.DataFrame(records, columns=header)
    survey_list["row_colour"] = fills

    return survey_list
|
||||
|
||||
|
||||
def load_data():
    """
    Load the HA 16, HA 24 and HA 25 survey lists and combine them into one cancellation dataset.

    The workbooks are read from hard-coded paths under etl/eligibility/ha_15_32/, so this has to be run
    from a working directory where those relative paths resolve.

    :return: DataFrame with the columns "HA", "survey_status" and "cancellation_reason". Rows with a status
        of "no update" or "loft remaining" are excluded.
    """
    # Load for HA 16 - ECO 4
    ha16_survey_list = get_excel_survey_list('etl/eligibility/ha_15_32/HESTIA- HA 16 ECO4 SURVEY LIST.xlsx')

    # Load for HA 24 - ECO 4
    ha24_survey_list = get_excel_survey_list('etl/eligibility/ha_15_32/HESTIA - HA 24 ECO4 SURVEY LIST.xlsx')

    # Load for HA 25 - ECO 3
    ha25_survey_list = get_excel_survey_list(
        'etl/eligibility/ha_15_32/HESTIA - HA 25 ECO3 SURVEY LIST.xlsx', worksheet_name="CAVITY"
    )

    # Drop the columns that do not contain a single value
    ha25_survey_list = ha25_survey_list.dropna(axis=1, how='all')

    # Standardise the installation status columns. Statuses that are not listed are left untouched.
    ha16_survey_list["survey_status"] = ha16_survey_list["INSTALLED OR CANCELLED"].replace(
        {
            "NO UPDATE - CHECKED 2.10.23": "no update",
            "NO UPDATE - CHECKED 18.12.23": "no update",
            "INSTALLED": "installed",
            "CANCELLED": "cancelled",
            "LOFT STILL TO BE INSTALLED": "loft remaining",
        }
    )

    ha24_survey_list["survey_status"] = ha24_survey_list["INSTALLED OR CANCELLED"].replace(
        {
            "NO UPDATE - CHECKED 21.11.23": "no update",
            "NO UPDATE - CHECKED 18.12.23": "no update",
            "INSTALLED": "installed",
            "CANCELLED": "cancelled",
            "LOFT STILL TO BE INSTALLED": "loft remaining",
            "SEE NOTES >>": "see notes",
        }
    )

    # We need to prepare HA25 differently: the status is derived from the fill colour of the row
    ha25_colour_to_status = {
        "FF7030A0": "installed",
        "FF92D050": "installed",
        "FFFF0000": "cancelled",
        "FFFFFF00": "filler row - drop",
        "FF38FD23": "installed",
    }
    # Any colour that is not in the lookup (including no fill at all) is reported as "unknown"
    ha25_survey_list["survey_status"] = ha25_survey_list["row_colour"].map(ha25_colour_to_status).fillna("unknown")
    # Copy so that the column assignments below write to an independent frame rather than a filtered view
    ha25_survey_list = ha25_survey_list[ha25_survey_list["survey_status"] != "filler row - drop"].copy()

    # We standardise the cancellation reasons - just create a new column
    ha16_survey_list["cancellation_reason"] = ha16_survey_list["INSTALLERS NOTES ; REASONS FOR CANCELLATIONS"].copy()
    ha24_survey_list["cancellation_reason"] = ha24_survey_list["INSTALLERS NOTES ; REASONS FOR CANCELLATIONS"].copy()
    # There's no cancellation reason for HA25
    ha25_survey_list["cancellation_reason"] = "No reason provided"

    # Combine the dataframes
    ha16_survey_list["HA"] = "HA 16"
    ha24_survey_list["HA"] = "HA 24"
    ha25_survey_list["HA"] = "HA 25"

    output_columns = ["HA", "survey_status", "cancellation_reason"]
    cancellation_data = pd.concat(
        [
            ha16_survey_list[output_columns],
            ha24_survey_list[output_columns],
            ha25_survey_list[output_columns],
        ]
    )

    # Take just rows that we have a confirmed status for
    cancellation_data = cancellation_data[~cancellation_data["survey_status"].isin(["no update", "loft remaining"])]

    return cancellation_data
|
||||
|
||||
|
||||
def app():
    """
    Entry point for the analysis of the cancellation data provided by warmfront.

    At the moment this only loads the combined cancellation dataset; nothing is returned.

    :return: None
    """
    # Cancellations of jobs that completed invasive surveys and the installer could not conclude the work
    cancelled_sales = load_data()
|
||||
647
etl/eligibility/ha_15_32/ha16_app.py
Normal file
647
etl/eligibility/ha_15_32/ha16_app.py
Normal file
|
|
@ -0,0 +1,647 @@
|
|||
import os
|
||||
import msgpack
|
||||
import openpyxl
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from utils.s3 import read_from_s3
|
||||
from utils.logger import setup_logger
|
||||
from dotenv import load_dotenv
|
||||
from utils.s3 import read_dataframe_from_s3_parquet
|
||||
from tqdm import tqdm
|
||||
from backend.SearchEpc import SearchEpc
|
||||
from etl.eligibility.Eligibility import Eligibility
|
||||
from etl.eligibility.ha_15_32.app import prepare_model_data_row
|
||||
from etl.epc.DataProcessor import DataProcessor
|
||||
from etl.epc.settings import COLUMNS_TO_MERGE_ON
|
||||
from backend.ml_models.api import ModelApi
|
||||
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
|
||||
from recommendations.recommendation_utils import calculate_cavity_age
|
||||
from recommendation_utils import convert_thickness_to_numeric
|
||||
|
||||
import re
|
||||
|
||||
# NOTE(review): this module lives in etl/eligibility/ha_15_32/, so this path resolves to
# <module directory>/etl/eligibility/ha_15_32/.env - confirm that this is really where the .env file is.
ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"

logger = setup_logger()

# The .env file has to be loaded BEFORE the environment is read, otherwise a token that is only defined in
# the .env file is read as None.
load_dotenv(ENV_FILE)
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
||||
|
||||
|
||||
def load_data():
    """
    Load the HA 16 asset list and ECO4 survey list and match every survey row to an asset.

    The workbooks are read from hard-coded paths under etl/eligibility/ha_15_32/, so this has to be run
    from a working directory where those relative paths resolve.

    :return: tuple of (data, survey_list). data is the asset list left-joined with the matched survey rows
        and a boolean "warmfront_identified" column; survey_list is the cleaned survey list.
    :raises ValueError: if a survey row cannot be matched to exactly one asset
    """
    asset_list = _load_ha16_asset_list()
    survey_list = _load_ha16_survey_list()

    # We now need to merge the survey list onto the asset list
    matched = _match_surveys_to_assets(survey_list, asset_list)
    matched["warmfront_identified"] = True

    # Combine asset list and surveys
    data = asset_list.merge(
        matched, how="left", left_on="Address", right_on="matched_address",
    )
    # Unmatched assets hold NaN after the left join. Comparing against True gives a real boolean column
    # without relying on fillna downcasting an object column.
    data["warmfront_identified"] = data["warmfront_identified"].eq(True)

    return data, survey_list


# Corrections applied, in this order, to the lower-cased survey street names so that they agree with the
# asset list. The order matters: the specific "..., fitton hill" entries come before the generic one.
_HA16_STREET_NAME_CORRECTIONS = (
    ("eccels", "eccles"),
    ("chatley, road", "chatley road"),
    ("vaughen", "Vaughan"),
    ("cresent", "crescent"),
    ("plantation road", "plantation avenue"),
    ("how clough drive", "howclough drive"),
    ("brockhurst lane", "brookhurst lane"),
    ("biirch road", "birch road"),
    ("hadson road", "hodson road"),
    ("harbonne avennue", "narbonne avenue"),
    ("cumberland road, cadishead", "cumberland avenue, cadishead"),
    ("aston field drive", "ashton field drive"),
    ("wedgewood road", "wedgwood road"),
    ("hamilton close", "hamilton avenue"),
    ("lichens crescent, fitton hill", "lichens crescent"),
    ("south croft, fitton hill", "south croft"),
    (", fitton hill", ""),
    ("firtree dr", "fir tree avenue"),
    ("hawthorne road", "hawthorn crescent"),
    ("rein lee avenue", "reins lee avenue"),
    ("westerhill road", "wester hill road"),
    ("st martins road", "saint martins road"),
    ("timperley avenue", "timperley close"),
    ("eastwood road", "eastwood avenue"),
    ("new road", "new street"),
    ("grassmere road", "grasmere road"),
    ("hulton road", "hulton avenue"),
    ("beechfield avenue", "beechfield road"),
    ("princess avenue", "princes avenue"),
    ("edge ford crecent", "edge fold crescent"),
    ("conniston avenue", "coniston avenue"),
    ("blackthorne crescent", "blackthorn crescent"),
    ("wellstock road", "wellstock lane"),
    ("brackley avenue", "brackley street"),
    ("brook avenue swinton", "brook avenue, swinton"),
    ("green avenue swinton", "green avenue, swinton"),
    ("grasmere avenue wardley", "grasmere avenue, wardley"),
    ("mardale avenue wardle", "mardale avenue, wardle"),
    ("carleach grove", "cartleach Grove"),
    ("arbour grove", "arbor Grove"),
)


def _read_rows_with_fill(sheet):
    """Return (rows, colours) for the data rows of sheet; the first row is assumed to hold the headers."""
    rows = []
    colours = []
    for row in sheet.iter_rows(min_row=2, values_only=False):
        rows.append([cell.value for cell in row])
        fill_index = row[0].fill.start_color.index
        # '00000000' is treated as "no fill"
        colours.append(fill_index if fill_index != '00000000' else None)
    return rows, colours


def _load_ha16_asset_list():
    """Read both HA 16 asset list workbooks into one DataFrame with row colour and address part columns."""
    # This asset list is spread across two workbooks, which we need to combine
    asset_list_filenames = [
        "HESTIA - HA 16 ASSET LIST PART 1 OF 2.xlsx",
        "HESTIA - HA 16 ASSET LIST PART 2 OF 2.xlsx",
    ]

    # Prepare lists to collect rows data and their colors
    rows_data = []
    rows_colors = []
    colnames = []
    for asset_list_filename in asset_list_filenames:
        workbook = openpyxl.load_workbook(f'etl/eligibility/ha_15_32/{asset_list_filename}')
        sheet = workbook.active
        colnames.append([cell.value for cell in sheet[1]])

        sheet_rows, sheet_colors = _read_rows_with_fill(sheet)
        rows_data.extend(sheet_rows)
        rows_colors.extend(sheet_colors)

    # The headers of the first workbook are used for the rows of both workbooks
    asset_list = pd.DataFrame(rows_data, columns=colnames[0])
    # Remove None columns. Copy so that the new columns are added to an independent frame.
    asset_list = asset_list.iloc[:, 0:12].copy()
    asset_list['row_color'] = rows_colors

    # Anything that is neither red nor green is labelled yellow
    asset_list["row_colour_name"] = np.where(
        asset_list["row_color"] == "FFFF0000", "red",
        np.where(asset_list["row_color"] == "FF92D050", "green", "yellow")
    )

    # Split up the address on commas, which is useful for matching later
    split_addresses = asset_list['Address'].str.split(',', expand=True)
    split_addresses.columns = ['temp', 'address2', 'address3', 'address4', 'address5']
    asset_list = pd.concat([asset_list, split_addresses], axis=1)

    # There is no commas separating house number and address 1, so the house number is the first
    # space-separated token. We only care about HouseNo for the moment, so only that token is kept.
    asset_list["HouseNo"] = asset_list['temp'].str.split(' ').str[0]

    return asset_list


def _load_ha16_survey_list():
    """Read the HA 16 ECO4 survey list and clean its street names, postcodes and house numbers."""
    survey_workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA- HA 16 ECO4 SURVEY LIST.xlsx')
    survey_sheet = survey_workbook.active

    survey_rows, survey_colors = _read_rows_with_fill(survey_sheet)
    survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]])

    # For the survey list, we don't need the colours, since there is a column called "INSTALLED OR CANCELLED"
    # which describes the status of the property
    survey_list["row_colour"] = survey_colors
    survey_list["survey_key"] = [f"survey_{i}" for i in range(len(survey_list))]

    # Tidy up the street/block name a bit
    street = survey_list["Street / Block Name"].str.replace("/", ", ", regex=False)
    street = street.str.lower()
    # Replace "rd" with "road" (this also covers "reeds rd")
    street = street.str.replace(r'\brd\b', 'road', regex=True)
    # Fix "{place} ,{place}" and "{place} , {place}" with "{place}, {place}"
    street = street.str.replace(r'\s*,\s*', ', ', regex=True)
    # Strip whitespace
    street = street.str.strip()

    # Correct errors
    postcode = survey_list["Post Code"]
    postcode = postcode.mask(postcode == "M38 0SA", "M38 9SA")
    postcode = postcode.mask((street == "nelson drive") & (postcode == "M44 5JE"), "M44 5JF")
    survey_list["Post Code"] = postcode

    for wrong, right in _HA16_STREET_NAME_CORRECTIONS:
        street = street.str.replace(wrong, right, regex=False)
    survey_list["Street / Block Name"] = street

    # Replacement for clively avenue 66-68
    survey_list["NO."] = survey_list["NO."].mask(survey_list["NO."] == "66-68", "66")

    return survey_list


def _match_surveys_to_assets(survey_list, asset_list):
    """Find the single asset for every survey row, narrowing by street, house number and then postcode."""
    # Lower-cased once up front rather than once per survey row
    addresses = asset_list["Address"].str.lower()
    house_numbers = asset_list["HouseNo"].str.lower()
    postcodes = asset_list["Postcode"].str.lower()

    # Could be easier just to do a search on each row, even though it's much slower
    matched = []
    for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):
        house_number = row["NO."]
        if isinstance(house_number, str):
            house_number = house_number.lower()
        house_number = str(house_number)

        # Plain substring matching: street names and house numbers are not regular expressions.
        # Filter on the first line of the address.
        candidates = (
            addresses.str.contains(row["Street / Block Name"].lower(), regex=False, na=False)
            & addresses.str.contains(house_number, regex=False, na=False)
        )
        if candidates.sum() != 1:
            # The house number was lower-cased above, so compare it with the lower-cased asset house number
            candidates &= house_numbers == house_number
        if candidates.sum() != 1:
            candidates &= postcodes.str.contains(row["Post Code"].lower(), regex=False, na=False)
        if candidates.sum() != 1:
            raise ValueError(
                f"Investigate: expected exactly one asset for {row['survey_key']} "
                f"({row['NO.']} {row['Street / Block Name']}, {row['Post Code']}) "
                f"but found {int(candidates.sum())}"
            )

        matched.append(
            {
                "survey_key": row["survey_key"],
                "matched_address": asset_list.loc[candidates, "Address"].values[0],
                "survey_house_no": row["NO."],
                "survey_street_name": row["Street / Block Name"],
                "survey_postcode": row["Post Code"],
                "survey_status": row["INSTALLED OR CANCELLED"]
            }
        )

    return pd.DataFrame(matched)
|
||||
|
||||
|
||||
def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
    """
    Look up the EPC of every property, assess its eligibility and score the ECO4 eligible properties.

    :param data: DataFrame with one row per property; the "row_id", "HouseNo", "Postcode", "Address" and
        "Type" columns are read
    :param cleaned: cleaned EPC lookup passed on to Eligibility, calculate_cavity_age and
        prepare_model_data_row
    :param cleaning_data: cleaning dataset passed on to prepare_model_data_row and DataProcessor
    :param created_at: timestamp passed on to prepare_model_data_row and ModelApi
    :param photo_supply_lookup: passed on to prepare_model_data_row
    :param floor_area_decile_thresholds: passed on to prepare_model_data_row
    :return: tuple of (results_df, scoring_data, nodata). results_df has one row per property with an EPC,
        scoring_data is the list of model rows for the ECO4 eligible properties and nodata is the list of
        properties for which no EPC was found.
    :raises KeyError: if a property has a "Type" that is not in the property type lookup
    """
    scoring_data = []
    results = []
    nodata = []

    property_type_lookup = {
        'Semi Detached Bungalow': {"property-type": "Bungalow", "built-form": "Semi-Detached"},
        'Mid Terraced House': {"property-type": "House", "built-form": "Mid-Terrace"},
        'End Terraced House': {"property-type": "House", "built-form": "End-Terrace"},
        'Low Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"},
        'Semi-Detached House': {"property-type": "House", "built-form": "Semi-Detached"},
        'Detached Bungalow': {"property-type": "Bungalow", "built-form": "Detached"},
        'End Terraced Bungalow': {"property-type": "Bungalow", "built-form": "End-Terrace"},
        'Mid Terraced Bungalow': {"property-type": "Bungalow", "built-form": "Mid-Terrace"},
        'Medium Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"},
        'Detached House': {"property-type": "House", "built-form": "Detached"},
        'Cottage Flat': {"property-type": "Flat", "built-form": "Semi-Detached"},
        'Maisonette Medium Rise': {"property-type": "Flat", "built-form": "Mid-Terrace"},
        'Maisonette Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
        'End Terraced Town House': {"property-type": "House", "built-form": "End-Terrace"},
        'Flat Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
        'Mid Terraced Town House': {"property-type": "House", "built-form": "Mid-Terrace"},
    }

    for _, property_meta in tqdm(data.iterrows(), total=len(data)):

        searcher = SearchEpc(
            address1=property_meta["HouseNo"],
            postcode=property_meta["Postcode"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            full_address=property_meta["Address"]
        )
        property_type = property_type_lookup[property_meta["Type"]]
        searcher.ordnance_survey_client.property_type = property_type["property-type"]
        searcher.ordnance_survey_client.built_form = property_type["built-form"]
        searcher.find_property(skip_os=True)

        if searcher.newest_epc is None:
            nodata.append(property_meta)
            continue

        if searcher.newest_epc.get("estimated"):
            # We insert the row ID as our proxy for UPRN
            proxy_uprn = int(property_meta["row_id"].split("_")[1])
            searcher.newest_epc["uprn"] = proxy_uprn

        newest_epc = searcher.newest_epc
        older_epcs = searcher.older_epcs
        full_sap_epc = searcher.full_sap_epc
        # We also want to get the penultimate epc
        penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
        if not penultimate_epc:
            penultimate_epc = newest_epc

        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
        eligibility.check_gbis_warmfront()
        eligibility.check_eco4_warmfront()

        if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
            # Not eligible on the newest EPC, so the assessment is repeated on the penultimate EPC
            eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
            eligibility.check_gbis_warmfront()
            eligibility.check_eco4_warmfront()
            # If this is the case, we need to update the older epcs
            # We don't update just to make data cleaning easier
            if penultimate_epc.get("estimated") is None:
                older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]

        # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity

        # Loft MUST be suitable
        cavity_age = None
        if (
                eligibility.walls["is_cavity_wall"] and
                eligibility.walls["is_filled_cavity"] and
                eligibility.loft["suitability"] and
                eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
        ):
            # We check the age of the cavity and if it's particularly old, we flag it
            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)

        # Full checks
        eligibility.check_gbis()
        eligibility.check_eco4()

        if eligibility.eco4_warmfront["eligible"]:
            if eligibility.epc["uprn"] == "":
                eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[1])

            scoring_dictionary = prepare_model_data_row(
                property_id=property_meta["row_id"],
                modelling_epc=eligibility.epc,
                cleaned=cleaned,
                cleaning_data=cleaning_data,
                created_at=created_at,
                old_data=older_epcs,
                full_sap_epc=full_sap_epc,
                photo_supply_lookup=photo_supply_lookup,
                floor_area_decile_thresholds=floor_area_decile_thresholds
            )
            scoring_data.extend(scoring_dictionary)

        results.append(
            {
                "row_id": property_meta["row_id"],
                "uprn": eligibility.epc["uprn"],
                "Address": property_meta["Address"],
                "Postcode": property_meta["Postcode"],
                "property_type": eligibility.epc["property-type"],
                "gbis_eligible": eligibility.gbis_warmfront,
                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                "eco4_message": eligibility.eco4_warmfront["message"],
                "sap": float(eligibility.epc["current-energy-efficiency"]),
                "gbis_eligible_future": eligibility.gbis["eligible"],
                "gbis_eligible_future_message": eligibility.gbis["message"],
                "eco4_eligible_future": eligibility.eco4["eligible"],
                "eco4_eligible_future_message": eligibility.eco4["message"],
                # Property components
                "roof": eligibility.roof["clean_description"],
                "walls": eligibility.walls["clean_description"],
                "cavity_type": eligibility.cavity["type"],
                "heating": eligibility.epc["mainheat-description"],
                "tenure": eligibility.tenure,
                "date_epc": eligibility.epc["lodgement-date"],
                "loft_thickness": eligibility.roof["insulation_thickness"],
                "cavity_age": cavity_age,
                **eligibility.walls,
                **eligibility.roof,
            }
        )

    results_df = pd.DataFrame(results)

    if not scoring_data:
        # Nothing was ECO4 eligible, so there is nothing to score: the model steps below would fail on an
        # empty frame. The columns they would have added are returned empty instead.
        results_df["sap_uplift"] = np.nan
        results_df["post_install_sap"] = np.nan
        results_df["eligibility_classification"] = np.nan
        return results_df, scoring_data, nodata

    scoring_df = pd.DataFrame(scoring_data)

    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )

    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])

    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
                "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )

    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)

    # NOTE(review): the portfolio id says "ha33" although this is the HA 16 application - confirm
    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )

    predictions = all_predictions["sap_change_predictions"].copy()

    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
        results_df[["row_id", "sap"]], how="left", on="row_id"
    )
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    # The uplifts of all the prediction rows of a property are added together
    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()

    results_df = results_df.merge(
        predictions[["sap_uplift", "row_id"]],
        how="left",
        on="row_id"
    )
    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]

    eligible = results_df[results_df["eco4_eligible"] == True]
    # The columns are given explicitly so that the merge below also works when no row is eligible
    eligibility_assessment = pd.DataFrame(
        [
            {
                "row_id": row_id,
                "eligibility_classification": _classify_eco4_confidence(sap, post_install_sap)
            }
            for row_id, sap, post_install_sap in zip(
                eligible["row_id"], eligible["sap"], eligible["post_install_sap"]
            )
        ],
        columns=["row_id", "eligibility_classification"]
    )

    results_df = results_df.merge(
        eligibility_assessment, how="left", on="row_id"
    )
    return results_df, scoring_data, nodata


def _classify_eco4_confidence(sap, post_install_sap):
    """Classify how confident we are that the predicted post-install SAP meets the upgrade requirement."""
    # The upgrade requirements are dependent on the current SAP: a property with a SAP of 38 or below is
    # assessed against lower targets than the rest.
    # NOTE(review): presumably 38 is the top of EPC band F and the targets sit around the band D (55) and
    # band C (69) boundaries - confirm against the scheme rules.
    if sap <= 38:
        highest, high, medium = 57, 55, 53
    else:
        highest, high, medium = 71, 69, 67

    if post_install_sap >= highest:
        return "highest confidence"
    if post_install_sap >= high:
        return "high confidence"
    if post_install_sap >= medium:
        return "medium confidence"
    # A missing prediction (NaN) fails every comparison above and also ends up here
    return "unlikely"
|
||||
|
||||
|
||||
def analyse_results(results_df, data, survey_list):
    """
    Compare our eligibility results with the properties that warmfront identified.

    :param results_df: per-property results as returned by get_epc_data
    :param data: asset list with "row_id", "survey_key", "warmfront_identified" and "row_colour_name"
    :param survey_list: survey list; its first column is used as the funding scheme of a survey
    :return: dictionary holding every table, count and rate computed below, keyed by name. Previously
        nothing was returned, so the analysis could only be inspected in a debugger.
    """
    analysis_data = data[["row_id", "survey_key", "warmfront_identified", "row_colour_name"]].merge(
        results_df, how="left", on="row_id"
    ).merge(
        survey_list[["survey_key", survey_list.columns[0]]].rename(columns={survey_list.columns[0]: "funding_scheme"}),
        how="left", on="survey_key"
    )

    analysis_data["roof_insulation_thickness"] = np.where(
        pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"]
    )
    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
        lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True)
    )

    # Explicit comparisons rather than the bare column or "~": these stay correct when the flag column is
    # held as an object column instead of a boolean one
    identified = analysis_data["warmfront_identified"] == True
    not_identified = analysis_data["warmfront_identified"] == False
    eco4_eligible = analysis_data["eco4_eligible"] == True
    eco4_ineligible = analysis_data["eco4_eligible"] == False
    gbis_eligible = analysis_data["gbis_eligible"] == True
    eco4_scheme = analysis_data["funding_scheme"].isin(["ECO4 A/W", "AFFORDABLE WARMTH"])
    gbis_scheme = analysis_data["funding_scheme"].isin(["ECO4 GBIS (ECO+)"])
    loft_up_to_100 = analysis_data["roof_insulation_thickness_numeric"] <= 100
    loft_over_100 = analysis_data["roof_insulation_thickness_numeric"] > 100

    warmfront_sold_eco4 = analysis_data[identified & eco4_scheme]
    warmfront_sold_gbis = analysis_data[identified & gbis_scheme]

    ideal_eco4_warmfront_not_sold = analysis_data[eco4_eligible & not_identified & loft_up_to_100]
    secondary_eco4_warmfront_not_sold = analysis_data[eco4_eligible & not_identified & loft_over_100]

    # underperforming cavities: filled cavities older than ten years (cavity_age is compared in days)
    underperforming_cavities = analysis_data[
        (analysis_data["eco4_message"] == "Failed due to full cavity - check cavity age") & (
            analysis_data["cavity_age"] > 10 * 365
        ) & loft_up_to_100
    ]

    identified_gbis_not_sold = analysis_data[gbis_eligible & not_identified & eco4_ineligible]

    eco_eligible = analysis_data[eco4_eligible]
    eco_ineligible = analysis_data[eco4_ineligible]

    # SAP too high:
    sap_too_high = eco_ineligible[eco_ineligible["eco4_message"] == "sap too high"].copy()
    further_possibilities = sap_too_high[
        sap_too_high["walls"].isin(
            [
                "Cavity wall, as built, insulated",
                "Cavity wall, as built, no insulation",
                "Cavity wall, as built, partial insulation",
                "Cavity wall, no insulation",
                "Cavity wall, partial insulation"
            ]
        )
    ]

    # NOTE(review): this applies the same "sap too high" filter as sap_too_high although it is named
    # filled_cavities - confirm which eco4_message was intended
    filled_cavities = eco_ineligible[
        eco_ineligible["eco4_message"] == "sap too high"
    ]

    warmfront_identified = analysis_data[identified]

    all_identified_gbis = analysis_data[
        (identified & gbis_scheme) |
        (analysis_data["gbis_eligible"] & analysis_data["eco4_eligible"].isin([False, None]))
    ]

    empty_cavity_descriptions = [
        "Cavity wall, as built, no insulation", "Cavity wall, as built, partial insulation",
        "Cavity wall, no insulation", "Cavity wall, partial insulation"
    ]

    empty_cavities = analysis_data[analysis_data["walls"].isin(empty_cavity_descriptions)]
    remaining_empty = empty_cavities[empty_cavities["warmfront_identified"] == False]

    # Of the ECO jobs, what proportion do we get right
    warmfront_identified_eco = warmfront_identified[
        warmfront_identified["funding_scheme"].isin(["ECO4 A/W", "AFFORDABLE WARMTH"])
    ]
    eco_count = warmfront_identified_eco.shape[0]
    # No rate can be given when warmfront did not sell anything under the scheme
    eco_success_rate = warmfront_identified_eco["eco4_eligible"].sum() / eco_count if eco_count else np.nan

    warmfront_identified_gbis = warmfront_identified[
        warmfront_identified["funding_scheme"].isin(["ECO4 GBIS (ECO+)"])
    ]
    gbis_count = warmfront_identified_gbis.shape[0]
    gbis_success_rate = warmfront_identified_gbis["gbis_eligible"].sum() / gbis_count if gbis_count else np.nan

    # Additional identified
    additional_identified_eco = analysis_data[eco4_eligible & not_identified]
    additional_identified_gbis = analysis_data[gbis_eligible & eco4_ineligible & not_identified].shape[0]

    # Future
    additional_identified_eco_future = analysis_data[
        (analysis_data["eco4_eligible_future"] == True) & not_identified
    ].shape[0]
    additional_identified_gbis_future = analysis_data[
        (analysis_data["gbis_eligible_future"] == True) & (analysis_data["eco4_eligible_future"] == False) & (
            not_identified
        )
    ].shape[0]

    return {
        "analysis_data": analysis_data,
        "warmfront_sold_eco4": warmfront_sold_eco4,
        "warmfront_sold_gbis": warmfront_sold_gbis,
        "ideal_eco4_warmfront_not_sold": ideal_eco4_warmfront_not_sold,
        "secondary_eco4_warmfront_not_sold": secondary_eco4_warmfront_not_sold,
        "underperforming_cavities": underperforming_cavities,
        "identified_gbis_not_sold": identified_gbis_not_sold,
        "eco_eligible": eco_eligible,
        "eco_ineligible": eco_ineligible,
        "eco_ineligible_message_counts": eco_ineligible["eco4_message"].value_counts(),
        "sap_too_high": sap_too_high,
        "further_possibilities": further_possibilities,
        "filled_cavities": filled_cavities,
        "warmfront_identified": warmfront_identified,
        "warmfront_identified_wall_counts": warmfront_identified["walls"].value_counts(),
        "all_identified_gbis": all_identified_gbis,
        "empty_cavities": empty_cavities,
        "remaining_empty": remaining_empty,
        "warmfront_identified_eco": warmfront_identified_eco,
        "eco_success_rate": eco_success_rate,
        "warmfront_identified_gbis": warmfront_identified_gbis,
        "gbis_success_rate": gbis_success_rate,
        "additional_identified_eco": additional_identified_eco,
        "additional_identified_eco_classification_counts": (
            additional_identified_eco["eligibility_classification"].value_counts()
        ),
        "additional_identified_gbis": additional_identified_gbis,
        "additional_identified_eco_future": additional_identified_eco_future,
        "additional_identified_gbis_future": additional_identified_gbis_future,
    }
|
||||
|
||||
|
||||
def app():
    """Run the HA16 eligibility pipeline from raw spreadsheets to model results.

    Loads the asset and survey data, labels every asset row with an
    ``ha16_<position>`` identifier, fetches the cleaned EPC lookup, the cleaning
    dataset and the solar lookups from the ``retrofit-data-dev`` bucket and then
    passes everything to ``get_epc_data``. Nothing is returned; the outputs are
    left in local variables.
    """
    bucket = "retrofit-data-dev"

    data, survey_list = load_data()

    # Positional identifier for each asset row: "ha16_0", "ha16_1", ...
    data["row_id"] = [f"ha16_{position}" for position in range(len(data))]

    # The cleaned EPC lookup is downloaded as bytes and decoded with msgpack
    packed_cleaned = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name=bucket,
    )
    cleaned = msgpack.unpackb(packed_cleaned, raw=False)

    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name=bucket,
        file_key="sap_change_model/cleaning_dataset.parquet",
    )

    # One timestamp shared by everything produced in this run
    created_at = datetime.now().isoformat()

    solar_lookups = SolarPhotoSupply.load(bucket=bucket)
    photo_supply_lookup, floor_area_decile_thresholds = solar_lookups

    results_df, scoring_data, nodata = get_epc_data(
        data,
        cleaned,
        cleaning_data,
        created_at,
        photo_supply_lookup,
        floor_area_decile_thresholds,
    )
|
||||
|
||||
# Store
|
||||
# Old file was ha16.pickle
|
||||
# import pickle
|
||||
# with open("ha16_10_jan.pickle", "wb") as f:
|
||||
# pickle.dump(
|
||||
# {
|
||||
# "scoring_data": scoring_data,
|
||||
# "results": results_df,
|
||||
# "nodata": nodata
|
||||
# }, f
|
||||
# )
|
||||
|
||||
# Read pickle
|
||||
# import pickle
|
||||
# with open("ha16_10_jan.pickle", "rb") as f:
|
||||
# saved = pickle.load(f)
|
||||
# scoring_data = saved["scoring_data"]
|
||||
# results_df = saved["results"]
|
||||
# nodata = saved["nodata"]
|
||||
524
etl/eligibility/ha_15_32/ha24_app.py
Normal file
524
etl/eligibility/ha_15_32/ha24_app.py
Normal file
|
|
@ -0,0 +1,524 @@
|
|||
import os
|
||||
import msgpack
|
||||
import openpyxl
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet
|
||||
from utils.logger import setup_logger
|
||||
from dotenv import load_dotenv
|
||||
from tqdm import tqdm
|
||||
from backend.SearchEpc import SearchEpc
|
||||
from etl.eligibility.Eligibility import Eligibility
|
||||
from etl.eligibility.ha_15_32.app import prepare_model_data_row
|
||||
from etl.epc.DataProcessor import DataProcessor
|
||||
from etl.epc.settings import COLUMNS_TO_MERGE_ON
|
||||
from backend.ml_models.api import ModelApi
|
||||
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
|
||||
from recommendations.recommendation_utils import calculate_cavity_age
|
||||
from recommendation_utils import convert_thickness_to_numeric
|
||||
|
||||
# This module lives in etl/eligibility/ha_15_32/, so the .env file sits right next
# to it. Appending "etl/eligibility/ha_15_32" again (as before) pointed at a nested
# folder under this directory instead of at this directory.
ENV_FILE = Path(__file__).resolve().parent / ".env"

logger = setup_logger()

# Load the .env file BEFORE reading the token: os.getenv only sees variables that
# are already in the process environment at the moment it is called.
load_dotenv(ENV_FILE)
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
||||
|
||||
|
||||
# Literal corrections applied, in this order, to the lower-cased survey street
# names so that they line up with the spelling used in the asset list.
_STREET_NAME_CORRECTIONS = (
    ("council house, nidds lane", "nidds lane"),
    ("wirral avenue", "wirrall avenue"),
    ("st ives road", "st. ives crescent"),
    ("sundringham road", "sandringham road"),
    ("milton avenue", "milton road"),
    ("st ives crescent", "st. ives crescent"),
    ("council house, waterbelly lane", "waterbelly lane"),
    # Generally remove "council house, " from the start of the street name
    ("council house, ", ""),
    ("st. leodegars close", "st leodegars close"),
)


def _read_sheet_rows_with_colours(sheet):
    """Return ``(rows, colours)`` for every row below the header row of *sheet*.

    ``rows`` holds the cell values of each row. ``colours`` holds the fill colour
    index of the first cell of each row, or ``None`` when that index is
    ``'00000000'``.
    """
    rows = []
    colours = []
    for row in sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
        rows.append([cell.value for cell in row])
        fill_index = row[0].fill.start_color.index
        colours.append(fill_index if fill_index != '00000000' else None)
    return rows, colours


def load_data():
    """Load the HA24 asset list and ECO4 survey list and link surveys to assets.

    Both workbooks are read from ``etl/eligibility/ha_15_32/`` relative to the
    current working directory.

    Returns:
        A ``(data, survey_list)`` tuple. ``data`` is the asset list left-joined to
        the matched survey rows, with a boolean ``warmfront_identified`` column
        that is True for assets that matched a survey row. ``survey_list`` is the
        tidied survey sheet with a ``survey_key`` column.

    Raises:
        ValueError: if a survey row cannot be narrowed down to exactly one asset.
    """
    workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 24 ASSET LIST.xlsx')
    sheet = workbook.active
    sheet_colnames = [cell.value for cell in sheet[1]]
    rows_data, rows_colors = _read_sheet_rows_with_colours(sheet)

    asset_list = pd.DataFrame(rows_data, columns=sheet_colnames)
    # Remove None columns. Copy so the column assignments below act on an
    # independent frame rather than on a slice of the original.
    asset_list = asset_list.iloc[:, 0:10].copy()
    asset_list['row_color'] = rows_colors

    asset_list["row_colour_name"] = np.where(
        asset_list["row_color"] == "FFFF0000", "red",
        np.where(asset_list["row_color"] == "FF92D050", "green", "yellow")
    )

    asset_list["row_colour_code"] = np.where(
        asset_list["row_colour_name"] == "red", "does not meet criteria",
        np.where(asset_list["row_colour_name"] == "green", "identified potential eco", "maybe in the future")
    )

    # The third column is listed as "Address" but it's actually the postcode. We have
    # two Address columns so we change just the third. The labels are rebuilt and
    # reassigned because an Index is immutable: writing into `columns.values` is not
    # supported and can leave pandas with stale cached labels.
    column_labels = list(asset_list.columns)
    column_labels[2] = "Postcode"
    asset_list.columns = column_labels

    # Split up the address on commas, which is useful for matching later. The
    # column names are derived from however many parts the split produced, so a
    # sheet with a different number of address parts does not raise.
    split_addresses = asset_list['Address'].str.split(',', expand=True)
    split_addresses.columns = ['temp'] + [
        f"address{part}" for part in range(2, split_addresses.shape[1] + 1)
    ]
    asset_list = pd.concat([asset_list, split_addresses], axis=1)

    # There are no commas separating the house number and address line 1, so the
    # house number is the first space-separated token. We only care about HouseNo
    # for the moment.
    asset_list["HouseNo"] = asset_list['temp'].str.split(' ').str[0]

    # Read in surveys
    survey_workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 24 ECO4 SURVEY LIST.xlsx')
    survey_sheet = survey_workbook.active
    survey_rows, survey_colors = _read_sheet_rows_with_colours(survey_sheet)

    survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]])
    survey_list["row_colour"] = survey_colors

    # Tidy up the street/block name a bit. regex=False keeps every replacement a
    # literal one regardless of the pandas version's default.
    street_names = survey_list["Street / Block Name"].str.replace("/", ", ", regex=False)
    street_names = street_names.str.lower().str.strip()
    for wrong, right in _STREET_NAME_CORRECTIONS:
        street_names = street_names.str.replace(wrong, right, regex=False)
    survey_list["Street / Block Name"] = street_names

    # Drop all None rows, then key the remaining surveys by position
    survey_list = survey_list[~pd.isnull(survey_list["Street / Block Name"])].copy()
    survey_list["survey_key"] = ["survey_" + str(i) for i in range(0, len(survey_list))]

    # Lower-cased once here instead of once per survey row inside the loop
    lowered_addresses = asset_list["Address"].str.lower()

    matched = []
    for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):
        house_number = row["NO."]
        if isinstance(house_number, str):
            house_number = house_number.lower()

        # Filter on the first line of the address (literal substring match)
        street_name = row["Street / Block Name"].lower()
        df = asset_list[lowered_addresses.str.contains(street_name, regex=False)].copy()
        df = df[df["Address"].str.lower().str.contains(str(house_number), regex=False)]

        # Progressively tighten the match until exactly one asset remains
        if df.shape[0] != 1:
            df = df[df["HouseNo"] == str(house_number)]
        if df.shape[0] != 1:
            df = df[df["Postcode"].str.lower().str.contains(row["Post Code"].lower(), regex=False)]
        if df.shape[0] != 1:
            print(row["Street / Block Name"])
            print(house_number)
            print(row["Post Code"].lower())
            raise ValueError("Investigate")

        matched.append(
            {
                "survey_key": row["survey_key"],
                "matched_address": df["Address"].values[0],
                "survey_house_no": row["NO."],
                "survey_street_name": row["Street / Block Name"],
                "survey_postcode": row["Post Code"],
                "survey_status": row["INSTALLED OR CANCELLED"]
            }
        )

    # Explicit columns keep the merge key present even when nothing matched
    matched = pd.DataFrame(
        matched,
        columns=[
            "survey_key",
            "matched_address",
            "survey_house_no",
            "survey_street_name",
            "survey_postcode",
            "survey_status",
        ],
    )
    matched["warmfront_identified"] = True

    # Combine asset list and surveys
    data = asset_list.merge(
        matched, how="left", left_on="Address", right_on="matched_address",
    )
    # Assets without a survey come out of the left join as NaN; make the flag a
    # real boolean column so it can be used directly as a mask.
    data["warmfront_identified"] = data["warmfront_identified"].fillna(False).astype(bool)

    return data, survey_list
|
||||
|
||||
|
||||
def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
    """Look up EPCs for every asset, run eligibility checks and score SAP uplift.

    For each row of ``data`` the newest EPC is searched for, the Warmfront GBIS and
    ECO4 checks are run (falling back to the penultimate EPC when neither passes),
    and a result record is collected. Rows that pass the ECO4 Warmfront check are
    also turned into model scoring rows, which are cleaned and sent to ``ModelApi``
    to predict a post-install SAP and derive a confidence classification.

    Args:
        data: DataFrame of assets. The loop reads the ``HouseNo``, ``Postcode``,
            ``Address``, ``Property Type`` and ``row_id`` columns.
        cleaned: Cleaned EPC lookup, passed through to ``Eligibility``,
            ``calculate_cavity_age`` and ``prepare_model_data_row``.
        cleaning_data: Dataset passed to ``prepare_model_data_row`` and the
            ``DataProcessor`` averages cleaning.
        created_at: Timestamp passed to ``prepare_model_data_row`` and ``ModelApi``.
        photo_supply_lookup: Passed through to ``prepare_model_data_row``.
        floor_area_decile_thresholds: Passed through to ``prepare_model_data_row``.

    Returns:
        ``(results_df, scoring_data, nodata)``: the per-property results frame with
        ``sap_uplift``, ``post_install_sap`` and ``eligibility_classification``
        merged in, the raw list of scoring rows, and the list of asset rows for
        which no EPC was found.

    Raises:
        KeyError: if a ``Property Type`` value is missing from the lookup below.
    """
    scoring_data = []
    results = []
    nodata = []

    # Maps the asset list's property type codes onto the property type names set
    # on the search client. Note the variant with a leading space.
    property_type_lookup = {
        "01 HOUSE": "House",
        "02 FLAT": "Flat",
        "03 BUNGALOW": "Bungalow",
        "05 BEDSIT": "Flat",
        "04 MAISONETTE": "Maisonette",
        "01 HOUSE MID": "House",
        "10 PBUNGALOW": "Bungalow",
        "14 SFLAT": "Flat",
        "12 SBEDSIT": "Flat",
        "11 PFLAT": "Flat",
        "13 SBUNGALOW": "Bungalow",
        " 01 HOUSE MID": "House",
        "09 PBEDSIT": "Flat"
    }

    for _, property_meta in tqdm(data.iterrows(), total=len(data)):

        searcher = SearchEpc(
            address1=property_meta["HouseNo"],
            postcode=property_meta["Postcode"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            full_address=property_meta["Address"]
        )
        # The property type is set by hand because the search is run with skip_os=True
        searcher.ordnance_survey_client.property_type = property_type_lookup[property_meta["Property Type"]]
        searcher.find_property(skip_os=True)

        # No EPC found: record the asset row and move on
        if searcher.newest_epc is None:
            nodata.append(property_meta)
            continue

        newest_epc = searcher.newest_epc
        older_epcs = searcher.older_epcs
        full_sap_epc = searcher.full_sap_epc
        # We also want to get the penultimate epc; fall back to the newest one when
        # there is no older EPC to pick from
        penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
        if not penultimate_epc:
            penultimate_epc = newest_epc

        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
        eligibility.check_gbis_warmfront()
        eligibility.check_eco4_warmfront()

        # Neither scheme passes on the newest EPC: retry against the penultimate EPC
        if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
            eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
            eligibility.check_gbis_warmfront()
            eligibility.check_eco4_warmfront()
            # If this is the case, we need to update the older epcs.
            # Earlier variant, kept for reference:
            # older_epcs = [
            #     x for x in older_epcs if x["lmk-key"] not in [newest_epc["lmk-key"], penultimate_epc["lmk-key"]]
            # ]
            # We don't update just to make data cleaning easier
            if penultimate_epc.get("estimated") is None:
                # Every EPC row except the one now used for modelling
                older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]

        # Loft MUST be suitable
        cavity_age = None
        if (
            eligibility.walls["is_cavity_wall"] and
            eligibility.walls["is_filled_cavity"] and
            eligibility.loft["suitability"] and
            eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
        ):
            # We check the age of the cavity and if it's particularly old, we flag it
            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)

        # Full checks (reported below as the "future" eligibility columns)
        eligibility.check_gbis()
        eligibility.check_eco4()

        # Only ECO4 (Warmfront) eligible properties are sent to the model
        if eligibility.eco4_warmfront["eligible"]:
            # A later step casts UPRN to int, so substitute the numeric part of the
            # row id when the EPC has no UPRN
            if eligibility.epc["uprn"] in ["", None]:
                eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[1])

            scoring_dictionary = prepare_model_data_row(
                property_id=property_meta["row_id"],
                modelling_epc=eligibility.epc,
                cleaned=cleaned,
                cleaning_data=cleaning_data,
                created_at=created_at,
                old_data=older_epcs,
                full_sap_epc=full_sap_epc,
                photo_supply_lookup=photo_supply_lookup,
                floor_area_decile_thresholds=floor_area_decile_thresholds
            )
            # The helper's result is extended onto the list, so one property may
            # contribute several scoring rows
            scoring_data.extend(scoring_dictionary)

        results.append(
            {
                "row_id": property_meta["row_id"],
                "uprn": eligibility.epc["uprn"],
                "Address": property_meta["Address"],
                "Postcode": property_meta["Postcode"],
                "property_type": eligibility.epc["property-type"],
                "gbis_eligible": eligibility.gbis_warmfront,
                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                "eco4_message": eligibility.eco4_warmfront["message"],
                "sap": float(eligibility.epc["current-energy-efficiency"]),
                "gbis_eligible_future": eligibility.gbis["eligible"],
                "gbis_eligible_future_message": eligibility.gbis["message"],
                "eco4_eligible_future": eligibility.eco4["eligible"],
                "eco4_eligible_future_message": eligibility.eco4["message"],
                # Property components
                "roof": eligibility.roof["clean_description"],
                "walls": eligibility.walls["clean_description"],
                "cavity_type": eligibility.cavity["type"],
                "heating": eligibility.epc["mainheat-description"],
                "tenure": eligibility.tenure,
                "date_epc": eligibility.epc["lodgement-date"],
                "cavity_age": cavity_age,
                # Spread last, so keys from these dicts override same-named keys above
                **eligibility.walls,
                **eligibility.roof,
            }
        )

    scoring_df = pd.DataFrame(scoring_data)

    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )

    # Second pass on the standard merge columns; LOCAL_AUTHORITY is only needed for
    # the merge and is dropped afterwards
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])

    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )

    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)

    model_api = ModelApi(portfolio_id="ha24-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )

    # Only the SAP change predictions are used from here on
    predictions = all_predictions["sap_change_predictions"].copy()

    results_df = pd.DataFrame(results)

    # Attach each property's current SAP to its predictions
    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
        results_df[["row_id", "sap"]], how="left", on="row_id"
    )
    # Uplift of each prediction over the current SAP, summed per property
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()

    results_df = results_df.merge(
        predictions[["sap_uplift", "row_id"]],
        how="left",
        on="row_id"
    )
    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]

    eligibility_assessment = []
    for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
        # The upgrade requirements are dependent on the current SAP

        # A current SAP of 38 or below (the F or G case) is judged against the
        # lower ladder of post-install thresholds: 57 / 55 / 53
        if row["sap"] <= 38:
            if row["post_install_sap"] >= 57:
                eligibility_classification = "highest confidence"
            elif row["post_install_sap"] >= 55:
                eligibility_classification = "high confidence"
            elif row["post_install_sap"] >= 53:
                eligibility_classification = "medium confidence"
            else:
                eligibility_classification = "unlikely"
        else:
            # Everything else is judged against the higher ladder: 71 / 69 / 67

            if row["post_install_sap"] >= 71:
                eligibility_classification = "highest confidence"
            elif row["post_install_sap"] >= 69:
                eligibility_classification = "high confidence"
            elif row["post_install_sap"] >= 67:
                eligibility_classification = "medium confidence"
            else:
                eligibility_classification = "unlikely"

        eligibility_assessment.append(
            {
                "row_id": row["row_id"],
                "eligibility_classification": eligibility_classification
            }
        )

    eligibility_assessment = pd.DataFrame(eligibility_assessment)

    # Left join: rows that were not ECO4 eligible get no classification
    results_df = results_df.merge(
        eligibility_assessment, how="left", on="row_id"
    )
    return results_df, scoring_data, nodata
|
||||
|
||||
|
||||
def analyse_results(results_df, data, survey_list):
    """Compare the model's eligibility results with the surveyed properties.

    Joins the asset flags in ``data`` to ``results_df`` (on ``row_id``) and to the
    funding scheme taken from the first column of ``survey_list`` (on
    ``survey_key``), then derives a series of subsets, counts and one success
    rate. Every figure is kept in a local variable and nothing is returned.
    """
    eco4_schemes = ["ECO4 A/W", "AFFORDABLE WARMTH"]
    gbis_schemes = ["ECO4 GBIS (ECO+)"]

    # The first survey column is exposed under the name "funding_scheme"
    scheme_column = survey_list.columns[0]
    funding_schemes = survey_list[["survey_key", scheme_column]].rename(
        columns={scheme_column: "funding_scheme"}
    )
    asset_flags = data[["row_id", "survey_key", "warmfront_identified"]]
    analysis_data = asset_flags.merge(results_df, how="left", on="row_id")
    analysis_data = analysis_data.merge(funding_schemes, how="left", on="survey_key")

    # NEW

    # Replace missing thickness values with None before converting to a number
    raw_thickness = analysis_data["roof_insulation_thickness"]
    analysis_data["roof_insulation_thickness"] = np.where(pd.isnull(raw_thickness), None, raw_thickness)

    def to_numeric_thickness(value):
        return convert_thickness_to_numeric(value, is_flat=False, is_pitched=True)

    analysis_data["roof_insulation_thickness_numeric"] = (
        analysis_data["roof_insulation_thickness"].apply(to_numeric_thickness)
    )

    # Masks shared by the subsets below
    identified = analysis_data["warmfront_identified"] == True
    not_identified = analysis_data["warmfront_identified"] == False
    eco4_now = analysis_data["eco4_eligible"] == True
    gbis_now = analysis_data["gbis_eligible"] == True
    sold_as_eco4 = analysis_data["funding_scheme"].isin(eco4_schemes)
    sold_as_gbis = analysis_data["funding_scheme"].isin(gbis_schemes)

    warmfront_sold_eco4 = analysis_data[identified & sold_as_eco4]

    warmfront_sold_gbis = analysis_data[identified & sold_as_gbis]
    # 1407

    thin_roof_insulation = analysis_data["roof_insulation_thickness_numeric"] <= 100
    additional_eco4_warmfront_not_sold = analysis_data[eco4_now & not_identified & thin_roof_insulation]

    # GBIS candidates exclude anything already counted as an additional ECO4 row
    eco4_row_ids = additional_eco4_warmfront_not_sold["row_id"].values
    outside_eco4 = ~analysis_data["row_id"].isin(eco4_row_ids)
    additional_gbis_warmfront_not_sold = analysis_data[gbis_now & not_identified & outside_eco4]

    additional_gbis_warmfront_not_sold["walls"].value_counts()
    analysis_data["walls"].value_counts()

    # END NEW

    # These two subsets use the raw flag columns rather than the == True masks
    identified_flag = analysis_data["warmfront_identified"]
    eco4_flag = analysis_data["eco4_eligible"]
    gbis_flag = analysis_data["gbis_eligible"]

    sold_as_eco4_aw_only = analysis_data["funding_scheme"].isin(["ECO4 A/W"])
    all_identified_eco = analysis_data[(identified_flag & sold_as_eco4_aw_only) | eco4_flag]

    not_eco4 = eco4_flag.isin([False, None])
    all_identified_gbis = analysis_data[(identified_flag & sold_as_gbis) | (gbis_flag & not_eco4)]

    warmfront_identified = analysis_data[identified_flag]

    # Of the ECO jobs, what proportion do we get right
    warmfront_identified_eco = warmfront_identified[
        warmfront_identified["funding_scheme"].isin(eco4_schemes)
    ]
    eco_jobs = warmfront_identified_eco.shape[0]
    eco_success_rate = warmfront_identified_eco["eco4_eligible"].sum() / eco_jobs

    warmfront_identified_gbis = warmfront_identified[
        warmfront_identified["funding_scheme"].isin(gbis_schemes)
    ]

    # No gbis for this
    # gbis_success_rate = warmfront_identified_gbis["gbis_eligible"].sum() / warmfront_identified_gbis.shape[0]

    # Additional identified
    additional_identified_eco = analysis_data[eco4_now & not_identified]

    additional_identified_eco["eligibility_classification"].value_counts()

    eco4_not_now = analysis_data["eco4_eligible"] == False
    additional_identified_gbis = analysis_data[gbis_now & eco4_not_now & not_identified].shape[0]

    # Future
    eco4_future = analysis_data["eco4_eligible_future"] == True
    additional_identified_eco_future = analysis_data[eco4_future & not_identified].shape[0]

    gbis_future = analysis_data["gbis_eligible_future"] == True
    eco4_not_future = analysis_data["eco4_eligible_future"] == False
    additional_identified_gbis_future = analysis_data[gbis_future & eco4_not_future & not_identified].shape[0]
|
||||
|
||||
|
||||
def app():
    """Run the HA24 eligibility pipeline from raw spreadsheets to model results.

    Loads the asset and survey data, labels every asset row with an
    ``ha24_<position>`` identifier, fetches the cleaned EPC lookup, the cleaning
    dataset and the solar lookups from the ``retrofit-data-dev`` bucket and then
    passes everything to ``get_epc_data``. Nothing is returned; the outputs are
    left in local variables.
    """
    bucket = "retrofit-data-dev"

    data, survey_list = load_data()

    # Positional identifier for each asset row: "ha24_0", "ha24_1", ...
    data["row_id"] = [f"ha24_{position}" for position in range(len(data))]

    # The cleaned EPC lookup is downloaded as bytes and decoded with msgpack
    packed_cleaned = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name=bucket,
    )
    cleaned = msgpack.unpackb(packed_cleaned, raw=False)

    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name=bucket,
        file_key="sap_change_model/cleaning_dataset.parquet",
    )

    # One timestamp shared by everything produced in this run
    created_at = datetime.now().isoformat()

    solar_lookups = SolarPhotoSupply.load(bucket=bucket)
    photo_supply_lookup, floor_area_decile_thresholds = solar_lookups

    results_df, scoring_data, nodata = get_epc_data(
        data,
        cleaned,
        cleaning_data,
        created_at,
        photo_supply_lookup,
        floor_area_decile_thresholds,
    )
|
||||
|
||||
# Pickle results just in case
|
||||
# import pickle
|
||||
# with open("ha24_10_jan.pickle", "wb") as f:
|
||||
# pickle.dump(
|
||||
# {
|
||||
# "scoring_data": scoring_data,
|
||||
# "results": results_df,
|
||||
# "nodata": nodata
|
||||
# }, f
|
||||
# )
|
||||
|
||||
# Read in pickle
|
||||
# import pickle
|
||||
# with open("ha24_10_jan.pickle", "rb") as f:
|
||||
# saved = pickle.load(f)
|
||||
# scoring_data = saved["scoring_data"]
|
||||
# results_df = saved["results"]
|
||||
# nodata = saved["nodata"]
|
||||
883
etl/eligibility/ha_15_32/ha25_app.py
Normal file
883
etl/eligibility/ha_15_32/ha25_app.py
Normal file
|
|
@ -0,0 +1,883 @@
|
|||
import os
|
||||
import msgpack
|
||||
import openpyxl
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from utils.s3 import read_from_s3
|
||||
from utils.logger import setup_logger
|
||||
from dotenv import load_dotenv
|
||||
from utils.s3 import read_dataframe_from_s3_parquet
|
||||
from tqdm import tqdm
|
||||
from backend.SearchEpc import SearchEpc
|
||||
from etl.eligibility.Eligibility import Eligibility
|
||||
from etl.eligibility.ha_15_32.app import prepare_model_data_row
|
||||
from etl.epc.DataProcessor import DataProcessor
|
||||
from etl.epc.settings import COLUMNS_TO_MERGE_ON
|
||||
from backend.ml_models.api import ModelApi
|
||||
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
|
||||
from recommendations.recommendation_utils import calculate_cavity_age
|
||||
from recommendation_utils import convert_thickness_to_numeric
|
||||
|
||||
import re
|
||||
|
||||
# This module lives in etl/eligibility/ha_15_32/, so the .env file sits right next
# to it. Appending "etl/eligibility/ha_15_32" again (as before) pointed at a nested
# folder under this directory instead of at this directory.
ENV_FILE = Path(__file__).resolve().parent / ".env"

logger = setup_logger()

# Load the .env file BEFORE reading the token: os.getenv only sees variables that
# are already in the process environment at the moment it is called.
load_dotenv(ENV_FILE)
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
||||
|
||||
|
||||
def load_data():
|
||||
workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 25 ASSET LIST.xlsx', data_only=True)
|
||||
sheet = workbook.active
|
||||
|
||||
rows_data = []
|
||||
rows_colors = []
|
||||
for row in sheet.iter_rows(min_row=1, values_only=True): # use values_only=True to get values
|
||||
|
||||
row_data = list(row) # No need for comprehension, values_only=True returns a tuple of values
|
||||
rows_data.append(row_data)
|
||||
|
||||
# Headers are on the final row. Pop them off and store them and then remove them from rows_data
|
||||
headers = rows_data.pop()
|
||||
# The postcode header is None, so we replace it with "postcode"
|
||||
headers[-1] = "postcode"
|
||||
|
||||
# Handle colours separately
|
||||
for row in sheet.iter_rows(min_row=1, values_only=False):
|
||||
# Assume first cell color is indicative of entire row
|
||||
row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
|
||||
rows_colors.append(row_color)
|
||||
|
||||
# Remove the final row of colours, which is the header
|
||||
rows_colors.pop()
|
||||
|
||||
asset_list = pd.DataFrame(rows_data, columns=headers)
|
||||
asset_list['row_color'] = rows_colors
|
||||
|
||||
asset_list["row_colour_name"] = np.where(
|
||||
asset_list["row_color"] == "FFFF0000", "red",
|
||||
np.where(asset_list["row_color"] == "FF00B050", "green", "yellow")
|
||||
)
|
||||
|
||||
asset_list["row_colour_code"] = np.where(
|
||||
asset_list["row_colour_name"] == "red", "does not meet criteria",
|
||||
np.where(asset_list["row_colour_name"] == "green", "identified potential eco", "maybe in the future")
|
||||
)
|
||||
|
||||
asset_list["address"] = asset_list["T1_Address"].copy().str.lower()
|
||||
asset_list["address"] = asset_list["address"].str.replace("flat", "")
|
||||
asset_list["address"] = asset_list["address"].str.strip()
|
||||
|
||||
split_addresses = asset_list['address'].str.split(' ', expand=True)
|
||||
split_addresses.columns = ['HouseNo', 'address2', 'address3', 'address4', 'address5', 'address6', 'address7',
|
||||
'address8',
|
||||
'address9', 'address10', 'address11', 'address12', 'address13', 'address14', ]
|
||||
split_addresses["HouseNo"] = split_addresses["HouseNo"].str.replace(";", "")
|
||||
|
||||
# We could re-concatenate but we only care about HouseNo for the moment
|
||||
asset_list = pd.concat([asset_list, split_addresses[["HouseNo"]]], axis=1)
|
||||
asset_list["postcode"] = asset_list["postcode"].str.strip()
|
||||
|
||||
# We analyse the historical ECO3 survey list
|
||||
eco3_survey_workbook = openpyxl.load_workbook(f'etl/eligibility/ha_15_32/HESTIA - HA 25 ECO3 SURVEY LIST.xlsx')
|
||||
eco3_survey_sheet = eco3_survey_workbook["CAVITY"]
|
||||
|
||||
eco3_survey_rows = []
|
||||
eco3_survey_colors = []
|
||||
|
||||
for row in eco3_survey_sheet.iter_rows(min_row=2, values_only=False): # Assuming the first row is headers
|
||||
row_data = [cell.value for cell in row] # This will get you the cell values
|
||||
row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
|
||||
# row_color = COLOR_INDEX[row_color]
|
||||
eco3_survey_rows.append(row_data)
|
||||
eco3_survey_colors.append(row_color)
|
||||
|
||||
# Some adhoc analysis on the eco3 survey list, just to get completion and cancellation rates historically
|
||||
eco3_survey_list = pd.DataFrame(eco3_survey_rows, columns=[cell.value for cell in eco3_survey_sheet[1]])
|
||||
eco3_survey_list["row_colour"] = eco3_survey_colors
|
||||
# Remove rows where street name is missing
|
||||
eco3_survey_list = eco3_survey_list[~pd.isnull(eco3_survey_list["Street / Block Name"])]
|
||||
# We need to parse the row colours
|
||||
# We have the following mappings:
|
||||
# FF7030A0: purple
|
||||
# FF92D050: green
|
||||
# FFFF0000: red
|
||||
# FFFFFF00: yellow
|
||||
# FF38FD23: green
|
||||
eco3_survey_list["row_colour_name"] = np.where(
|
||||
eco3_survey_list["row_colour"] == "FF7030A0", "purple",
|
||||
np.where(eco3_survey_list["row_colour"] == "FF92D050", "green",
|
||||
np.where(eco3_survey_list["row_colour"] == "FFFF0000", "red",
|
||||
np.where(eco3_survey_list["row_colour"] == "FFFFFF00", "yellow",
|
||||
np.where(eco3_survey_list["row_colour"] == "FF38FD23", "green", "unknown")
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
# We map the meaning:
|
||||
# red: cancelled
|
||||
# green: installed advised install complete
|
||||
# purple: installer advised install complete + post works EPC
|
||||
# yellow: filler row - drop
|
||||
eco3_survey_list["row_colour_code"] = np.where(
|
||||
eco3_survey_list["row_colour_name"] == "red", "cancelled",
|
||||
np.where(eco3_survey_list["row_colour_name"] == "green", "installed advised install complete",
|
||||
np.where(eco3_survey_list["row_colour_name"] == "purple",
|
||||
"installer advised install complete + post works EPC",
|
||||
np.where(eco3_survey_list["row_colour_name"] == "yellow", "filler row - drop", "unknown")
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
# This is good enough for the indicative cancellation rates
|
||||
|
||||
# We now read in the indicative survey list which identified prospects for ECO4 works
|
||||
eco4_survey_workbook = openpyxl.load_workbook(
|
||||
f'etl/eligibility/ha_15_32/HESTIA - HA 25 ADHOC ISOLATED IDENTIFIED PROPERTIES FOR CWI.xlsx'
|
||||
)
|
||||
eco4_prospect_survey_sheet = eco4_survey_workbook["LiveWest"]
|
||||
|
||||
eco4_prospects_survey_rows = []
|
||||
eco4_prospects_survey_colors = []
|
||||
|
||||
for row in eco4_prospect_survey_sheet.iter_rows(min_row=2, values_only=False): # Assuming the first row is headers
|
||||
row_data = [cell.value for cell in row] # This will get you the cell values
|
||||
row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
|
||||
# row_color = COLOR_INDEX[row_color]
|
||||
eco4_prospects_survey_rows.append(row_data)
|
||||
eco4_prospects_survey_colors.append(row_color)
|
||||
|
||||
# Build the ECO4 prospects survey list from the rows read above
|
||||
eco4_prospects_survey_list = pd.DataFrame(
|
||||
eco4_prospects_survey_rows, columns=[cell.value for cell in eco4_prospect_survey_sheet[1]]
|
||||
)
|
||||
eco4_prospects_survey_list["row_colour"] = eco4_prospects_survey_colors
|
||||
|
||||
eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.lower()
|
||||
eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.strip()
|
||||
|
||||
eco4_prospects_survey_list = eco4_prospects_survey_list[~pd.isnull(eco4_prospects_survey_list["ADDRESS 1"])]
|
||||
eco4_prospects_survey_list["survey_key"] = ["survey_" + str(i) for i in range(0, len(eco4_prospects_survey_list))]
|
||||
|
||||
# Correct some errors in the survey list
|
||||
eco4_prospects_survey_list["POSTCODE"] = np.where(
|
||||
(eco4_prospects_survey_list["ADDRESS 1"] == "berry park") &
|
||||
(eco4_prospects_survey_list["POSTCODE"] == "PL12 6HP"),
|
||||
"PL12 6EN",
|
||||
eco4_prospects_survey_list["POSTCODE"]
|
||||
)
|
||||
|
||||
# Remove semi colons from address in asset and survey list
|
||||
asset_list["T1_Address"] = asset_list["T1_Address"].str.replace(";", "")
|
||||
eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace(";", "")
|
||||
|
||||
# In the prosepcts survey list, we have 6 WALKHAM MEADOWS listed twice, which should be 6a and 6b
|
||||
eco4_prospects_survey_list.loc[838, "NO"] = "6a"
|
||||
eco4_prospects_survey_list.loc[839, "NO"] = "6b"
|
||||
|
||||
# 3, 7, 9 BOLDVENTURE ROAD should be BOLDVENTURE CLOSE
|
||||
eco4_prospects_survey_list["ADDRESS 1"] = np.where(
|
||||
(eco4_prospects_survey_list["ADDRESS 1"] == "boldventure road") &
|
||||
(eco4_prospects_survey_list["NO"].isin([3, 7, 9])),
|
||||
"boldventure close",
|
||||
eco4_prospects_survey_list["ADDRESS 1"]
|
||||
)
|
||||
|
||||
eco4_prospects_survey_list["ADDRESS 1"] = np.where(
|
||||
(eco4_prospects_survey_list["ADDRESS 1"] == "old farm road") & (
|
||||
eco4_prospects_survey_list["POSTCODE"] == "PL5 1EP"),
|
||||
"old school road",
|
||||
eco4_prospects_survey_list["ADDRESS 1"]
|
||||
)
|
||||
|
||||
eco4_prospects_survey_list["ADDRESS 1"] = np.where(
|
||||
(eco4_prospects_survey_list["ADDRESS 1"] == "croft orchard") & (
|
||||
eco4_prospects_survey_list["POSTCODE"] == "TQ12 6RP") & (
|
||||
eco4_prospects_survey_list["NO"] == 52),
|
||||
"drum way",
|
||||
eco4_prospects_survey_list["ADDRESS 1"]
|
||||
)
|
||||
|
||||
# String replace
|
||||
eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace(
|
||||
"the gulls, collaton road", "the gulls collaton road"
|
||||
)
|
||||
eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace(
|
||||
"crows-an-eglose", "crows-an-eglos"
|
||||
)
|
||||
|
||||
# We have a high volume of rows that do not match
|
||||
matched = []
|
||||
nomatch = []
|
||||
for _, row in tqdm(eco4_prospects_survey_list.iterrows(), total=len(eco4_prospects_survey_list)):
|
||||
|
||||
# Not in the asset list
|
||||
if (row["ADDRESS 1"] == "berry park") and row["NO"] in [40, 42] and row["POSTCODE"] == "PL12 6EN":
|
||||
nomatch.append(row.to_dict())
|
||||
continue
|
||||
|
||||
# Not in the asset list
|
||||
if (row["ADDRESS 1"] == "roberts road") and row["NO"] == 23 and row["POSTCODE"] == "PL5 1DP":
|
||||
nomatch.append(row.to_dict())
|
||||
continue
|
||||
|
||||
# Not in the asset list
|
||||
if row["ADDRESS 1"] in [
|
||||
"kaynton mead", "broadmoor lane", "hoopers barton", "ecos court", "selwood road",
|
||||
"castle street"
|
||||
]:
|
||||
nomatch.append(row.to_dict())
|
||||
continue
|
||||
|
||||
house_number = row["NO"]
|
||||
if isinstance(house_number, str):
|
||||
house_number = house_number.lower()
|
||||
|
||||
if "flat" in house_number:
|
||||
house_number = house_number.split("flat")[1].strip()
|
||||
|
||||
# Filter on the first line of the address
|
||||
df = asset_list[asset_list["T1_Address"].str.lower().str.contains(row["ADDRESS 1"].lower())].copy()
|
||||
if house_number is not None:
|
||||
if df.shape[0] != 1:
|
||||
df = df[df["T1_Address"].str.lower().str.contains(str(house_number))]
|
||||
if df.shape[0] != 1:
|
||||
if house_number is not None:
|
||||
df = df[df["HouseNo"] == str(house_number)]
|
||||
if df.shape[0] != 1:
|
||||
if row["POSTCODE"] is not None:
|
||||
df = df[df["postcode"].str.lower().str.contains(row["POSTCODE"].lower())]
|
||||
if df.shape[0] != 1:
|
||||
nomatch.append(row.to_dict())
|
||||
continue
|
||||
|
||||
matched.append(
|
||||
{
|
||||
"survey_key": row["survey_key"],
|
||||
"matched_address": df["T1_Address"].values[0],
|
||||
"survey_house_no": row["NO"],
|
||||
"survey_street_name": row["ADDRESS 1"],
|
||||
"survey_postcode": row["POSTCODE"],
|
||||
}
|
||||
)
|
||||
|
||||
nomatch = pd.DataFrame(nomatch)
|
||||
matched = pd.DataFrame(matched)
|
||||
|
||||
matched["warmfront_identified"] = True
|
||||
|
||||
# Combine asset list and surveys
|
||||
data = asset_list.merge(
|
||||
matched, how="left", left_on="T1_Address", right_on="matched_address",
|
||||
)
|
||||
data["warmfront_identified"] = data["warmfront_identified"].fillna(False)
|
||||
|
||||
lost_identified_properties = eco4_prospects_survey_list[
|
||||
~eco4_prospects_survey_list["survey_key"].isin(matched["survey_key"])
|
||||
]
|
||||
|
||||
return data, eco4_prospects_survey_list, lost_identified_properties
|
||||
|
||||
|
||||
# (last year covered by the band, EPC construction-age-band label), in ascending order.
_AGE_BAND_UPPER_BOUNDS = (
    (1899, "England and Wales: before 1900"),
    (1929, "England and Wales: 1900-1929"),
    (1949, "England and Wales: 1930-1949"),
    (1966, "England and Wales: 1950-1966"),
    (1975, "England and Wales: 1967-1975"),
    (1982, "England and Wales: 1976-1982"),
    (1990, "England and Wales: 1983-1990"),
    (1995, "England and Wales: 1991-1995"),
    (2002, "England and Wales: 1996-2002"),
    (2006, "England and Wales: 2003-2006"),
    (2011, "England and Wales: 2007-2011"),
)


def map_year_to_age_band(year):
    """
    Map a build year onto the matching "England and Wales: ..." construction age band label.

    :param year: anything accepted by ``int()`` (int, float, numeric string)
    :return: the age band label, or "Invalid Year" when the value cannot be converted to an integer
    """
    try:
        year = int(year)
    except (TypeError, ValueError, OverflowError):
        # TypeError covers None, ValueError covers NaN / non-numeric strings, OverflowError covers inf.
        # Missing build years must not crash the caller's loop.
        return "Invalid Year"  # Or any other way you want to handle invalid inputs

    for last_year, label in _AGE_BAND_UPPER_BOUNDS:
        if year <= last_year:
            return label

    # Assuming all remaining years are 2012 onwards
    return "England and Wales: 2012 onwards"
|
||||
|
||||
|
||||
def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
    """
    Find the EPC for every asset row, run the eligibility checks and score the ECO4-eligible properties.

    :param data: asset list DataFrame. The columns read here are "row_id", "HouseNo", "postcode", "address",
        "T1_Address", "T1_AssetType" and "Build Yr". An iterable of (index, row) pairs is also accepted.
    :param cleaned: cleaned EPC lookup, passed to Eligibility, calculate_cavity_age and prepare_model_data_row
    :param cleaning_data: cleaning dataset, passed to prepare_model_data_row and the DataProcessor cleaning steps
    :param created_at: timestamp passed to prepare_model_data_row and ModelApi
    :param photo_supply_lookup: passed through to prepare_model_data_row
    :param floor_area_decile_thresholds: passed through to prepare_model_data_row
    :return: tuple (results_df, scoring_data, nodata) - one results row per property with an EPC, the raw
        scoring rows sent to the model, and the rows for which no EPC was found
    """
    scoring_data = []
    results = []
    nodata = []

    property_type_lookup = {
        "Flat": {"property-type": "Flat", "built-form": None},
        "Mid Terrace House": {"property-type": "House", "built-form": "Mid-Terrace"},
        "End Terrace House": {"property-type": "House", "built-form": "End-Terrace"},
        "Maisonnette": {"property-type": "Flat", "built-form": None},
        "Semi Detached House": {"property-type": "House", "built-form": "Semi-Detached"},
        "Detached House": {"property-type": "House", "built-form": "Detached"},
        "Coach House": {"property-type": "House", "built-form": "Detached"},
        "Bungalow": {"property-type": "Bungalow", "built-form": None},
        "Detached Bungalow": {"property-type": "Bungalow", "built-form": "Detached"},
        "House": {"property-type": "House", "built-form": None},
        "Semi Detached Bung": {"property-type": "Bungalow", "built-form": "Semi-Detached"},
        "Bedspace": {"property-type": None, "built-form": None},
        "Office Buildings": {"property-type": None, "built-form": None},
        "End Terrace Bungalow": {"property-type": "Bungalow", "built-form": "End-Terrace"},
        "Mid Terrace Bungalow": {"property-type": "Bungalow", "built-form": "Mid-Terrace"},
        "Bedsit": {"property-type": "Flat", "built-form": None},
        "Mid Terrace Housekeeping": {"property-type": "House", "built-form": "Mid-Terrace"},
        "Mid Terrace Housekeeping ": {"property-type": "House", "built-form": "Mid-Terrace"},
        "End Terrace Housex": {"property-type": "House", "built-form": "End-Terrace"},
        "Guest Room": {"property-type": None, "built-form": None}
    }

    # Iterating a DataFrame directly yields its column labels, not rows, so go through iterrows().
    # Anything that is not a DataFrame is assumed to already yield (index, row) pairs.
    rows = data.iterrows() if isinstance(data, pd.DataFrame) else data

    for _, property_meta in tqdm(rows, total=len(data)):

        searcher = SearchEpc(
            address1=property_meta["HouseNo"],
            postcode=property_meta["postcode"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            full_address=property_meta["address"]
        )
        # Unknown asset types raise a KeyError here on purpose, so new spellings get added to the lookup
        asset_type = property_type_lookup[property_meta["T1_AssetType"]]
        searcher.ordnance_survey_client.property_type = asset_type["property-type"]
        searcher.ordnance_survey_client.built_form = asset_type["built-form"]
        searcher.find_property(skip_os=True)

        if searcher.newest_epc is None:
            nodata.append(property_meta)
            continue

        if searcher.newest_epc.get("estimated"):
            # We insert the row ID as our proxy for UPRN
            proxy_uprn = int(property_meta["row_id"].split("_")[1])
            searcher.newest_epc["uprn"] = proxy_uprn

        newest_epc = searcher.newest_epc
        older_epcs = searcher.older_epcs
        full_sap_epc = searcher.full_sap_epc

        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
        eligibility.check_gbis_warmfront()
        eligibility.check_eco4_warmfront()

        # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity.
        # Loft MUST be suitable
        cavity_age = None
        if (
            eligibility.walls["is_cavity_wall"] and
            eligibility.walls["is_filled_cavity"] and
            eligibility.loft["suitability"] and
            eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
        ):
            # We check the age of the cavity and if it's particularly old, we flag it
            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)

        # Full checks
        eligibility.check_gbis()
        eligibility.check_eco4()

        if eligibility.eco4_warmfront["eligible"]:
            # Only eligible properties are scored; fill the gaps the model inputs cannot tolerate first
            if eligibility.epc["uprn"] in ["", None]:
                eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[1])

            if eligibility.epc["construction-age-band"] in ["", None]:
                eligibility.epc["construction-age-band"] = map_year_to_age_band(property_meta["Build Yr"])

            # This is not the right place to do this but this is temp
            if eligibility.epc["extension-count"] in ["", None]:
                eligibility.epc["extension-count"] = 0

            # Not in the right place but temp
            if eligibility.epc["built-form"] in ["", None] and not older_epcs:
                eligibility.epc["built-form"] = "Mid-Terrace"

            scoring_dictionary = prepare_model_data_row(
                property_id=property_meta["row_id"],
                modelling_epc=eligibility.epc,
                cleaned=cleaned,
                cleaning_data=cleaning_data,
                created_at=created_at,
                old_data=older_epcs,
                full_sap_epc=full_sap_epc,
                photo_supply_lookup=photo_supply_lookup,
                floor_area_decile_thresholds=floor_area_decile_thresholds,
            )
            scoring_data.extend(scoring_dictionary)

        results.append(
            {
                "row_id": property_meta["row_id"],
                "uprn": eligibility.epc["uprn"],
                "Address": property_meta["T1_Address"],
                "Postcode": property_meta["postcode"],
                "property_type": eligibility.epc["property-type"],
                "gbis_eligible": eligibility.gbis_warmfront,
                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                "eco4_message": eligibility.eco4_warmfront["message"],
                "sap": float(eligibility.epc["current-energy-efficiency"]),
                "gbis_eligible_future": eligibility.gbis["eligible"],
                "gbis_eligible_future_message": eligibility.gbis["message"],
                "eco4_eligible_future": eligibility.eco4["eligible"],
                "eco4_eligible_future_message": eligibility.eco4["message"],
                # Property components
                "roof": eligibility.roof["clean_description"],
                "walls": eligibility.walls["clean_description"],
                "cavity_type": eligibility.cavity["type"],
                "heating": eligibility.epc["mainheat-description"],
                "tenure": eligibility.tenure,
                "date_epc": eligibility.epc["lodgement-date"],
                "cavity_age": cavity_age,
                **eligibility.walls,
                **eligibility.roof,
            }
        )

    scoring_df = pd.DataFrame(scoring_data)

    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )

    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])

    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )

    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)

    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )

    predictions = all_predictions["sap_change_predictions"].copy()

    results_df = pd.DataFrame(results)

    # Each prediction row is turned into an uplift over the current SAP; uplifts are summed per property
    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
        results_df[["row_id", "sap"]], how="left", on="row_id"
    )
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()

    results_df = results_df.merge(
        predictions[["sap_uplift", "row_id"]],
        how="left",
        on="row_id"
    )
    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]

    eligibility_assessment = []
    for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
        # The upgrade requirements are dependent on the current SAP: a current SAP of 38 or below is judged
        # against a target of 55, everything else against 69 (presumably the lower bounds of EPC bands D
        # and C - confirm). Two points above target is "highest", on target "high", two below "medium".
        target_sap = 55 if row["sap"] <= 38 else 69
        post_install_sap = row["post_install_sap"]

        if post_install_sap >= target_sap + 2:
            eligibility_classification = "highest confidence"
        elif post_install_sap >= target_sap:
            eligibility_classification = "high confidence"
        elif post_install_sap >= target_sap - 2:
            eligibility_classification = "medium confidence"
        else:
            # Also reached when no prediction was produced (post_install_sap is NaN)
            eligibility_classification = "unlikely"

        eligibility_assessment.append(
            {
                "row_id": row["row_id"],
                "eligibility_classification": eligibility_classification
            }
        )

    # Name the columns explicitly so the merge still works when no property is ECO4 eligible
    eligibility_assessment = pd.DataFrame(
        eligibility_assessment, columns=["row_id", "eligibility_classification"]
    )

    results_df = results_df.merge(
        eligibility_assessment, how="left", on="row_id"
    )
    return results_df, scoring_data, nodata
|
||||
|
||||
|
||||
def get_epc_data_for_lost_surveys(
    lost_identified_properties, cleaned, cleaning_data, created_at, photo_supply_lookup,
    floor_area_decile_thresholds
):
    """
    Run the EPC search, eligibility checks and SAP scoring for surveyed properties missing from the asset list.

    When the newest EPC is neither ECO4 nor GBIS eligible on the warmfront checks, the penultimate EPC is
    tried instead.

    :param lost_identified_properties: survey rows DataFrame. The columns read here are "NO", "ADDRESS 1",
        "ADDRESS 2", "ADDRESS 3", "POSTCODE" and "PROPERTY TYPE". A "row_id" column is written onto it.
    :param cleaned: cleaned EPC lookup, passed to Eligibility and prepare_model_data_row
    :param cleaning_data: cleaning dataset, passed to prepare_model_data_row and the DataProcessor cleaning steps
    :param created_at: timestamp passed to prepare_model_data_row and ModelApi
    :param photo_supply_lookup: passed through to prepare_model_data_row
    :param floor_area_decile_thresholds: passed through to prepare_model_data_row
    :return: tuple (results_df, scoring_data, nodata) - one results row per property with an EPC, the raw
        scoring rows sent to the model, and the rows for which no EPC was found
    """
    # NOTE: this writes to the caller's frame; kept because callers may rely on the new column
    lost_identified_properties["row_id"] = [
        "lost_surveys_ha25_" + str(i) for i in range(0, len(lost_identified_properties))
    ]

    scoring_data = []
    results = []
    nodata = []

    property_type_lookup = {
        "MID-TERRACE": {"property-type": "House", "built-form": "Mid-Terrace"},
        "N/A": {"property-type": "House", "built-form": None},
        "END-TERRACE": {"property-type": "House", "built-form": "End-Terrace"},
        "GROUND-FLOOR": {"property-type": "House", "built-form": None},
        "TOP-FLOOR": {"property-type": "House", "built-form": None},
        "SEMI-DETACHED": {"property-type": "House", "built-form": "Semi-Detached"},
        "MID-FLOOR": {"property-type": "House", "built-form": None},
        "TOP-FLOOR FLAT": {"property-type": "House", "built-form": None},
        "DETACHED": {"property-type": "House", "built-form": "Detached"},
        "MID-FLOOR FLAT": {"property-type": "House", "built-form": None},
        "SEMI- DETACHED": {"property-type": "House", "built-form": "Semi-Detached"},
        "NO EPC ON GOV": {"property-type": "House", "built-form": None},
        "Top-floor flat": {"property-type": "House", "built-form": None},
        "GROUND-FLOOR FLAT": {"property-type": "House", "built-form": None},
        "NOT ON GOV SITE": {"property-type": "House", "built-form": None}
    }

    for _, property_meta in tqdm(lost_identified_properties.iterrows(), total=len(lost_identified_properties)):

        if property_meta["POSTCODE"] is None:
            continue

        full_address = ", ".join(
            [str(x) for x in [
                property_meta["NO"], property_meta["ADDRESS 1"], property_meta["ADDRESS 2"], property_meta["ADDRESS 3"]
            ] if x is not None]
        )

        searcher = SearchEpc(
            address1=str(property_meta["NO"]),
            postcode=property_meta["POSTCODE"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            full_address=full_address
        )

        property_type_key = property_meta["PROPERTY TYPE"]
        if property_type_key is not None:
            survey_type = property_type_lookup[property_type_key.strip()]
            searcher.ordnance_survey_client.property_type = survey_type["property-type"]
            searcher.ordnance_survey_client.built_form = survey_type["built-form"]
        searcher.find_property(skip_os=True)

        if searcher.newest_epc is None:
            nodata.append(property_meta)
            continue

        # Row ids look like "lost_surveys_ha25_<n>", so the numeric part is the LAST "_"-separated token
        proxy_uprn = int(property_meta["row_id"].split("_")[-1])

        if searcher.newest_epc.get("estimated"):
            # We insert the row ID as our proxy for UPRN
            searcher.newest_epc["uprn"] = proxy_uprn

        newest_epc = searcher.newest_epc
        older_epcs = searcher.older_epcs
        full_sap_epc = searcher.full_sap_epc
        # We also want to get the penultimate epc
        penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
        if not penultimate_epc:
            penultimate_epc = newest_epc

        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
        eligibility.check_gbis_warmfront()
        eligibility.check_eco4_warmfront()

        if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
            eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
            eligibility.check_gbis_warmfront()
            eligibility.check_eco4_warmfront()
            # If this is the case, we need to update the older epcs
            # We don't update just to make data cleaning easier
            if penultimate_epc.get("estimated") is None:
                older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]

        # Full checks
        eligibility.check_gbis()
        eligibility.check_eco4()

        # Only score eligible properties that carry a construction age band
        if eligibility.eco4_warmfront["eligible"] and (
            eligibility.epc["construction-age-band"] not in ["", None]
        ):
            if eligibility.epc["uprn"] in ["", None]:
                # Previously split("_")[1], which is "surveys" for these ids and made int() raise
                eligibility.epc["uprn"] = proxy_uprn

            scoring_dictionary = prepare_model_data_row(
                property_id=property_meta["row_id"],
                modelling_epc=eligibility.epc,
                cleaned=cleaned,
                cleaning_data=cleaning_data,
                created_at=created_at,
                old_data=older_epcs,
                full_sap_epc=full_sap_epc,
                photo_supply_lookup=photo_supply_lookup,
                floor_area_decile_thresholds=floor_area_decile_thresholds,
            )
            scoring_data.extend(scoring_dictionary)

        results.append(
            {
                "row_id": property_meta["row_id"],
                "uprn": eligibility.epc["uprn"],
                "Address": property_meta["ADDRESS 1"],
                "Postcode": property_meta["POSTCODE"],
                "property_type": eligibility.epc["property-type"],
                "gbis_eligible": eligibility.gbis_warmfront,
                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                "eco4_message": eligibility.eco4_warmfront["message"],
                "sap": float(eligibility.epc["current-energy-efficiency"]),
                "gbis_eligible_future": eligibility.gbis["eligible"],
                "gbis_eligible_future_message": eligibility.gbis["message"],
                "eco4_eligible_future": eligibility.eco4["eligible"],
                "eco4_eligible_future_message": eligibility.eco4["message"],
                # Property components
                "roof": eligibility.roof["clean_description"],
                "walls": eligibility.walls["clean_description"],
                "cavity_type": eligibility.cavity["type"],
                "heating": eligibility.epc["mainheat-description"],
                "tenure": eligibility.tenure,
                "date_epc": eligibility.epc["lodgement-date"],
                **eligibility.walls,
                **eligibility.roof,
            }
        )

    scoring_df = pd.DataFrame(scoring_data)

    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )

    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])

    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )

    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)

    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )

    predictions = all_predictions["sap_change_predictions"].copy()

    results_df = pd.DataFrame(results)

    # Each prediction row is turned into an uplift over the current SAP; uplifts are summed per property
    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
        results_df[["row_id", "sap"]], how="left", on="row_id"
    )
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()

    results_df = results_df.merge(
        predictions[["sap_uplift", "row_id"]],
        how="left",
        on="row_id"
    )
    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]

    eligibility_assessment = []
    for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
        # The upgrade requirements are dependent on the current SAP: a current SAP of 38 or below is judged
        # against a target of 55, everything else against 69 (presumably the lower bounds of EPC bands D
        # and C - confirm). Two points above target is "highest", on target "high", two below "medium".
        target_sap = 55 if row["sap"] <= 38 else 69
        post_install_sap = row["post_install_sap"]

        if post_install_sap >= target_sap + 2:
            eligibility_classification = "highest confidence"
        elif post_install_sap >= target_sap:
            eligibility_classification = "high confidence"
        elif post_install_sap >= target_sap - 2:
            eligibility_classification = "medium confidence"
        else:
            # Also reached when no prediction was produced (post_install_sap is NaN)
            eligibility_classification = "unlikely"

        eligibility_assessment.append(
            {
                "row_id": row["row_id"],
                "eligibility_classification": eligibility_classification
            }
        )

    # Name the columns explicitly so the merge still works when no property is ECO4 eligible
    eligibility_assessment = pd.DataFrame(
        eligibility_assessment, columns=["row_id", "eligibility_classification"]
    )

    results_df = results_df.merge(
        eligibility_assessment, how="left", on="row_id"
    )
    return results_df, scoring_data, nodata
|
||||
|
||||
|
||||
def analyse_results(results_df, data, eco4_prospects_survey_list):
    """
    Ad hoc comparison of the modelled eligibility results against the surveyed prospects.

    Joins the asset list flags to the results and to the survey address columns, derives a numeric roof
    insulation thickness, then slices out the surveyed, ideal ECO4 and GBIS groups. Nothing is returned;
    the intermediate frames exist for interactive inspection only.

    :param results_df: results frame keyed on "row_id"
    :param data: asset list with "row_id", "survey_key" and "warmfront_identified"
    :param eco4_prospects_survey_list: survey list with "survey_key", "ADDRESS 1", "NO" and "POSTCODE"
    """
    def pitched_roof_thickness(raw_thickness):
        return convert_thickness_to_numeric(raw_thickness, is_flat=False, is_pitched=True)

    asset_flags = data[["row_id", "survey_key", "warmfront_identified"]]
    analysis_data = asset_flags.merge(results_df, how="left", on="row_id")

    survey_address_columns = eco4_prospects_survey_list[["survey_key", "ADDRESS 1", "NO", "POSTCODE"]]
    analysis_data = analysis_data.merge(survey_address_columns, how="left", on="survey_key")

    # NEW - normalise missing thickness values to None before converting them to numbers
    raw_thickness_column = analysis_data["roof_insulation_thickness"]
    analysis_data["roof_insulation_thickness"] = np.where(
        pd.isnull(raw_thickness_column), None, raw_thickness_column
    )
    analysis_data["roof_insulation_thickness_numeric"] = (
        analysis_data["roof_insulation_thickness"].apply(pitched_roof_thickness)
    )

    # Properties that appear on the survey list (2204 at the time of writing)
    surveyed_mask = analysis_data["warmfront_identified"] == True
    warmfront_identified = analysis_data[surveyed_mask]

    # Because we don't know which property is for which scheme, we'll just look at what we found
    # (335 at the time of writing)
    ideal_eco4_mask = (
        (analysis_data["eco4_eligible"] == True) &
        (analysis_data["roof_insulation_thickness_numeric"] <= 100) &
        (analysis_data["sap"] <= 54)
    )
    ideal_eco4 = analysis_data[ideal_eco4_mask]

    # GBIS candidates are the GBIS-eligible rows not already counted as ideal ECO4
    gbis_mask = (
        (analysis_data["gbis_eligible"] == True) &
        ~analysis_data["row_id"].isin(ideal_eco4["row_id"].values)
    )
    gbis = analysis_data[gbis_mask]

    ideal_eco4 = ideal_eco4[ideal_eco4["sap"] <= 54]
|
||||
|
||||
|
||||
def analyse_lost_surveys(results_df):
    """
    Ad hoc breakdown of the results for surveyed properties that are missing from the asset list.

    Adds a numeric roof insulation thickness column to results_df in place, then slices out the ideal
    ECO4 and GBIS groups. Nothing is returned; the frames exist for interactive inspection only.

    :param results_df: results frame with "row_id", "eco4_eligible", "gbis_eligible", "sap" and
        "roof_insulation_thickness"
    """
    def pitched_roof_thickness(raw_thickness):
        return convert_thickness_to_numeric(raw_thickness, is_flat=False, is_pitched=True)

    # Normalise missing thickness values to None before converting them to numbers
    raw_thickness_column = results_df["roof_insulation_thickness"]
    results_df["roof_insulation_thickness"] = np.where(
        pd.isnull(raw_thickness_column), None, raw_thickness_column
    )
    results_df["roof_insulation_thickness_numeric"] = (
        results_df["roof_insulation_thickness"].apply(pitched_roof_thickness)
    )

    # 25 at the time of writing
    ideal_eco4_mask = (
        (results_df["eco4_eligible"] == True) &
        (results_df["roof_insulation_thickness_numeric"] <= 100) &
        (results_df["sap"] <= 54)
    )
    ideal_eco4 = results_df[ideal_eco4_mask]

    # GBIS-eligible rows not already counted as ideal ECO4 (82 at the time of writing)
    gbis_mask = (
        (results_df["gbis_eligible"] == True) &
        ~results_df["row_id"].isin(ideal_eco4["row_id"].values)
    )
    gbis = results_df[gbis_mask]
|
||||
|
||||
|
||||
def app():
    """
    Driver for the HA25 eligibility run.

    Loads the asset and survey data, pulls the cleaned EPC lookup and cleaning dataset from S3, runs
    get_epc_data, and finally replaces its outputs with the snapshot stored in "ha25_10_jan.pickle".
    """
    import pickle

    dev_bucket = "retrofit-data-dev"

    data, eco4_prospects_survey_list, lost_identified_properties = load_data()

    # Row ids are positional: ha25_0, ha25_1, ...
    data["row_id"] = [f"ha25_{position}" for position in range(len(data))]

    packed_cleaned = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name=dev_bucket,
    )
    cleaned = msgpack.unpackb(packed_cleaned, raw=False)

    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name=dev_bucket,
        file_key="sap_change_model/cleaning_dataset.parquet",
    )

    created_at = datetime.now().isoformat()

    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=dev_bucket)

    results_df, scoring_data, nodata = get_epc_data(
        data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds
    )

    # The snapshot was produced by pickling {"results_df", "scoring_data", "nodata"} from a previous run
    # (an older snapshot was ha25.pickle). Loading it here overrides the outputs computed above.
    # NOTE: pickle.load can execute arbitrary code - only load snapshots written by this script.
    with open("ha25_10_jan.pickle", "rb") as snapshot_file:
        snapshot = pickle.load(snapshot_file)

    results_df = snapshot["results_df"]
    scoring_data = snapshot["scoring_data"]
    nodata = snapshot["nodata"]
|
||||
326
etl/eligibility/ha_15_32/ha33_app.py
Normal file
326
etl/eligibility/ha_15_32/ha33_app.py
Normal file
|
|
@ -0,0 +1,326 @@
|
|||
import msgpack
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import pandas as pd
|
||||
from utils.s3 import read_from_s3
|
||||
from utils.logger import setup_logger
|
||||
from dotenv import load_dotenv
|
||||
from backend.app.utils import read_parquet_from_s3
|
||||
from tqdm import tqdm
|
||||
from backend.SearchEpc import SearchEpc
|
||||
from etl.eligibility.Eligibility import Eligibility
|
||||
from etl.eligibility.ha_15_32.app import prepare_model_data_row
|
||||
from etl.epc.DataProcessor import DataProcessor
|
||||
from etl.epc.settings import COLUMNS_TO_MERGE_ON
|
||||
from backend.ml_models.api import ModelApi
|
||||
|
||||
import re
|
||||
|
||||
# Dotenv file holding the settings for this script.
# NOTE(review): Path(__file__).parent is already this module's directory, so this resolves to
# <module dir>/etl/eligibility/ha_15_32/.env - confirm that is the intended location.
ENV_FILE = Path(__file__).parent.joinpath("etl", "eligibility", "ha_15_32", ".env")

logger = setup_logger()
load_dotenv(ENV_FILE)
|
||||
|
||||
|
||||
def load_ha_33():
    """
    Read the four HA33 asset CSV extracts and stack them into a single DataFrame.

    Also widens the pandas display options, which is a process-wide side effect. Paths are relative to the
    current working directory.

    :return: the concatenated extracts with every "Unnamed:" column removed; the index is not reset
    """
    display_options = (
        ('display.max_rows', 500),
        ('display.max_columns', 500),
        ('display.width', 1000),
    )
    for option_name, option_value in display_options:
        pd.set_option(option_name, option_value)

    csv_names = (
        "HA 33 Assets 1 of 4.csv",
        "HA 33 Assets 2 of 4.csv",
        "HA 33 Assets 3 of 4.csv",
        "HA 33 Assets 4 of 4.csv",
    )

    def read_extract(csv_name):
        # Drop the "Unnamed:" columns pandas invents for header-less columns in the file
        extract = pd.read_csv(f"etl/eligibility/ha_15_32/{csv_name}", low_memory=False)
        unnamed_columns = [name for name in extract.columns if "Unnamed:" in name]
        return extract.drop(columns=unnamed_columns)

    return pd.concat([read_extract(csv_name) for csv_name in csv_names])
|
||||
|
||||
|
||||
def standardise_ha33(data):
    """
    Drop rows without an address and split "ADDRESS" on commas into address1..addressN columns.

    address1 to address5 are always present (padded with None when every address has fewer parts). Extra
    columns are only added when an address has more than five parts. Previously exactly five parts were
    assumed and any other count raised a length-mismatch error.

    :param data: HA33 asset DataFrame with an "ADDRESS" column of comma separated strings
    :return: a new DataFrame with the address part columns appended and all column names stripped
    """
    data = data[~pd.isnull(data["ADDRESS"])]

    split_addresses = data['ADDRESS'].str.split(',', expand=True)
    # Name the parts by however many the longest address produced
    split_addresses.columns = [f"address{i}" for i in range(1, split_addresses.shape[1] + 1)]
    # Callers read address1/address2, so guarantee the original five columns exist
    for required in ('address1', 'address2', 'address3', 'address4', 'address5'):
        if required not in split_addresses.columns:
            split_addresses[required] = None

    data = pd.concat([data, split_addresses], axis=1)
    del split_addresses

    # Using regex to replace 'FT {number}' or 'FT{number}', with '{number}'
    data['address1'] = data['address1'].str.replace(r'FT\s*(\d+)', r'\1', regex=True)

    data.columns = [col.strip() for col in data.columns]

    # TODO: we have 23 THIRTY SEVENTH AVENUE, can we replace THIRTY SEVENTH with 37TH

    return data
|
||||
|
||||
|
||||
def _classify_eco4_confidence(current_sap, post_install_sap):
    """
    Grade the predicted post-install SAP score against the target for the property's current SAP.

    A property with a current SAP of 38 or below is graded against 57 / 55 / 53, any other property
    against 71 / 69 / 67. A missing (NaN) post-install score fails every comparison and is "unlikely".
    NOTE(review): the cut-offs look like the band D and band C boundaries +/- 2 points - confirm.
    """
    thresholds = (57, 55, 53) if current_sap <= 38 else (71, 69, 67)
    labels = ("highest confidence", "high confidence", "medium confidence")
    for threshold, label in zip(thresholds, labels):
        if post_install_sap >= threshold:
            return label
    return "unlikely"


def _ha33_search_address(house, flat_regex):
    """Build the first address line used for the EPC search, tolerating missing address parts."""
    first = house["address1"].strip() if isinstance(house["address1"], str) else ""
    second = house["address2"].strip() if isinstance(house["address2"], str) else ""

    # When the second address part names a flat ("flat 12"), it identifies the dwelling
    if flat_regex.search(second):
        return second

    # I.e. just a number - append the next address part so the search has something to match on
    if len(first) <= 3 and second:
        return first + " " + second

    return first


def _predict_sap_uplift(scoring_data, results_df, cleaning_data, created_at, id_column):
    """
    Clean the scoring rows, run the models and return the summed predicted SAP uplift per property.

    :return: DataFrame with columns [id_column, "sap_uplift"]
    """
    scoring_df = pd.DataFrame(scoring_data)

    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )

    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])

    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )

    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)

    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )

    # Attach the current SAP so each prediction can be expressed as an uplift over today's score
    predictions = all_predictions["sap_change_predictions"].copy()
    predictions = predictions.rename(columns={"property_id": id_column}).merge(
        results_df[[id_column, "sap"]], how="left", on=id_column
    )
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    return predictions.groupby(id_column)["sap_uplift"].sum().reset_index()


def get_ha_33data(data, cleaned, cleaning_data, created_at):
    """
    Look up the EPC of every HA33 property, check GBIS / ECO4 eligibility and grade the ECO4 candidates.

    :param data: standardised HA33 assets; reads "address1", "address2", "ADDRESS", "POST CODE",
        "PROPERTY TYPE" and "row_id"
    :param cleaned: cleaned EPC lookup data passed to Eligibility and prepare_model_data_row
    :param cleaning_data: averages dataset used to clean the scoring rows
    :param created_at: timestamp passed to the model rows and to ModelApi
    :return: (results_df, scoring_data, nodata) - one result row per property with an EPC, the raw model
        rows, and the row ids for which the EPC search returned status 204. results_df is an empty
        DataFrame when no property had an EPC.
    """
    house_type_lookup = {
        "Bungalow": "Bungalow",
        "Flat": "Flat",
        'House': "House",
        'Maisonette': "Maisonette",
        # NOTE(review): presumably a value that appears verbatim in the asset list - confirm
        'Flalolflfp mujjjjunjimj': "Flat",
        'STUDIO': "Flat",
    }

    flat_regex = re.compile(r'flat\s+(\d+)', re.IGNORECASE)

    scoring_data = []
    results = []
    nodata = []
    for _, house in tqdm(data.iterrows(), total=len(data)):
        searcher = SearchEpc(
            address1=_ha33_search_address(house, flat_regex),
            postcode=house["POST CODE"]
        )

        response = searcher.search()
        if response["status"] == 204:
            nodata.append(house["row_id"])
            continue

        newest_epc, _, _ = searcher.retrieve(
            property_type=house_type_lookup.get(house["PROPERTY TYPE"], None),
            address=house["ADDRESS"],
        )

        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
        eligibility.check_gbis_warmfront()
        eligibility.check_eco4_warmfront()

        # If the house is not identified, we do a full gbis and eco4 check
        eligibility.check_gbis()
        eligibility.check_eco4()

        if eligibility.eco4_warmfront["eligible"]:
            scoring_dictionary = prepare_model_data_row(
                property_id=house["row_id"],
                modelling_epc=eligibility.epc,
                cleaned=cleaned,
                cleaning_data=cleaning_data,
                created_at=created_at
            )
            scoring_data.extend(scoring_dictionary)

        # Every property with an EPC gets a result row, whether or not it is eligible
        results.append(
            {
                "row_id": house["row_id"],
                "gbis_eligible": eligibility.gbis_warmfront,
                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                "eco4_message": eligibility.eco4_warmfront["message"],
                "sap": float(eligibility.epc["current-energy-efficiency"]),
                "gbis_eligible_future": eligibility.gbis["eligible"],
                "gbis_eligible_future_message": eligibility.gbis["message"],
                "eco4_eligible_future": eligibility.eco4["eligible"],
                "eco4_eligible_future_message": eligibility.eco4["message"],
                # Property components
                "roof": eligibility.roof["clean_description"],
                "walls": eligibility.walls["clean_description"],
                "heating": eligibility.epc["mainheat-description"],
                "tenure": eligibility.tenure,
                "date_epc": eligibility.epc["lodgement-date"],
            }
        )

    results_df = pd.DataFrame(results)
    if results_df.empty:
        # No EPC was found for any property, so there is nothing to score or grade
        return results_df, scoring_data, nodata

    if scoring_data:
        sap_uplift = _predict_sap_uplift(scoring_data, results_df, cleaning_data, created_at, "row_id")
        results_df = results_df.merge(sap_uplift[["sap_uplift", "row_id"]], how="left", on="row_id")
    else:
        # Nothing was ECO4 eligible, so there are no rows to send to the models
        results_df["sap_uplift"] = float("nan")

    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]

    # The upgrade requirements are dependent on the current SAP
    eligibility_assessment = [
        {
            "row_id": row["row_id"],
            "eligibility_classification": _classify_eco4_confidence(row["sap"], row["post_install_sap"]),
        }
        for _, row in results_df[results_df["eco4_eligible"] == True].iterrows()
    ]

    if eligibility_assessment:
        results_df = results_df.merge(pd.DataFrame(eligibility_assessment), how="left", on="row_id")
    else:
        # An empty frame has no "row_id" column to merge on
        results_df["eligibility_classification"] = None

    return results_df, scoring_data, nodata
|
||||
|
||||
|
||||
def analyse_ha_33(results_df, data):
    """
    Exploratory breakdown of the HA33 eligibility results.

    Every figure is computed into a local variable; nothing is returned or stored, so the function is
    only useful when stepped through (for example in a debugger).

    :param results_df: results from get_ha_33data; reads "row_id", "tenure", "walls", "roof",
        "gbis_eligible", "eco4_eligible", "gbis_eligible_future", "eco4_eligible_future" and
        "eligibility_classification"
    :param data: the standardised asset list; reads "row_id" and "PROPERTY TYPE"
    :return: None
    """
    # This frame was referenced below while its definition was commented out (NameError)
    results_df_social = results_df[results_df["tenure"] == "Rented (social)"]

    property_type_counts = data[data["row_id"].isin(results_df["row_id"].values)]["PROPERTY TYPE"].value_counts()

    identified_now = results_df["gbis_eligible"] | results_df["eco4_eligible"]

    n_identified = identified_now.sum()
    n_eco4 = results_df["eco4_eligible"].sum()
    # GBIS only counts properties that did not already qualify for ECO4
    n_gbis = results_df[~results_df["eco4_eligible"]]["gbis_eligible"].sum()

    eco_eligibile = results_df[results_df["eco4_eligible"]]
    eco_wall_counts = eco_eligibile["walls"].value_counts()
    eco_roof_counts = eco_eligibile["roof"].value_counts()

    identified_tenure_counts = results_df[identified_now]["tenure"].value_counts()

    social_classification_counts = results_df_social["eligibility_classification"].value_counts()

    # Properties not identified today that pass the full ECO4 check
    future_possibilities_eco = results_df[
        (results_df["eco4_eligible_future"] == True) & (~identified_now)
    ].copy()

    # Properties not identified today that pass the full GBIS check but not the full ECO4 check
    future_possibilities_gbis = results_df[
        (results_df["gbis_eligible_future"] == True) & (results_df["eco4_eligible_future"] == False) & (
            ~identified_now)
    ].copy()
|
||||
|
||||
|
||||
def app():
    """
    Because HA33 is large, we deal with it separately.

    Loads and standardises the asset list, assigns each row an "h33<n>" id, fetches the cleaned EPC
    lookup and the cleaning dataset from S3 and runs the eligibility assessment.

    The results used to be replaced straight away by the contents of a local "ha33_results.pickle",
    which nothing in this module writes; that reload discarded the fresh run and failed when the file
    was absent, so it has been removed.

    :return: None - the results are only held in local variables
    """
    data = load_ha_33()

    data = standardise_ha33(data)
    data["row_id"] = ["h33" + str(i) for i in range(0, len(data))]

    cleaned = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name="retrofit-data-dev"
    )
    # The stored payload is msgpack-encoded
    cleaned = msgpack.unpackb(cleaned, raw=False)

    cleaning_data = read_parquet_from_s3(
        bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
    )

    created_at = datetime.now().isoformat()

    results_df, scoring_data, nodata = get_ha_33data(data, cleaned, cleaning_data, created_at)
|
||||
328
etl/eligibility/ha_15_32/ha4_app.py
Normal file
328
etl/eligibility/ha_15_32/ha4_app.py
Normal file
|
|
@ -0,0 +1,328 @@
|
|||
import os
|
||||
import msgpack
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from utils.s3 import read_from_s3
|
||||
from utils.logger import setup_logger
|
||||
from dotenv import load_dotenv
|
||||
from utils.s3 import read_dataframe_from_s3_parquet
|
||||
from tqdm import tqdm
|
||||
from backend.SearchEpc import SearchEpc
|
||||
from etl.eligibility.Eligibility import Eligibility
|
||||
from etl.eligibility.ha_15_32.app import prepare_model_data_row
|
||||
from etl.epc.DataProcessor import DataProcessor
|
||||
from etl.epc.settings import COLUMNS_TO_MERGE_ON
|
||||
from backend.ml_models.api import ModelApi
|
||||
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
|
||||
from recommendations.recommendation_utils import calculate_cavity_age
|
||||
from recommendation_utils import convert_thickness_to_numeric
|
||||
|
||||
import re
|
||||
|
||||
# NOTE(review): this module is etl/eligibility/ha_15_32/ha4_app.py, so appending the same segments to
# Path(__file__).parent points at a nested directory that probably does not exist - confirm.
ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"

logger = setup_logger()

# Load the .env file before reading any variable it may define; previously the token was read first,
# so a value that only exists in the .env file was never picked up.
load_dotenv(ENV_FILE)

EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
||||
|
||||
|
||||
def load_ha_4():
    """
    Read the HA4 asset list CSV into a DataFrame.

    As a side effect the pandas display options (rows, columns, width) are widened.

    :return: the asset list exactly as read from disk
    """
    display_options = {
        'display.max_rows': 500,
        'display.max_columns': 500,
        'display.width': 1000,
    }
    for option, value in display_options.items():
        pd.set_option(option, value)

    return pd.read_csv("etl/eligibility/ha_15_32/HA 4 Asset List.csv", low_memory=False)
|
||||
|
||||
|
||||
def standardise_ha_4(data):
    """
    Clean the HA4 asset list ready for EPC searching.

    Rows whose postcode is the placeholder made of two backslashes are dropped, bracketed codes such as
    {0664} are removed from "Location Name", the name is trimmed and one known abbreviation is expanded.
    The input frame is left untouched; the work is done on a copy.

    :param data: asset list with "Location Name" and "Post Code" columns
    :return: a new, cleaned DataFrame
    """
    # Remove any unusable postcodes (copying here also keeps the caller's frame unmodified)
    data = data[data["Post Code"] != '\\\\'].copy()

    # Location name contains some strings like {0664} which we remove
    data['Location Name'] = data['Location Name'].str.replace(r'\{.*?\}', '', regex=True)

    # Trim whitespace from either end of location name
    data["Location Name"] = data["Location Name"].str.strip()

    # Some specific replacements
    data["Location Name"] = np.where(
        data["Location Name"] == "Calderbrook Pl & Cog La",
        "Calderbrook Place",
        data["Location Name"]
    )

    return data
|
||||
|
||||
|
||||
def _classify_eco4_confidence(current_sap, post_install_sap):
    """
    Grade the predicted post-install SAP score against the target for the property's current SAP.

    A property with a current SAP of 38 or below is graded against 57 / 55 / 53, any other property
    against 71 / 69 / 67. A missing (NaN) post-install score fails every comparison and is "unlikely".
    NOTE(review): the cut-offs look like the band D and band C boundaries +/- 2 points - confirm.
    """
    thresholds = (57, 55, 53) if current_sap <= 38 else (71, 69, 67)
    labels = ("highest confidence", "high confidence", "medium confidence")
    for threshold, label in zip(thresholds, labels):
        if post_install_sap >= threshold:
            return label
    return "unlikely"


def _predict_sap_uplift(scoring_data, results_df, cleaning_data, created_at, id_column):
    """
    Clean the scoring rows, run the models and return the summed predicted SAP uplift per property.

    :return: DataFrame with columns [id_column, "sap_uplift"]
    """
    scoring_df = pd.DataFrame(scoring_data)

    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )

    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])

    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )

    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)

    # NOTE(review): the portfolio id still says "ha33" in this HA4 script - confirm this is intended
    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )

    # Attach the current SAP so each prediction can be expressed as an uplift over today's score
    predictions = all_predictions["sap_change_predictions"].copy()
    predictions = predictions.rename(columns={"property_id": id_column}).merge(
        results_df[[id_column, "sap"]], how="left", on=id_column
    )
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    return predictions.groupby(id_column)["sap_uplift"].sum().reset_index()


def get_ha_4_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
    """
    Look up the EPCs behind every HA4 asset row, check eligibility per UPRN and grade ECO4 candidates.

    Fixes over the previous version: the loop referred to "house" and "property_type_lookup", neither of
    which was defined in this module (NameError on the first row); they are now the current row and a
    local lookup matching the one used for HA7. Rows without data are always recorded as dicts, and the
    grading merge no longer fails when no property is ECO4 eligible.

    :param data: standardised HA4 assets; reads "Address Line 1", "Location Name", "Post Code" and, when
        present, "Archetype"
    :param cleaned: cleaned EPC lookup data passed to Eligibility and prepare_model_data_row
    :param cleaning_data: averages dataset used to clean the scoring rows
    :param created_at: timestamp passed to the model rows and to ModelApi
    :param photo_supply_lookup: not used by this function
    :param floor_area_decile_thresholds: not used by this function
        NOTE(review): the HA7 script forwards both to prepare_model_data_row - confirm whether HA4 should
    :return: (results_df, scoring_data, nodata) - one result row per UPRN, the raw model rows, and the
        asset rows (as dicts) for which no EPC data was found. results_df is an empty DataFrame when no
        EPC was found at all.
    """
    property_type_lookup = {
        "House": "House",
        "Flat": "Flat",
        "Bungalow": "Bungalow",
        "Maisonette": "Maisonette",
    }

    scoring_data = []
    results = []
    nodata = []
    for _, property_meta in tqdm(data.iterrows(), total=len(data)):
        # For many of the entries in this dataset, we're actually given an entire building, so we get EPCs
        # for every dwelling in the building

        # .get keeps this working when the asset list has no "Archetype" column (property type is then None)
        property_type = property_type_lookup.get(property_meta.get("Archetype"))

        searcher = SearchEpc(
            address1=property_meta["Address Line 1"],
            postcode=property_meta["Post Code"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            property_type=property_type,
        )

        searcher.find_property(skip_os=True)

        if searcher.newest_epc is None:
            # Retry with the location name in place of the first address line
            # NOTE(review): only search() is called on the retry - confirm it populates newest_epc
            searcher = SearchEpc(
                address1=property_meta["Location Name"],
                postcode=property_meta["Post Code"],
                auth_token=EPC_AUTH_TOKEN,
                os_api_key=None,
                property_type=property_type,
            )
            searcher.search()

        if searcher.newest_epc is None:
            nodata.append(property_meta.to_dict())
            continue

        searcher.search()

        if searcher.data is None:
            nodata.append(property_meta.to_dict())
            continue

        epcs = pd.DataFrame(searcher.data["rows"])

        # Take the newest EPC by UPRN
        epcs = epcs.sort_values(by=["lodgement-date"], ascending=False)
        newest_epcs = epcs.drop_duplicates(subset=["uprn"], keep="first")

        # For each EPC, we now check eligibility
        for _, epc in newest_epcs.iterrows():
            eligibility = Eligibility(epc=epc.to_dict(), cleaned=cleaned)
            eligibility.check_gbis_warmfront()
            eligibility.check_eco4_warmfront()

            # If the house is not identified, we do a full gbis and eco4 check
            eligibility.check_gbis()
            eligibility.check_eco4()

            if eligibility.eco4_warmfront["eligible"]:
                # Earlier certificates lodged against the same UPRN
                old_data = epcs[
                    (epcs["uprn"] == epc["uprn"]) &
                    (epcs["lmk-key"] != epc["lmk-key"])
                ].to_dict("records")

                full_sap_epc = epcs[
                    (epcs["uprn"] == epc["uprn"]) &
                    (epcs["transaction-type"] == "new dwelling")
                ].to_dict("records")

                scoring_dictionary = prepare_model_data_row(
                    property_id=eligibility.epc["uprn"],
                    modelling_epc=eligibility.epc,
                    cleaned=cleaned,
                    cleaning_data=cleaning_data,
                    created_at=created_at,
                    old_data=old_data,
                    full_sap_epc=full_sap_epc
                )
                scoring_data.extend(scoring_dictionary)

            results.append(
                {
                    "uprn": epc["uprn"],
                    "Location Name": property_meta["Location Name"],
                    "Post Code": property_meta["Post Code"],
                    "property_type": eligibility.epc["property-type"],
                    "gbis_eligible": eligibility.gbis_warmfront,
                    "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                    "eco4_message": eligibility.eco4_warmfront["message"],
                    "sap": float(eligibility.epc["current-energy-efficiency"]),
                    "gbis_eligible_future": eligibility.gbis["eligible"],
                    "gbis_eligible_future_message": eligibility.gbis["message"],
                    "eco4_eligible_future": eligibility.eco4["eligible"],
                    "eco4_eligible_future_message": eligibility.eco4["message"],
                    # Property components
                    "roof": eligibility.roof["clean_description"],
                    "walls": eligibility.walls["clean_description"],
                    "cavity_type": eligibility.cavity["type"],
                    "heating": eligibility.epc["mainheat-description"],
                    "tenure": eligibility.tenure,
                    "date_epc": eligibility.epc["lodgement-date"],
                }
            )

    results_df = pd.DataFrame(results)
    if results_df.empty:
        # No EPC was found for any asset row, so there is nothing to score or grade
        return results_df, scoring_data, nodata

    if scoring_data:
        sap_uplift = _predict_sap_uplift(scoring_data, results_df, cleaning_data, created_at, "uprn")
        results_df = results_df.merge(sap_uplift[["sap_uplift", "uprn"]], how="left", on="uprn")
    else:
        # Nothing was ECO4 eligible, so there are no rows to send to the models
        results_df["sap_uplift"] = float("nan")

    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]

    results_df = results_df[~pd.isnull(results_df["uprn"])]

    # The upgrade requirements are dependent on the current SAP
    eligibility_assessment = [
        {
            "uprn": row["uprn"],
            "eligibility_classification": _classify_eco4_confidence(row["sap"], row["post_install_sap"]),
        }
        for _, row in results_df[results_df["eco4_eligible"] == True].iterrows()
    ]

    if eligibility_assessment:
        results_df = results_df.merge(pd.DataFrame(eligibility_assessment), how="left", on="uprn")
    else:
        # An empty frame has no "uprn" column to merge on
        results_df["eligibility_classification"] = None

    # We have some properties that are duplicated so we take just one instance
    results_df = results_df.drop_duplicates(subset=["uprn"])

    return results_df, scoring_data, nodata
|
||||
|
||||
|
||||
def analyse_ha_4(results_df, data):
    """
    Exploratory breakdown of the HA4 eligibility results.

    All figures end up in local variables; nothing is returned or stored, and `data` is not read.

    :param results_df: results from get_ha_4_data; reads "gbis_eligible", "eco4_eligible",
        "gbis_eligible_future", "eco4_eligible_future" and "eligibility_classification"
    :param data: the asset list (unused)
    :return: None
    """
    eco4_now = results_df["eco4_eligible"]
    identified_now = results_df["gbis_eligible"] | eco4_now

    # Headline counts: anything identified, ECO4, and GBIS among those that are not ECO4
    n_identified = identified_now.sum()
    n_eco4 = eco4_now.sum()
    n_gbis = results_df[~eco4_now]["gbis_eligible"].sum()

    eco_eligibile = results_df[eco4_now]
    eco_eligibile["eligibility_classification"].value_counts()

    # Properties missed today that pass the full checks
    passes_full_eco4 = results_df["eco4_eligible_future"] == True
    passes_full_gbis = results_df["gbis_eligible_future"] == True
    fails_full_eco4 = results_df["eco4_eligible_future"] == False

    future_possibilities_eco = results_df[passes_full_eco4 & (~identified_now)].copy()
    future_possibilities_gbis = results_df[passes_full_gbis & fails_full_eco4 & (~identified_now)].copy()

    total_future_possibilities = len(future_possibilities_eco.index) + len(future_possibilities_gbis.index)
|
||||
|
||||
|
||||
def app():
    """
    Run the HA4 eligibility assessment end to end.

    Loads and standardises the asset list, assigns each row an "h4<n>" id, fetches the cleaned EPC
    lookup, the cleaning dataset and the solar lookups from the dev bucket, then calls get_ha_4_data.

    :return: None - the results are only held in local variables
    """
    dev_bucket = "retrofit-data-dev"

    data = standardise_ha_4(load_ha_4())
    data["row_id"] = [f"h4{position}" for position in range(len(data))]

    # The cleaned EPC lookup is stored msgpack-encoded
    packed_lookup = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name=dev_bucket,
    )
    cleaned = msgpack.unpackb(packed_lookup, raw=False)

    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name=dev_bucket,
        file_key="sap_change_model/cleaning_dataset.parquet",
    )

    created_at = datetime.now().isoformat()

    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=dev_bucket)

    results_df, scoring_data, nodata = get_ha_4_data(
        data=data,
        cleaned=cleaned,
        cleaning_data=cleaning_data,
        created_at=created_at,
        photo_supply_lookup=photo_supply_lookup,
        floor_area_decile_thresholds=floor_area_decile_thresholds,
    )

    # A disabled snippet used to sit here that pickled results_df / scoring_data / nodata to
    # "ha_4.pickle" and read them back, for re-running the analysis without repeating the searches.
|
||||
383
etl/eligibility/ha_15_32/ha7_app.py
Normal file
383
etl/eligibility/ha_15_32/ha7_app.py
Normal file
|
|
@ -0,0 +1,383 @@
|
|||
import os
|
||||
import msgpack
|
||||
import openpyxl
|
||||
from openpyxl.styles.colors import COLOR_INDEX
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet
|
||||
from utils.logger import setup_logger
|
||||
from dotenv import load_dotenv
|
||||
from tqdm import tqdm
|
||||
from backend.SearchEpc import SearchEpc
|
||||
from etl.eligibility.Eligibility import Eligibility
|
||||
from etl.eligibility.ha_15_32.app import prepare_model_data_row
|
||||
from etl.epc.DataProcessor import DataProcessor
|
||||
from etl.epc.settings import COLUMNS_TO_MERGE_ON
|
||||
from backend.ml_models.api import ModelApi
|
||||
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
|
||||
from recommendations.recommendation_utils import calculate_cavity_age
|
||||
from recommendation_utils import convert_thickness_to_numeric
|
||||
|
||||
# NOTE(review): this module is etl/eligibility/ha_15_32/ha7_app.py, so appending the same segments to
# Path(__file__).parent points at a nested directory that probably does not exist - confirm.
ENV_FILE = Path(__file__).parent.joinpath("etl", "eligibility", "ha_15_32", ".env")

logger = setup_logger()

# The .env file is loaded before the credentials below are read from the environment
load_dotenv(ENV_FILE)

EPC_AUTH_TOKEN = os.environ.get("EPC_AUTH_TOKEN")
OS_API_KEY = os.environ.get("ORDNANCE_SURVEY_API_KEY")
|
||||
|
||||
|
||||
def _first_cell_fill_colour(row):
    """
    Return the fill colour of the first cell in a worksheet row as a hex string, or None without a fill.

    An integer colour index is looked up in openpyxl's COLOR_INDEX palette; a string value is returned
    unchanged. '00000000' is treated as "no fill".
    """
    colour = row[0].fill.start_color.index
    if colour is None or colour == '00000000':
        return None
    if isinstance(colour, int):
        return COLOR_INDEX[colour]
    return colour


def load_data():
    """
    Load the HA7 asset list from the Excel workbook, including the fill colour of each row.

    The first worksheet row is used as the header. The fill colour of the first cell of every data row is
    stored in "row_color" and translated into "row_colour_name" and "row_code". Rows with no fill no
    longer abort the load; like any colour other than the two matched explicitly they fall into the
    default ("yellow" / "needs criteria change") bucket.
    NOTE(review): confirm that unfilled rows really belong in the default bucket.

    :return: DataFrame of the sheet with the ninth column renamed "is_active", entirely empty columns
        removed and the three colour columns added
    """
    workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 7 ASSET LIST.xlsx')
    sheet = workbook.active

    # Collect the values and the fill colour of every data row (the first row holds the headers)
    rows_data = []
    rows_colors = []
    for row in sheet.iter_rows(min_row=2, values_only=False):
        rows_data.append([cell.value for cell in row])
        rows_colors.append(_first_cell_fill_colour(row))

    df = pd.DataFrame(rows_data, columns=[cell.value for cell in sheet[1]])

    # Add the row colors as a new column
    df['row_color'] = rows_colors

    # Rename by position through a fresh list instead of writing into the Index's underlying array
    column_names = list(df.columns)
    column_names[8] = "is_active"
    df.columns = column_names

    # Remove None columns
    df = df.dropna(axis=1, how='all')

    # We now parse the colours
    df["row_colour_name"] = np.where(
        df["row_color"] == "0000FFFF", "red",
        np.where(df["row_color"] == "00FF00FF", "green", "yellow")
    )
    df["row_code"] = np.where(
        df["row_colour_name"] == "red", "invalid",
        np.where(df["row_colour_name"] == "green", "potential ECO4", "needs criteria change")
    )

    return df
|
||||
|
||||
|
||||
def _classify_eco4_confidence(current_sap, post_install_sap):
    """
    Grade the predicted post-install SAP score against the target for the property's current SAP.

    A property with a current SAP of 38 or below is graded against 57 / 55 / 53, any other property
    against 71 / 69 / 67. A missing (NaN) post-install score fails every comparison and is "unlikely".
    NOTE(review): the cut-offs look like the band D and band C boundaries +/- 2 points - confirm.
    """
    thresholds = (57, 55, 53) if current_sap <= 38 else (71, 69, 67)
    labels = ("highest confidence", "high confidence", "medium confidence")
    for threshold, label in zip(thresholds, labels):
        if post_install_sap >= threshold:
            return label
    return "unlikely"


def _predict_sap_uplift(scoring_data, results_df, cleaning_data, created_at, id_column):
    """
    Clean the scoring rows, run the models and return the summed predicted SAP uplift per property.

    :return: DataFrame with columns [id_column, "sap_uplift"]
    """
    scoring_df = pd.DataFrame(scoring_data)

    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )

    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])

    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )

    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)

    # NOTE(review): the portfolio id still says "ha33" in this HA7 script - confirm this is intended
    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )

    # Attach the current SAP so each prediction can be expressed as an uplift over today's score
    predictions = all_predictions["sap_change_predictions"].copy()
    predictions = predictions.rename(columns={"property_id": id_column}).merge(
        results_df[[id_column, "sap"]], how="left", on=id_column
    )
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    return predictions.groupby(id_column)["sap_uplift"].sum().reset_index()


def get_ha7_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
    """
    Find the EPC of every HA7 property, check GBIS / ECO4 eligibility and grade the ECO4 candidates.

    :param data: HA7 assets; reads "Address", "Address2", "Postcode", "Archetype" and "row_id"
    :param cleaned: cleaned EPC lookup data passed to Eligibility, calculate_cavity_age and
        prepare_model_data_row
    :param cleaning_data: averages dataset used to clean the scoring rows
    :param created_at: timestamp passed to the model rows and to ModelApi
    :param photo_supply_lookup: forwarded to prepare_model_data_row
    :param floor_area_decile_thresholds: forwarded to prepare_model_data_row
    :return: (results_df, scoring_data, nodata) - one result row per property with an EPC, the raw model
        rows, and the row ids for which no EPC was found. results_df is an empty DataFrame when no
        property had an EPC.
    """
    property_type_lookup = {
        "House": "House",
        "Flat": "Flat",
        "Bungalow": "Bungalow",
        "Maisonette": "Maisonette",
    }

    scoring_data = []
    results = []
    nodata = []
    for _, house in tqdm(data.iterrows(), total=len(data)):
        # Fall back to the second address field when the first one is empty
        address = house["Address"] or house["Address2"]

        searcher = SearchEpc(
            address1=address,
            postcode=house["Postcode"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            property_type=property_type_lookup.get(house["Archetype"]),
        )

        searcher.find_property(skip_os=True)

        if searcher.newest_epc is None:
            nodata.append(house["row_id"])
            continue

        newest_epc = searcher.newest_epc
        older_epcs = searcher.older_epcs
        full_sap_epc = searcher.full_sap_epc

        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
        eligibility.check_gbis_warmfront()
        eligibility.check_eco4_warmfront()

        # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
        # Loft MUST be suitable
        cavity_age = None
        if (
            eligibility.walls["is_cavity_wall"] and
            eligibility.walls["is_filled_cavity"] and
            eligibility.loft["suitability"] and
            eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
        ):
            # We check the age of the cavity and if it's particularly old, we flag it
            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)

        # If the house is not identified, we do a full gbis and eco4 check
        eligibility.check_gbis()
        eligibility.check_eco4()

        if eligibility.eco4_warmfront["eligible"]:
            scoring_dictionary = prepare_model_data_row(
                property_id=house["row_id"],
                modelling_epc=eligibility.epc,
                cleaned=cleaned,
                cleaning_data=cleaning_data,
                created_at=created_at,
                old_data=older_epcs,
                full_sap_epc=full_sap_epc,
                photo_supply_lookup=photo_supply_lookup,
                floor_area_decile_thresholds=floor_area_decile_thresholds
            )
            scoring_data.extend(scoring_dictionary)

        # Every property with an EPC gets a result row, whether or not it is eligible.
        # The order of the unpacked dicts matters: later keys overwrite earlier ones of the same name.
        results.append(
            {
                "row_id": house["row_id"],
                "address": house["Address"],
                "postcode": house["Postcode"],
                "gbis_eligible": eligibility.gbis_warmfront,
                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                "eco4_message": eligibility.eco4_warmfront["message"],
                "sap": float(eligibility.epc["current-energy-efficiency"]),
                "gbis_eligible_future": eligibility.gbis["eligible"],
                "gbis_eligible_future_message": eligibility.gbis["message"],
                "eco4_eligible_future": eligibility.eco4["eligible"],
                "eco4_eligible_future_message": eligibility.eco4["message"],
                # Property components
                "roof": eligibility.roof["clean_description"],
                "walls": eligibility.walls["clean_description"],
                "heating": eligibility.epc["mainheat-description"],
                "tenure": eligibility.tenure,
                "date_epc": eligibility.epc["lodgement-date"],
                **newest_epc,
                "cavity_age": cavity_age,
                **eligibility.walls,
                **eligibility.roof,
            }
        )

    results_df = pd.DataFrame(results)
    if results_df.empty:
        # No EPC was found for any property, so there is nothing to score or grade
        return results_df, scoring_data, nodata

    if scoring_data:
        sap_uplift = _predict_sap_uplift(scoring_data, results_df, cleaning_data, created_at, "row_id")
        results_df = results_df.merge(sap_uplift[["sap_uplift", "row_id"]], how="left", on="row_id")
    else:
        # Nothing was ECO4 eligible, so there are no rows to send to the models
        results_df["sap_uplift"] = float("nan")

    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]

    # The upgrade requirements are dependent on the current SAP
    eligibility_assessment = [
        {
            "row_id": row["row_id"],
            "eligibility_classification": _classify_eco4_confidence(row["sap"], row["post_install_sap"]),
        }
        for _, row in results_df[results_df["eco4_eligible"] == True].iterrows()
    ]

    if eligibility_assessment:
        results_df = results_df.merge(pd.DataFrame(eligibility_assessment), how="left", on="row_id")
    else:
        # An empty frame has no "row_id" column to merge on
        results_df["eligibility_classification"] = None

    return results_df, scoring_data, nodata
|
||||
|
||||
|
||||
def analyse_ha_7(results_df, data):
    """
    Exploratory breakdown of the HA7 eligibility results against the client's own labels.

    The results are joined onto the client's "row_code", "Property Type" and
    "Construction Year Band" columns and then sliced in a number of ways. Nothing is
    returned or stored: every slice and count is a local variable, so this is only
    useful when stepping through it interactively.

    :param results_df: Eligibility results, keyed by "row_id"
    :param data: The client's dataset, keyed by "row_id"
    """
    client_columns = ["row_id", "row_code", "Property Type", "Construction Year Band"]
    analysis_data = results_df.merge(data[client_columns], how="left", on="row_id")

    analysis_data["row_code"].value_counts()

    # NEW

    # Normalise missing thicknesses to None before converting them to a number
    raw_thickness = analysis_data["roof_insulation_thickness"]
    analysis_data["roof_insulation_thickness"] = np.where(pd.isnull(raw_thickness), None, raw_thickness)
    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
        lambda thickness: convert_thickness_to_numeric(thickness, is_flat=False, is_pitched=True)
    )

    eco4_now = analysis_data["eco4_eligible"] == True
    roof_thickness = analysis_data["roof_insulation_thickness_numeric"]

    ideal_eco4 = analysis_data[eco4_now & (roof_thickness <= 100)]

    secondary_eco4_warmfront_not_sold = analysis_data[eco4_now & (roof_thickness > 100)]

    # underperforming cavities
    failed_on_full_cavity = analysis_data["eco4_message"] == "Failed due to full cavity - check cavity age"
    old_cavity = analysis_data["cavity_age"] > 9 * 365
    underperforming_cavities = analysis_data[failed_on_full_cavity & old_cavity & (roof_thickness <= 100)]

    gbis_only = (analysis_data["gbis_eligible"] == True) & (analysis_data["eco4_eligible"] == False)
    identified_gbis_not_sold = analysis_data[gbis_only]

    wf_identified = analysis_data[analysis_data["row_code"] == "potential ECO4"]

    # END NEW

    warmfront_identification = analysis_data["row_code"].value_counts()
    warmfront_identified = analysis_data[analysis_data["row_code"] == "potential ECO4"]
    warmfront_identified["walls"].value_counts(normalize=True)

    analysis_data["Construction Year Band"].value_counts(normalize=True)

    # Number of days from today
    epc_dates = pd.to_datetime(warmfront_identified["date_epc"])
    days_to_today = (datetime.now() - epc_dates).dt.days
    days_to_today.mean()

    property_types = analysis_data["Property Type"].value_counts()

    identified_now = results_df["gbis_eligible"] | results_df["eco4_eligible"]
    n_identified = identified_now.sum()

    eco_identified = results_df[results_df["eco4_eligible"]]
    n_eco4 = eco_identified["eco4_eligible"].sum()
    gbis_identified = results_df[~results_df["eco4_eligible"] & results_df["gbis_eligible"]]
    n_gbis = results_df[~results_df["eco4_eligible"]]["gbis_eligible"].sum()

    eco_eligibile = results_df[results_df["eco4_eligible"]]
    eco_eligibile["eligibility_classification"].value_counts()

    # Properties not identified today that are flagged as eligible in future
    not_identified_now = ~(results_df["gbis_eligible"] | results_df["eco4_eligible"])

    future_eco_mask = (results_df["eco4_eligible_future"] == True) & not_identified_now
    future_possibilities_eco = results_df[future_eco_mask].copy()

    future_gbis_mask = (
        (results_df["gbis_eligible_future"] == True)
        & (results_df["eco4_eligible_future"] == False)
        & not_identified_now
    )
    future_possibilities_gbis = results_df[future_gbis_mask].copy()

    total_future_possibilities = future_possibilities_eco.shape[0] + future_possibilities_gbis.shape[0]
|
||||
|
||||
|
||||
def app():
    """
    Entry point: load the HA7 dataset plus the supporting artefacts held in the
    "retrofit-data-dev" bucket and pass them all to get_ha7_data.
    """
    bucket = "retrofit-data-dev"

    data = load_data()
    # Every row gets a synthetic identifier based on its position in the file
    data["row_id"] = [f"ha7{position}" for position in range(len(data))]

    # The cleaned EPC data is stored msgpack-encoded
    packed_cleaned = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name=bucket,
    )
    cleaned = msgpack.unpackb(packed_cleaned, raw=False)

    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name=bucket,
        file_key="sap_change_model/cleaning_dataset.parquet",
    )

    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=bucket)

    created_at = datetime.now().isoformat()

    results_df, scoring_data, nodata = get_ha7_data(
        data,
        cleaned,
        cleaning_data,
        created_at,
        photo_supply_lookup,
        floor_area_decile_thresholds,
    )

    # Pickle results
    # import pickle
    # with open("ha7_results_jan_10.pkl", "wb") as f:
    #     pickle.dump({"results_df": results_df, "scoring_data": scoring_data, "nodata": nodata}, f)

    # Read in the old data
    # import pickle
    # with open("ha7_results_jan_10.pkl", "rb") as f:
    #     old_data = pickle.load(f)
    # results_df = old_data["results_df"]
    # scoring_data = old_data["scoring_data"]
    # nodata = old_data["nodata"]
|
||||
7286
etl/eligibility/ha_15_32/ha_analysis_batch_3.py
Normal file
7286
etl/eligibility/ha_15_32/ha_analysis_batch_3.py
Normal file
File diff suppressed because it is too large
Load diff
11
etl/eligibility/ha_15_32/requirements.txt
Normal file
11
etl/eligibility/ha_15_32/requirements.txt
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
pandas
|
||||
pydantic==1.10.11
|
||||
epc-api-python==1.0.2
|
||||
msgpack
|
||||
tqdm
|
||||
python-dotenv
|
||||
boto3
|
||||
textblob
|
||||
pyarrow==12.0.1
|
||||
fuzzywuzzy
|
||||
python-Levenshtein
|
||||
|
|
@ -5,6 +5,10 @@ from BaseUtility import Definitions
|
|||
from etl.epc.settings import (
|
||||
DATA_PROCESSOR_SETTINGS,
|
||||
EARLIEST_EPC_DATE,
|
||||
IGNORED_TRANSACTION_TYPES,
|
||||
IGNORED_FLOOR_LEVELS,
|
||||
IGNORED_PROPERTY_TYPES,
|
||||
IGNORED_TENURES,
|
||||
FULLY_GLAZED_DESCRIPTIONS,
|
||||
AVERAGE_FIXED_FEATURES,
|
||||
BUILT_FORM_REMAP,
|
||||
|
|
@ -24,8 +28,14 @@ from recommendations.rdsap_tables import FLOOR_LEVEL_MAP
|
|||
|
||||
from typing import List
|
||||
|
||||
# TODO: change the setting columns to lower
|
||||
STARTING_SUFFIX_COMPONENT_COLS = [x.lower() for x in STARTING_SUFFIX_COMPONENT_COLS]
|
||||
NO_SUFFIX_COMPONENT_COLS = [x.lower() for x in NO_SUFFIX_COMPONENT_COLS]
|
||||
ENDING_SUFFIX_COMPONENT_COLS = [x.lower() for x in ENDING_SUFFIX_COMPONENT_COLS]
|
||||
POTENTIAL_COLUMNS = [x.lower() for x in POTENTIAL_COLUMNS]
|
||||
|
||||
# These lookups are used to clean the construction age band
|
||||
bounds_map = {
|
||||
construction_age_bounds_map = {
|
||||
"England and Wales: before 1900": {"l": 0, "u": 1899},
|
||||
"England and Wales: 1930-1949": {"l": 1930, "u": 1949},
|
||||
"England and Wales: 1900-1929": {"l": 1900, "u": 1929},
|
||||
|
|
@ -40,13 +50,13 @@ bounds_map = {
|
|||
"England and Wales: 2012 onwards": {"l": 2012, "u": 3000},
|
||||
}
|
||||
|
||||
remap = {
|
||||
construction_age_remap = {
|
||||
"England and Wales: 2007 onwards": "England and Wales: 2007-2011"
|
||||
}
|
||||
|
||||
expanded_map = {
|
||||
i: [
|
||||
label for label, bounds in bounds_map.items() if (i <= bounds["u"]) and (i >= bounds['l'])
|
||||
label for label, bounds in construction_age_bounds_map.items() if (i <= bounds["u"]) and (i >= bounds['l'])
|
||||
][0] for i in range(0, 3001)
|
||||
}
|
||||
|
||||
|
|
@ -59,26 +69,205 @@ def is_int(x):
|
|||
return False
|
||||
|
||||
|
||||
class DataProcessor:
|
||||
class EPCDataProcessor:
|
||||
"""
|
||||
Handle data loading and data preprocessing
|
||||
"""
|
||||
|
||||
def __init__(self, filepath: Path | None, newdata: bool = False) -> None:
|
||||
def __init__(self, data: pd.DataFrame | None = None, cleaning_averages: pd.DataFrame | None = None,
|
||||
run_mode: str = "training", violation_mode: bool = False) -> None:
|
||||
"""
|
||||
:param filepath: If specified, is the physical location of the data
|
||||
:param newdata: Indicates if we are processing new, testing data.
|
||||
:param is_newdata: Indicates if we are processing new, testing data.
|
||||
In this instance, there are some operations we do not
|
||||
want to perform, such as confine_data()
|
||||
"""
|
||||
self.filepath = filepath
|
||||
self.data = None
|
||||
self.newdata = newdata
|
||||
is_data_a_dataframe = isinstance(data, pd.DataFrame)
|
||||
self.data: pd.DataFrame = data if is_data_a_dataframe else pd.DataFrame()
|
||||
|
||||
def load_data(self, low_memory=False) -> None:
|
||||
if not self.filepath:
|
||||
is_cleaning_averages_a_dataframe = isinstance(cleaning_averages, pd.DataFrame)
|
||||
self.cleaning_averages: pd.DataFrame = cleaning_averages if is_cleaning_averages_a_dataframe else pd.DataFrame()
|
||||
|
||||
# FOR NOW IF VIOLATION MODE IS ON, WE USE RUN MODE AS NEWDATA
|
||||
self.violation_mode = violation_mode
|
||||
if run_mode not in ["training", "newdata"]:
|
||||
raise ValueError("Run mode must be either training or newdata")
|
||||
self.run_mode = run_mode if not violation_mode else "newdata"
|
||||
|
||||
def prepare_data(self, filepath: Path | str | None = None) -> None:
|
||||
"""
|
||||
Given the run mode, we apply the relevant pipeline steps
|
||||
Ignore step is used to highlight which steps are not needed in newdata
|
||||
"""
|
||||
|
||||
ignore_step = True if self.run_mode == "newdata" else False
|
||||
|
||||
if filepath is not None:
|
||||
self.load_data(filepath=filepath, low_memory=DATA_PROCESSOR_SETTINGS["low_memory"])
|
||||
|
||||
if len(self.data) == 0:
|
||||
raise Exception("No data to process - check filepath/ data being passed in")
|
||||
|
||||
self.confine_data(ignore_step=ignore_step)
|
||||
self.remap_anomalies()
|
||||
self.remap_floor_level(ignore_step=ignore_step)
|
||||
self.remap_build_form()
|
||||
self.cast_data_column_values_to_lower()
|
||||
self.standardise_construction_age_band(ignore_step=ignore_step)
|
||||
self.clean_missing_rooms(ignore_step=ignore_step)
|
||||
self.recast_df_columns(
|
||||
column_mappings=DATA_PROCESSOR_SETTINGS["column_mappings"]
|
||||
)
|
||||
self.clean_multi_glaze_proportion(ignore_step=ignore_step)
|
||||
self.clean_photo_supply()
|
||||
self.retain_multiple_epc_properties(
|
||||
epc_minimum_count=DATA_PROCESSOR_SETTINGS["epc_minimum_count"], ignore_step=ignore_step
|
||||
)
|
||||
|
||||
self.fill_na_fields()
|
||||
|
||||
self.sort_data_by_uprn_lodgement_date(ignore_step=ignore_step)
|
||||
|
||||
# Final re-casting after data transformed and prepared
|
||||
self.recast_df_columns(column_mappings=COLUMNTYPES, auto_subset_columns=True)
|
||||
self.recast_all_data(column_mappings=COLUMNTYPES, auto_subset_columns=True)
|
||||
self.na_remapping(auto_subset_columns=True)
|
||||
|
||||
self.fill_invalid_constituency_fields(ignore_step=ignore_step)
|
||||
|
||||
self.make_cleaning_averages(ignore_step=ignore_step)
|
||||
self.add_local_authority_to_cleaning_average(ignore_step=ignore_step)
|
||||
|
||||
# TODO: check if this has impact on training dataset
|
||||
# cleaned_data = self.apply_averages_cleaning(
|
||||
# data_to_clean=self.data,
|
||||
# cleaning_data=self.cleaning_averages,
|
||||
# cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
|
||||
# colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
|
||||
# )
|
||||
|
||||
# When running in newdata mode, cleaning_averages has lower cases so we co-erce back to upper
|
||||
cleaning_averages = self.cleaning_averages.copy()
|
||||
if self.run_mode == "newdata":
|
||||
cleaning_averages.columns = cleaning_averages.columns.str.upper()
|
||||
|
||||
cleaned_data = self.apply_averages_cleaning(
|
||||
data_to_clean=self.data,
|
||||
cleaning_data=cleaning_averages,
|
||||
cols_to_merge_on=COLUMNS_TO_MERGE_ON,
|
||||
)
|
||||
|
||||
self.data = self.data if cleaned_data is None else cleaned_data
|
||||
|
||||
self.cast_cleaning_averages_columns_to_lower(ignore_step=ignore_step)
|
||||
self.cast_data_columns_to_lower()
|
||||
|
||||
def cast_data_columns_to_lower(self):
    """
    Lower-case every column name of self.data (the values are untouched)
    """
    lowered_names = self.data.columns.str.lower()
    self.data.columns = lowered_names
|
||||
|
||||
def cast_cleaning_averages_columns_to_lower(self, ignore_step: bool = False):
    """
    Lower-case every column name of self.cleaning_averages

    :param ignore_step: When True (newdata mode) the column names are left as they are
    """
    if not ignore_step:
        lowered_names = self.cleaning_averages.columns.str.lower()
        self.cleaning_averages.columns = lowered_names
|
||||
|
||||
def add_local_authority_to_cleaning_average(self, ignore_step: bool = False):
    """
    Stamp the cleaning averages with a LOCAL_AUTHORITY column.

    The value is taken from the first row of self.data and applied to every row of
    self.cleaning_averages.

    :param ignore_step: When True (newdata mode) nothing is added
    """
    if not ignore_step:
        first_local_authority = self.data["LOCAL_AUTHORITY"].values[0]
        self.cleaning_averages["LOCAL_AUTHORITY"] = first_local_authority
|
||||
|
||||
def fill_invalid_constituency_fields(self, ignore_step: bool = False):
    """
    Fill missing CONSTITUENCY values with the most common constituency in the data.

    Nothing happens in violation mode (not implemented yet) or when ignore_step is set.
    If no row has a constituency at all there is no mode to fill with, so the data is
    left unchanged rather than failing with an IndexError.

    :param ignore_step: When True (newdata mode) the data is left as it is
    """
    if self.violation_mode:
        # TODO: to fill in
        return

    if ignore_step:
        return

    # mode() ignores missing values, so it is empty when the whole column is null
    most_common = self.data["CONSTITUENCY"].mode()
    if most_common.empty:
        return

    self.data = self.data.fillna({"CONSTITUENCY": most_common.iloc[0]})
|
||||
|
||||
def sort_data_by_uprn_lodgement_date(self, ignore_step: bool = False):
    """
    Order the data by UPRN and then by lodgement date, both ascending.
    Violation mode makes no difference to this step.

    :param ignore_step: When True (newdata mode) the existing order is kept
    """
    if not ignore_step:
        sort_keys = ["UPRN", "LODGEMENT_DATE"]
        self.data = self.data.sort_values(by=sort_keys, ascending=True)
|
||||
|
||||
def cast_data_column_values_to_lower(self):
    """
    Lower-case the values of selected text columns (currently only TRANSACTION_TYPE).
    Runs the same way in every run mode, including violation mode.
    """
    for column_name in ("TRANSACTION_TYPE",):
        lowered_values = self.data[column_name].str.lower()
        self.data[column_name] = lowered_values
|
||||
|
||||
def remap_build_form(self):
    """
    Replace BUILT_FORM values according to BUILT_FORM_REMAP; values without an entry
    in the mapping are kept. Runs the same way in every run mode, including violation mode.
    """
    remapped_built_form = self.data["BUILT_FORM"].replace(BUILT_FORM_REMAP)
    self.data["BUILT_FORM"] = remapped_built_form
|
||||
|
||||
def remap_anomalies(self):
    """
    Replace every known anomaly value, and every NaN, with None.

    The anomaly values are the exact matches listed in Definitions.DATA_ANOMALY_MATCHES.
    Runs the same way in every run mode, including violation mode.
    """
    # Map all anomaly values to None (dict.fromkeys defaults every value to None)
    data_anomaly_map = dict.fromkeys(Definitions.DATA_ANOMALY_MATCHES)

    # Use replace function to map data (if exists in key), to corresponding value - i.e. Remove invalid values
    data = self.data.replace(data_anomaly_map)

    # np.nan is the canonical spelling; NumPy's documentation asks for it in place of the
    # upper-case NAN alias, which is not part of the NumPy 2 namespace
    data = data.replace(np.nan, None)

    self.data = data
|
||||
|
||||
def remap_floor_level(self, ignore_step: bool = False):
    """
    Replace FLOOR_LEVEL values according to FLOOR_LEVEL_MAP.

    :param ignore_step: When True (newdata mode) the column is left as it is
    """
    # TODO: We need to handle the violation mode case - for now it is skipped like newdata
    if self.violation_mode or ignore_step:
        return

    remapped_floor_level = self.data["FLOOR_LEVEL"].replace(FLOOR_LEVEL_MAP)
    self.data["FLOOR_LEVEL"] = remapped_floor_level
|
||||
|
||||
def load_data(self, filepath, low_memory=False) -> None:
    """
    Read the CSV at filepath into self.data.

    The file is always taken from the filepath argument; no path is read from the instance.

    :param filepath: Location of the CSV to read
    :param low_memory: Passed straight through to pandas.read_csv
    :raises ValueError: If filepath is empty or None
    """
    if not filepath:
        raise ValueError("No filepath specified")

    self.data = pd.read_csv(filepath, low_memory=low_memory)
|
||||
|
||||
def insert_data(self, data: pd.DataFrame) -> None:
    """
    Swap the dataframe held by the processor for the one supplied (stored as-is, not copied)
    """
    setattr(self, "data", data)
|
||||
|
|
@ -90,11 +279,11 @@ class DataProcessor:
|
|||
return x
|
||||
|
||||
# Next, we check if it's a value in our map
|
||||
if bounds_map.get(x):
|
||||
if construction_age_bounds_map.get(x):
|
||||
return x
|
||||
|
||||
# We check if it's a standard remap value
|
||||
remap_value = remap.get(x, None)
|
||||
remap_value = construction_age_remap.get(x, None)
|
||||
if remap_value:
|
||||
return remap_value
|
||||
|
||||
|
|
@ -105,12 +294,19 @@ class DataProcessor:
|
|||
|
||||
raise NotImplementedError("Not handled the case for value %s" % x)
|
||||
|
||||
def standardise_construction_age_band(self):
|
||||
def standardise_construction_age_band(self, ignore_step: bool = False):
|
||||
"""
|
||||
This function will tidy up some of the non-standard values that are populated in the construction age
|
||||
band, which is useful for cleaning
|
||||
"""
|
||||
|
||||
if self.violation_mode:
|
||||
# TODO: to fill in
|
||||
return
|
||||
|
||||
if ignore_step:
|
||||
return
|
||||
|
||||
self.data["CONSTRUCTION_AGE_BAND"] = self.data["CONSTRUCTION_AGE_BAND"].apply(
|
||||
lambda x: self.clean_construction_age_band(x)
|
||||
)
|
||||
|
|
@ -119,7 +315,7 @@ class DataProcessor:
|
|||
~pd.isnull(self.data["CONSTRUCTION_AGE_BAND"])
|
||||
]
|
||||
|
||||
def clean_missing_rooms(self):
|
||||
def clean_missing_rooms(self, ignore_step: bool = False):
|
||||
"""
|
||||
For the number of heated rooms and number of habitable rooms, we clean these values up front,
|
||||
based on property archetype and age
|
||||
|
|
@ -127,6 +323,14 @@ class DataProcessor:
|
|||
TODO: We could use a model based impution approach for possibly more accurate cleaning
|
||||
"""
|
||||
|
||||
if self.violation_mode:
|
||||
# TODO: to fill in
|
||||
return
|
||||
|
||||
if ignore_step:
|
||||
return
|
||||
|
||||
# TODO: DO we want to move this out of this function? (i.e. alter the data before we do any cleaning)
|
||||
self.data["POSTAL_AREA"] = self.data["POSTCODE"].apply(lambda x: x.split(" ")[0])
|
||||
|
||||
def apply_clean(data, matching_columns):
|
||||
|
|
@ -164,59 +368,78 @@ class DataProcessor:
|
|||
break
|
||||
to_index -= 1
|
||||
|
||||
def pre_process(self) -> pd.DataFrame:
|
||||
"""
|
||||
Load data and begin initial cleaning
|
||||
"""
|
||||
if self.data is None:
|
||||
self.load_data(low_memory=DATA_PROCESSOR_SETTINGS["low_memory"])
|
||||
# def pre_process(self, filepath: Path | None = None) -> tuple[pd.DataFrame, pd.DataFrame]:
|
||||
# """
|
||||
# Load data and begin initial cleaning
|
||||
# """
|
||||
# if self.data is None:
|
||||
# self.load_data(filepath=filepath, low_memory=DATA_PROCESSOR_SETTINGS["low_memory"])
|
||||
|
||||
if not self.newdata:
|
||||
self.confine_data()
|
||||
# if not self.is_newdata:
|
||||
# self.confine_data()
|
||||
|
||||
self.remap_columns()
|
||||
# self.remap_columns()
|
||||
|
||||
# We have some non-standard construction age bands which we'll clean for matching
|
||||
if not self.newdata:
|
||||
self.standardise_construction_age_band()
|
||||
self.clean_missing_rooms()
|
||||
# # We have some non-standard construction age bands which we'll clean for matching
|
||||
# if not self.is_newdata:
|
||||
# self.standardise_construction_age_band()
|
||||
# self.clean_missing_rooms()
|
||||
|
||||
self.recast_df_columns(
|
||||
column_mappings=DATA_PROCESSOR_SETTINGS["column_mappings"]
|
||||
)
|
||||
# self.recast_df_columns(
|
||||
# column_mappings=DATA_PROCESSOR_SETTINGS["column_mappings"]
|
||||
# )
|
||||
|
||||
if not self.newdata:
|
||||
self.clean_multi_glaze_proportion()
|
||||
# if not self.is_newdata:
|
||||
# self.clean_multi_glaze_proportion()
|
||||
|
||||
self.clean_photo_supply()
|
||||
# self.clean_photo_supply()
|
||||
|
||||
if not self.newdata:
|
||||
self.retain_multiple_epc_properties(
|
||||
epc_minimum_count=DATA_PROCESSOR_SETTINGS["epc_minimum_count"]
|
||||
)
|
||||
# if not self.is_newdata:
|
||||
# self.retain_multiple_epc_properties(
|
||||
# epc_minimum_count=DATA_PROCESSOR_SETTINGS["epc_minimum_count"]
|
||||
# )
|
||||
|
||||
if DATA_PROCESSOR_SETTINGS["epc_minimum_count"] >= 1:
|
||||
# If we have multiple EPC records, we can try and do filling
|
||||
self.fill_na_fields()
|
||||
# if DATA_PROCESSOR_SETTINGS["epc_minimum_count"] >= 1:
|
||||
# # If we have multiple EPC records, we can try and do filling
|
||||
# self.fill_na_fields()
|
||||
|
||||
if not self.newdata:
|
||||
self.data = self.data.sort_values(["UPRN", "LODGEMENT_DATE"], ascending=True)
|
||||
# if not self.is_newdata:
|
||||
# self.data = self.data.sort_values(["UPRN", "LODGEMENT_DATE"], ascending=True)
|
||||
|
||||
# Final re-casting after data transformed and prepared
|
||||
coltypes = {k: v for k, v in COLUMNTYPES.items() if k in self.data.columns} if self.newdata else COLUMNTYPES
|
||||
for k, v in coltypes.items():
|
||||
self.data[k] = self.data[k].astype(v)
|
||||
self.data = self.data.astype(coltypes)
|
||||
# # Final re-casting after data transformed and prepared
|
||||
# coltypes = {k: v for k, v in COLUMNTYPES.items() if k in self.data.columns} if self.is_newdata else
|
||||
# COLUMNTYPES
|
||||
# for k, v in coltypes.items():
|
||||
# self.data[k] = self.data[k].astype(v)
|
||||
# self.data = self.data.astype(coltypes)
|
||||
|
||||
self.na_remapping()
|
||||
# self.na_remapping()
|
||||
|
||||
return self.data
|
||||
# self.cleaning_averages = None
|
||||
# if not self.is_newdata:
|
||||
# # We have some odd cases with missing constituency so we fill
|
||||
# self.data = self.data.fillna({"CONSTITUENCY": self.data["CONSTITUENCY"].mode().values[0]})
|
||||
|
||||
def na_remapping(self):
|
||||
# self.cleaning_averages = self.make_cleaning_averages()
|
||||
# # We apply averages cleaning to the data
|
||||
# self.data = self.apply_averages_cleaning(
|
||||
# data_to_clean=self.data,
|
||||
# cleaning_data=self.cleaning_averages,
|
||||
# cols_to_merge_on=COLUMNS_TO_MERGE_ON
|
||||
# )
|
||||
|
||||
# self.cleaning_averages["LOCAL_AUTHORITY"] = self.data["LOCAL_AUTHORITY"].values[0]
|
||||
# self.cleaning_averages.columns = self.cleaning_averages.columns.str.lower()
|
||||
|
||||
# self.data.columns = self.data.columns.str.lower()
|
||||
|
||||
# return self.data, self.cleaning_averages
|
||||
|
||||
def na_remapping(self, auto_subset_columns: bool = False):
|
||||
|
||||
fill_na_map_apply = {
|
||||
k: v for k, v in fill_na_map.items() if k in self.data.columns
|
||||
} if self.newdata else fill_na_map
|
||||
} if auto_subset_columns else fill_na_map
|
||||
|
||||
for column, fill_value in fill_na_map_apply.items():
|
||||
self.data[column] = self.data[column].fillna(fill_value)
|
||||
|
|
@ -243,35 +466,15 @@ class DataProcessor:
|
|||
["FLOOR_HEIGHT", "TOTAL_FLOOR_AREA"]
|
||||
].replace("", None)
|
||||
|
||||
def remap_columns(self):
|
||||
def make_cleaning_averages(self, ignore_step: bool = False) -> pd.DataFrame:
|
||||
"""
|
||||
Remap all columns, for any non values
|
||||
Create a dataset to hold averages based on property type, built form, construction age, and rooms.
|
||||
Not require in newdata mode
|
||||
"""
|
||||
|
||||
# Map all anomaly values to None
|
||||
data_anomaly_map = dict(
|
||||
zip(
|
||||
Definitions.DATA_ANOMALY_MATCHES,
|
||||
[None] * len(Definitions.DATA_ANOMALY_MATCHES),
|
||||
)
|
||||
)
|
||||
if ignore_step:
|
||||
return pd.DataFrame()
|
||||
|
||||
# Use replace function to map data (if exists in key), to corresponding value - i.e. Remove invalid values
|
||||
data = self.data.replace(data_anomaly_map)
|
||||
data = data.replace(np.NAN, None)
|
||||
|
||||
# Remap certain columns
|
||||
if not self.newdata:
|
||||
data["FLOOR_LEVEL"] = data["FLOOR_LEVEL"].replace(FLOOR_LEVEL_MAP)
|
||||
data["BUILT_FORM"] = data["BUILT_FORM"].replace(BUILT_FORM_REMAP)
|
||||
|
||||
convert_to_lower = ["TRANSACTION_TYPE"]
|
||||
for col in convert_to_lower:
|
||||
data[col] = data[col].str.lower()
|
||||
|
||||
self.data = data
|
||||
|
||||
def make_cleaning_averages(self) -> pd.DataFrame:
|
||||
# Define a custom function to calculate the median, excluding missing values
|
||||
def median_without_missing(group):
|
||||
return group[AVERAGE_FIXED_FEATURES].median(skipna=True)
|
||||
|
|
@ -368,13 +571,20 @@ class DataProcessor:
|
|||
# "FLOOR_HEIGHT"
|
||||
# ].fillna(FLOOR_HEIGHT_NATIONAL_AVERAGE)
|
||||
|
||||
return cleaning_averages_filled
|
||||
self.cleaning_averages = cleaning_averages_filled
|
||||
|
||||
def retain_multiple_epc_properties(self, epc_minimum_count: int = 1) -> None:
|
||||
def retain_multiple_epc_properties(self, epc_minimum_count: int = 1, ignore_step: bool = False) -> None:
|
||||
"""
|
||||
Reduce the data futher by keeping only datasets with multiple epcs
|
||||
"""
|
||||
|
||||
if self.violation_mode:
|
||||
# TODO: to fill in
|
||||
return
|
||||
|
||||
if ignore_step:
|
||||
return
|
||||
|
||||
counts = self.data.groupby("UPRN").size().reset_index()
|
||||
counts.columns = ["UPRN", "count"]
|
||||
|
||||
|
|
@ -382,22 +592,81 @@ class DataProcessor:
|
|||
counts = counts[counts["count"] > epc_minimum_count]
|
||||
self.data = pd.merge(self.data, counts, on="UPRN")
|
||||
|
||||
def recast_df_columns(self, column_mappings: dict, auto_subset_columns: bool = False) -> None:
    """
    Recast columns from the dataframe to ensure the behaviour we want

    :param column_mappings: Column name -> dtype, or column name -> sequence of dtypes
                            that are applied one after the other (e.g. ["float", "int"])
    :param auto_subset_columns: When True, mappings for columns that are not in the
                                data are dropped instead of raising
    :raises ValueError: If a mapped column is not in the data and auto_subset_columns is False
    """
    if auto_subset_columns:
        column_mappings = {k: v for k, v in column_mappings.items() if k in self.data.columns}

    for key, values in column_mappings.items():
        if key not in self.data.columns:
            raise ValueError("Column mapping incorrectly specified")

        # A single dtype must not be iterated: looping over e.g. the string "int64"
        # would attempt a cast per character. Only real sequences are chained.
        dtype_chain = values if isinstance(values, (list, tuple)) else [values]
        for dtype in dtype_chain:
            self.data[key] = self.data[key].astype(dtype)
|
||||
|
||||
def confine_data(self) -> None:
|
||||
def recast_all_data(self, column_mappings: dict, auto_subset_columns: bool = False) -> None:
    """
    Recast many columns in a single astype call

    :param column_mappings: Column name -> dtype
    :param auto_subset_columns: When True, only the mappings whose column is present
                                in the data are applied
    """
    target_dtypes = column_mappings
    if auto_subset_columns:
        present_columns = self.data.columns
        target_dtypes = {
            column: dtype for column, dtype in column_mappings.items() if column in present_columns
        }

    self.data = self.data.astype(target_dtypes)
|
||||
|
||||
def confine_data(self, ignore_step: bool = False):
|
||||
"""
|
||||
Include all step to reduce down the data based on assumptions
|
||||
"""
|
||||
|
||||
if self.violation_mode:
|
||||
violation_uprn_missing = pd.isnull(self.data["UPRN"])
|
||||
violation_old_lodgment_date = self.data["LODGEMENT_DATE"] < EARLIEST_EPC_DATE
|
||||
violation_invalid_transaction_type = self.data["TRANSACTION_TYPE"] == IGNORED_TRANSACTION_TYPES
|
||||
violation_ignored_floor_level = self.data["FLOOR_LEVEL"].isin(IGNORED_FLOOR_LEVELS)
|
||||
violation_rdsap_score_above_max = self.data[RDSAP_RESPONSE] > MAX_SAP_SCORE
|
||||
violation_missing_windows_description = pd.isnull(self.data["WINDOWS_DESCRIPTION"])
|
||||
violation_missing_hotwater_description = pd.isnull(self.data["HOTWATER_DESCRIPTION"])
|
||||
violation_missing_roof_description = pd.isnull(self.data["ROOF_DESCRIPTION"])
|
||||
violation_invalid_property_type = self.data["PROPERTY_TYPE"] == IGNORED_PROPERTY_TYPES
|
||||
violation_invalid_tenure = self.data["TENURE"].isin(IGNORED_TENURES)
|
||||
|
||||
violation_df = pd.concat(
|
||||
[
|
||||
violation_uprn_missing,
|
||||
violation_old_lodgment_date,
|
||||
violation_invalid_transaction_type,
|
||||
violation_ignored_floor_level,
|
||||
violation_rdsap_score_above_max,
|
||||
violation_missing_windows_description,
|
||||
violation_missing_hotwater_description,
|
||||
violation_missing_roof_description,
|
||||
violation_invalid_property_type,
|
||||
violation_invalid_tenure,
|
||||
], axis=1,
|
||||
keys=[
|
||||
"violation_uprn_missing",
|
||||
"violation_old_lodgment_date",
|
||||
"violation_invalid_transaction_type",
|
||||
"violation_ignored_floor_level",
|
||||
"violation_rdsap_score_above_max",
|
||||
"violation_missing_windows_description",
|
||||
"violation_missing_hotwater_description",
|
||||
"violation_missing_roof_description",
|
||||
"violation_invalid_property_type",
|
||||
"violation_invalid_tenure"
|
||||
]
|
||||
)
|
||||
|
||||
self.data = pd.concat([self.data, violation_df], axis=1)
|
||||
|
||||
if ignore_step:
|
||||
return
|
||||
|
||||
# Filter 1: UPRN is a unique identifier for a property, so we remove any EPCs that don't have one
|
||||
|
||||
# Filter 2: Lodgement date is the date the EPC was lodged, so we remove any EPCs that were lodged
|
||||
|
|
@ -416,9 +685,9 @@ class DataProcessor:
|
|||
|
||||
self.data = self.data[~pd.isnull(self.data["UPRN"])]
|
||||
self.data = self.data[self.data["LODGEMENT_DATE"] >= EARLIEST_EPC_DATE]
|
||||
self.data = self.data[self.data["TRANSACTION_TYPE"] != "new dwelling"]
|
||||
self.data = self.data[self.data["TRANSACTION_TYPE"] != IGNORED_TRANSACTION_TYPES]
|
||||
self.data = self.data[
|
||||
~self.data["FLOOR_LEVEL"].isin(["top floor", "mid floor"])
|
||||
~self.data["FLOOR_LEVEL"].isin(IGNORED_FLOOR_LEVELS)
|
||||
]
|
||||
self.data = self.data[self.data[RDSAP_RESPONSE] <= MAX_SAP_SCORE]
|
||||
|
||||
|
|
@ -430,16 +699,30 @@ class DataProcessor:
|
|||
# Because park homes are surveyed unusually (for example, we don't have u-values to
|
||||
# look up for their different components, they need to be collected in survey and aren't reflected in
|
||||
# EPCs) we'll ignore them from the model
|
||||
self.data = self.data[self.data["PROPERTY_TYPE"] != "Park home"]
|
||||
self.data = self.data[self.data["PROPERTY_TYPE"] != IGNORED_PROPERTY_TYPES]
|
||||
|
||||
def clean_multi_glaze_proportion(self) -> None:
|
||||
# We remove EPCs where the tenure is unknown, but is usually an indicator of a new build
|
||||
self.data = self.data[~self.data["TENURE"].isin(IGNORED_TENURES)]
|
||||
|
||||
# We remap zero values to None
|
||||
self.data.loc[self.data['FLOOR_HEIGHT'] == 0, 'FLOOR_HEIGHT'] = None
|
||||
|
||||
def clean_multi_glaze_proportion(self, ignore_step: bool = False) -> None:
    """
    Set MULTI_GLAZE_PROPORTION to 100 wherever it is missing and the windows
    description is one of FULLY_GLAZED_DESCRIPTIONS.

    :param ignore_step: When True (newdata mode) the data is left as it is
    """
    # TODO: violation mode is not handled yet - for now it is skipped like newdata
    if self.violation_mode or ignore_step:
        return

    proportion_missing = pd.isnull(self.data["MULTI_GLAZE_PROPORTION"])
    fully_glazed = self.data["WINDOWS_DESCRIPTION"].isin(FULLY_GLAZED_DESCRIPTIONS)
    no_multi_glaze_proportion_index = proportion_missing & fully_glazed

    self.data.loc[no_multi_glaze_proportion_index, "MULTI_GLAZE_PROPORTION"] = 100
|
||||
|
||||
def clean_photo_supply(self) -> None:
|
||||
|
|
@ -450,7 +733,9 @@ class DataProcessor:
|
|||
self.data["PHOTO_SUPPLY"] = self.data["PHOTO_SUPPLY"].fillna(0)
|
||||
|
||||
@staticmethod
|
||||
def apply_averages_cleaning(data_to_clean, cleaning_data, cols_to_merge_on, colnames=None):
|
||||
def apply_averages_cleaning(
|
||||
data_to_clean, cleaning_data, cols_to_merge_on, colnames=None, ignore_step: bool = False
|
||||
):
|
||||
"""
|
||||
Clean the input DataFrame using averages from a cleaning DataFrame.
|
||||
|
||||
|
|
@ -462,6 +747,9 @@ class DataProcessor:
|
|||
:return: Cleaned DataFrame.
|
||||
"""
|
||||
|
||||
if ignore_step:
|
||||
return None
|
||||
|
||||
# The desired colnames to clean - which may not be present
|
||||
if colnames is None:
|
||||
colnames = ["TOTAL_FLOOR_AREA", "FLOOR_HEIGHT", "FIXED_LIGHTING_OUTLETS_COUNT"]
|
||||
|
|
@ -492,12 +780,16 @@ class DataProcessor:
|
|||
how='left'
|
||||
)
|
||||
|
||||
global_averages = cleaning_data[cols_to_clean].mean()
|
||||
|
||||
# Fill NaN values with averages
|
||||
for col in cols_to_clean:
|
||||
data_to_clean[col].fillna(data_to_clean[f"{col}_AVERAGE"], inplace=True)
|
||||
data_to_clean.drop(columns=[f"{col}_AVERAGE"], inplace=True)
|
||||
# If we still have missings
|
||||
data_to_clean[col].fillna(data_to_clean[col].mean(), inplace=True)
|
||||
# Final step if we still have missings - use global mean
|
||||
data_to_clean[col].fillna(global_averages[col], inplace=True)
|
||||
|
||||
return data_to_clean
|
||||
|
||||
|
|
@ -510,8 +802,8 @@ class DataProcessor:
|
|||
:return: Pandas dataframe containing the subset of columns defined in COMPONENT_FEATURES
|
||||
"""
|
||||
|
||||
if suffix not in ["_STARTING", "_ENDING"]:
|
||||
raise Exception("Suffix should be one of _STARTING or _ENDING")
|
||||
if suffix not in ["_starting", "_ending"]:
|
||||
raise Exception("Suffix should be one of _starting or _ending")
|
||||
|
||||
if suffix == "_STARTING":
|
||||
starting_cols = self.data[STARTING_SUFFIX_COMPONENT_COLS + EFFICIENCY_FEATURES].copy().add_suffix(suffix)
|
||||
|
|
@ -573,6 +865,7 @@ class DataProcessor:
|
|||
|
||||
for col in missings.index:
|
||||
unique_values = df[col].unique()
|
||||
# TODO: confirm this behaviour
|
||||
if True in unique_values or False in unique_values:
|
||||
df[col] = df[col].fillna(False)
|
||||
if "none" in unique_values:
|
||||
|
|
|
|||
836
etl/epc/Dataset.py
Normal file
836
etl/epc/Dataset.py
Normal file
|
|
@ -0,0 +1,836 @@
|
|||
import numpy as np
|
||||
import pandas as pd
|
||||
from typing import List
|
||||
from etl.epc.Record import EPCDifferenceRecord
|
||||
from etl.epc.ValidationConfiguration import DatasetValidationConfiguration
|
||||
from etl.epc.settings import EARLIEST_EPC_DATE
|
||||
|
||||
from recommendations.rdsap_tables import england_wales_age_band_lookup
|
||||
from recommendations.recommendation_utils import (
|
||||
estimate_number_of_floors,
|
||||
get_wall_u_value,
|
||||
get_roof_u_value,
|
||||
get_floor_u_value,
|
||||
estimate_perimeter,
|
||||
get_wall_type,
|
||||
)
|
||||
|
||||
# Column names that are treated as boolean flags. `_clean_missing_values`
# fills missing entries with False for any column listed here, even when the
# column's observed values do not contain True/False.
# TODO: Can probably produce this in the property change app and store in S3
BOOLEAN_VARIABLES = [
    "is_cavity_wall",
    "is_filled_cavity",
    "is_solid_brick",
    "is_system_built",
    "is_timber_frame",
    "is_granite_or_whinstone",
    "is_as_built",
    "is_cob",
    "is_sandstone_or_limestone",
    "is_park_home",
    "external_insulation",
    "internal_insulation",
    "is_park_home_ending",
    "external_insulation_ending",
    "internal_insulation_ending",
    "is_to_unheated_space",
    "is_to_external_air",
    "is_suspended",
    "is_solid",
    "another_property_below",
    "is_pitched",
    "is_roof_room",
    "is_loft",
    "is_flat",
    "is_thatched",
    "is_at_rafters",
    "has_dwelling_above",
    "has_radiators",
    "has_fan_coil_units",
    "has_pipes_in_screed_above_insulation",
    "has_pipes_in_insulated_timber_floor",
    "has_pipes_in_concrete_slab",
    "has_boiler",
    "has_air_source_heat_pump",
    "has_room_heaters",
    "has_electric_storage_heaters",
    "has_warm_air",
    "has_electric_underfloor_heating",
    "has_electric_ceiling_heating",
    "has_community_scheme",
    "has_ground_source_heat_pump",
    "has_no_system_present",
    "has_portable_electric_heaters",
    "has_water_source_heat_pump",
    "has_electric_heat_pump",
    "has_micro-cogeneration",
    "has_solar_assisted_heat_pump",
    "has_exhaust_source_heat_pump",
    "has_community_heat_pump",
    "has_electric",
    "has_mains_gas",
    "has_wood_logs",
    "has_coal",
    "has_oil",
    "has_wood_pellets",
    "has_anthracite",
    "has_dual_fuel_mineral_and_wood",
    "has_smokeless_fuel",
    "has_lpg",
    "has_b30k",
    "has_electricaire",
    "has_assumed_for_most_rooms",
    "has_underfloor_heating",
    "has_radiators_ending",
    "has_fan_coil_units_ending",
    "has_pipes_in_screed_above_insulation_ending",
    "has_pipes_in_insulated_timber_floor_ending",
    "has_pipes_in_concrete_slab_ending",
    "has_boiler_ending",
    "has_air_source_heat_pump_ending",
    "has_room_heaters_ending",
    "has_electric_storage_heaters_ending",
    "has_warm_air_ending",
    "has_electric_underfloor_heating_ending",
    "has_electric_ceiling_heating_ending",
    "has_community_scheme_ending",
    "has_ground_source_heat_pump_ending",
    "has_no_system_present_ending",
    "has_portable_electric_heaters_ending",
    "has_water_source_heat_pump_ending",
    "has_electric_heat_pump_ending",
    "has_micro-cogeneration_ending",
    "has_solar_assisted_heat_pump_ending",
    "has_exhaust_source_heat_pump_ending",
    "has_community_heat_pump_ending",
    "has_electric_ending",
    "has_mains_gas_ending",
    "has_wood_logs_ending",
    "has_coal_ending",
    "has_oil_ending",
    "has_wood_pellets_ending",
    "has_anthracite_ending",
    "has_dual_fuel_mineral_and_wood_ending",
    "has_smokeless_fuel_ending",
    "has_lpg_ending",
    "has_b30k_ending",
    "has_electricaire_ending",
    "has_assumed_for_most_rooms_ending",
    "has_underfloor_heating_ending",
    "multiple_room_thermostats",
    "multiple_room_thermostats_ending",
    "is_community",
    "no_individual_heating_or_community_network",
    "is_community_ending",
    "no_individual_heating_or_community_network_ending",
]
|
||||
|
||||
|
||||
class BaseDataset:
    """
    Common parent of the dataset classes defined in this module.
    """

    def __init__(self) -> None:
        # Every instance starts with its own empty mapping of pipeline steps.
        self.pipeline_steps = dict()

    def validate_dataset(self):
        """
        Attach the dataset validation configuration to this instance.
        """
        self.dataset_validation: dict = DatasetValidationConfiguration

    # def pipeline_factory(self, pipeline_type: str) -> dict:
    #     """
    #     Factory method for creating a pipeline
    #     """
    #     if pipeline_type not in self.pipeline_steps:
    #         raise ValueError(f"Pipeline type {pipeline_type} not found")

    #     return self.pipeline_steps[pipeline_type]
|
||||
|
||||
|
||||
class TrainingDataset(BaseDataset):
|
||||
"""
|
||||
A collection of EPCDifferenceRecords can be combined into a TrainingDataset.
|
||||
"""
|
||||
|
||||
def __init__(
    self, datasets: List[EPCDifferenceRecord], cleaned_lookup: dict
) -> None:
    """
    Build the training frame from a list of difference records and clean it.

    :param datasets: Records whose `difference_record` mappings become the rows of `self.df`.
    :param cleaned_lookup: Cleaned description lookup, passed to `_expand_description_to_features`.
    :raises ValueError: Propagated from the cleaning / validation steps (e.g. `_null_validation`).
    """
    # Run the base initialiser so that `pipeline_steps` exists on the instance;
    # without this call the attribute set in BaseDataset.__init__ is never created.
    super().__init__()
    # self.pipeline_steps = self.pipeline_factory("training")
    self.datasets = datasets
    self.df = pd.DataFrame([dataset.difference_record for dataset in datasets])

    # The order of the steps matters: later steps read columns created by earlier ones.
    self._feature_generation()
    self._drop_features()
    self._clean_efficiency_variables()
    self._null_validation(information="Clean Efficiency Variables")
    self._expand_description_to_features(cleaned_lookup)
    self._adjust_assumed_values_in_wall_descriptions()
    self._generate_u_values_from_features()
    # TODO: For some of the features that we clean, we have either a true, false or possibly null value
    # Those nulls should be False. clean_missings_after_description_process handles this but shouldn't
    # need to
    self._clean_missing_values()
    self._null_validation(information="Clean Missing Values")
    self._remove_abnormal_change_in_floor_area()
    self._ensure_numeric()
    self._organise_starting_ending_columns()
|
||||
|
||||
def _organise_starting_ending_columns(self):
|
||||
"""
|
||||
Organise the starting and ending columns so that they are next to each other
|
||||
"""
|
||||
no_suffix_cols = [
|
||||
col
|
||||
for col in self.df.columns
|
||||
if "_ending" not in col and "_starting" not in col
|
||||
]
|
||||
starting_cols = [col for col in self.df.columns if "_starting" in col]
|
||||
ending_cols = [col for col in self.df.columns if "_ending" in col]
|
||||
|
||||
common_cols = [
|
||||
col.rsplit("_", 1)[0]
|
||||
for col in starting_cols
|
||||
if col.replace("_starting", "_ending") in ending_cols
|
||||
]
|
||||
only_ending_cols = [
|
||||
col
|
||||
for col in ending_cols
|
||||
if col.replace("_ending", "_starting") not in starting_cols
|
||||
]
|
||||
|
||||
common_cols = [[col + "_starting", col + "_ending"] for col in common_cols]
|
||||
|
||||
self.df = self.df.loc[
|
||||
:,
|
||||
no_suffix_cols
|
||||
+ only_ending_cols
|
||||
+ [col for cols in common_cols for col in cols],
|
||||
]
|
||||
|
||||
def _remove_abnormal_change_in_floor_area(self):
|
||||
"""
|
||||
Remove properties where the change in floor area is greater than 100%
|
||||
"""
|
||||
|
||||
self.df["tfa_diff_abs"] = abs(
|
||||
self.df["total_floor_area_ending"] - self.df["total_floor_area_starting"]
|
||||
)
|
||||
self.df["tfa_diff_prop"] = (
|
||||
self.df["tfa_diff_abs"] / self.df["total_floor_area_starting"]
|
||||
)
|
||||
self.df = self.df[self.df["tfa_diff_prop"] < 0.5]
|
||||
self.df = self.df.drop(columns=["tfa_diff_abs", "tfa_diff_prop"])
|
||||
|
||||
def _ensure_numeric(self):
|
||||
"""
|
||||
Ensure that all columns are numeric
|
||||
"""
|
||||
# TODO: move into EPCRecord record
|
||||
uvalue_columns = [
|
||||
col for col in self.df.columns if "thermal_transmittance" in col
|
||||
]
|
||||
for uvalue_col in uvalue_columns:
|
||||
self.df[uvalue_col] = pd.to_numeric(self.df[uvalue_col])
|
||||
|
||||
@staticmethod
|
||||
def _lambda_function_to_generate_roof_uvalue(row, is_end=False):
|
||||
"""
|
||||
Using the apply method, use the get_roof_u_value method to generate the u-value
|
||||
"""
|
||||
|
||||
col_name = (
|
||||
"roof_insulation_thickness"
|
||||
if not is_end
|
||||
else "roof_insulation_thickness_ending"
|
||||
)
|
||||
|
||||
if row["has_dwelling_above"]:
|
||||
if row["roof_thermal_transmittance"] != 0:
|
||||
raise ValueError("Should have 0 u-value for roof")
|
||||
|
||||
if row["roof_thermal_transmittance_ending"] != 0:
|
||||
raise ValueError("Should have 0 u-value for roof")
|
||||
|
||||
return get_roof_u_value(
|
||||
insulation_thickness=row[col_name],
|
||||
has_dwelling_above=row["has_dwelling_above"],
|
||||
is_loft=row["is_loft"],
|
||||
is_roof_room=row["is_roof_room"],
|
||||
is_thatched=row["is_thatched"],
|
||||
is_flat=row["is_flat"],
|
||||
is_pitched=row["is_pitched"],
|
||||
is_at_rafters=row["is_at_rafters"],
|
||||
age_band=england_wales_age_band_lookup[row["construction_age_band"]],
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _lambda_function_to_generate_wall_uvalue(row, is_end=False):
|
||||
"""
|
||||
Using the apply method, use the get_wall_u_value method to generate the u-value
|
||||
"""
|
||||
description_col_name = (
|
||||
"walls_clean_description"
|
||||
if not is_end
|
||||
else "walls_clean_description_ending"
|
||||
)
|
||||
thermal_transistance_col_name = (
|
||||
"walls_thermal_transmittance"
|
||||
if not is_end
|
||||
else "walls_thermal_transmittance_ending"
|
||||
)
|
||||
|
||||
if pd.isnull(row[thermal_transistance_col_name]):
|
||||
output = get_wall_u_value(
|
||||
clean_description=row[description_col_name],
|
||||
age_band=england_wales_age_band_lookup[row["construction_age_band"]],
|
||||
is_granite_or_whinstone=row["is_granite_or_whinstone"],
|
||||
is_sandstone_or_limestone=row["is_sandstone_or_limestone"],
|
||||
)
|
||||
else:
|
||||
output = row[thermal_transistance_col_name]
|
||||
|
||||
return output
|
||||
|
||||
@staticmethod
|
||||
def _lambda_function_to_generate_floor_uvalue(row, is_end=False):
|
||||
"""
|
||||
Using the apply method, use the get_floor_u_value method to generate the u-value
|
||||
"""
|
||||
|
||||
floor_thermal_col_name = (
|
||||
"floor_thermal_transmittance"
|
||||
if not is_end
|
||||
else "floor_thermal_transmittance_ending"
|
||||
)
|
||||
|
||||
if row["another_property_below"]:
|
||||
if row["floor_thermal_transmittance"] != 0:
|
||||
raise ValueError("Should have 0 u-value for floor")
|
||||
|
||||
if row["floor_thermal_transmittance_ending"] != 0:
|
||||
raise ValueError("Should have 0 u-value for floor")
|
||||
return 0
|
||||
else:
|
||||
uvalue = row[floor_thermal_col_name]
|
||||
|
||||
if pd.isnull(uvalue):
|
||||
insulation_col_name = (
|
||||
"floor_insulation_thickness"
|
||||
if not is_end
|
||||
else "floor_insulation_thickness_ending"
|
||||
)
|
||||
perimeter_col_name = (
|
||||
"estimated_perimeter_starting"
|
||||
if not is_end
|
||||
else "estimated_perimeter_ending"
|
||||
)
|
||||
floor_area_col_name = (
|
||||
"ground_floor_area_starting"
|
||||
if not is_end
|
||||
else "ground_floor_area_ending"
|
||||
)
|
||||
|
||||
uvalue = get_floor_u_value(
|
||||
floor_type=row["floor_type"],
|
||||
perimeter=row[perimeter_col_name],
|
||||
area=row[floor_area_col_name],
|
||||
insulation_thickness=row[insulation_col_name],
|
||||
wall_type=row["wall_type"],
|
||||
age_band=england_wales_age_band_lookup[row["construction_age_band"]],
|
||||
)
|
||||
|
||||
return uvalue
|
||||
|
||||
def _generate_u_values_from_features(self):
    """
    Fill missing wall, roof and floor u-values with values derived from the features.

    Recorded thermal transmittance values are kept; only nulls are replaced.
    The columns estimated_perimeter_starting / estimated_perimeter_ending are
    added to `self.df` and remain afterwards; the other helper columns created
    here, and the cleaned wall descriptions, are dropped at the end.
    """

    # ~~~~~~~~~~~~~~~~~~
    # Walls
    # ~~~~~~~~~~~~~~~~~~

    walls_starting_uvalue = self.df.apply(
        lambda row: self._lambda_function_to_generate_wall_uvalue(row), axis=1
    )
    walls_ending_uvalue = self.df.apply(
        lambda row: self._lambda_function_to_generate_wall_uvalue(row, is_end=True),
        axis=1,
    )

    walls_starting_uvalue = self.df["walls_thermal_transmittance"].fillna(
        walls_starting_uvalue
    )
    # Where the cleaned wall description did not change between the EPCs,
    # the ending u-value is taken from the starting one.
    walls_starting_equals_ending_flag = (
        self.df["walls_clean_description"]
        == self.df["walls_clean_description_ending"]
    )
    walls_ending_uvalue[walls_starting_equals_ending_flag] = walls_starting_uvalue[
        walls_starting_equals_ending_flag
    ]

    # ~~~~~~~~~~~~~~~~~~
    # Roof
    # ~~~~~~~~~~~~~~~~~~

    roof_starting_uvalue = self.df.apply(
        lambda row: self._lambda_function_to_generate_roof_uvalue(row), axis=1
    )
    roof_ending_uvalue = self.df.apply(
        lambda row: self._lambda_function_to_generate_roof_uvalue(row, is_end=True),
        axis=1,
    )

    # ~~~~~~~~~~~~~~~~~~
    # Floor
    # ~~~~~~~~~~~~~~~~~~

    self.df["estimated_number_of_floors"] = self.df.apply(
        lambda row: estimate_number_of_floors(row["property_type"]), axis=1
    )

    self.df["ground_floor_area_starting"] = (
        self.df["total_floor_area_starting"] / self.df["estimated_number_of_floors"]
    )
    self.df["ground_floor_area_ending"] = (
        self.df["total_floor_area_ending"] / self.df["estimated_number_of_floors"]
    )

    self.df["estimated_perimeter_starting"] = self.df.apply(
        lambda row: estimate_perimeter(
            row["ground_floor_area_starting"],
            row["number_habitable_rooms_starting"]
            / row["estimated_number_of_floors"],
        ),
        axis=1,
    )
    # Fix: the ending perimeter is estimated from the ENDING ground floor area
    # (it previously reused ground_floor_area_starting).
    self.df["estimated_perimeter_ending"] = self.df.apply(
        lambda row: estimate_perimeter(
            row["ground_floor_area_ending"],
            row["number_habitable_rooms_ending"]
            / row["estimated_number_of_floors"],
        ),
        axis=1,
    )
    self.df["floor_type"] = self.df["is_suspended"].replace(
        {True: "suspended", False: "solid"}
    )
    self.df["wall_type"] = self.df.apply(
        lambda row: get_wall_type(
            is_cavity_wall=row["is_cavity_wall"],
            is_solid_brick=row["is_solid_brick"],
            is_timber_frame=row["is_timber_frame"],
            is_granite_or_whinstone=row["is_granite_or_whinstone"],
            is_cob=row["is_cob"],
            is_sandstone_or_limestone=row["is_sandstone_or_limestone"],
            is_system_built=row["is_system_built"],
            is_park_home=row["is_park_home"],
        ),
        axis=1,
    )

    floor_starting_uvalue = self.df.apply(
        lambda row: self._lambda_function_to_generate_floor_uvalue(row), axis=1
    )
    floor_ending_uvalue = self.df.apply(
        lambda row: self._lambda_function_to_generate_floor_uvalue(
            row, is_end=True
        ),
        axis=1,
    )

    # Explicit mapping instead of eval() on local variable names
    derived_uvalues = {
        "walls": (walls_starting_uvalue, walls_ending_uvalue),
        "roof": (roof_starting_uvalue, roof_ending_uvalue),
        "floor": (floor_starting_uvalue, floor_ending_uvalue),
    }

    # Only nulls are replaced: recorded u-values always win over derived ones
    for component, (starting_uvalue, ending_uvalue) in derived_uvalues.items():
        self.df[f"{component}_thermal_transmittance"] = self.df[
            f"{component}_thermal_transmittance"
        ].fillna(starting_uvalue)
        self.df[f"{component}_thermal_transmittance_ending"] = self.df[
            f"{component}_thermal_transmittance_ending"
        ].fillna(ending_uvalue)

    self.df = self.df.drop(
        columns=[
            "floor_type",
            "wall_type",
            "walls_clean_description",
            "walls_clean_description_ending",
            "estimated_number_of_floors",
            "ground_floor_area_starting",
            "ground_floor_area_ending",
        ]
    )
|
||||
|
||||
def _adjust_assumed_values_in_wall_descriptions(self):
|
||||
"""
|
||||
Strip out assumed values for all wall descriptions
|
||||
"""
|
||||
for col in ["walls_clean_description", "walls_clean_description_ending"]:
|
||||
self.df[col] = (
|
||||
self.df[col].str.replace("(assumed)", "", regex=False).str.rstrip()
|
||||
)
|
||||
|
||||
def _drop_inconsistent_properties(self, expanded_df: pd.DataFrame, component: str):
|
||||
"""
|
||||
Drop properties that have inconsistent data, i.e. changing material types
|
||||
"""
|
||||
|
||||
if component == "walls":
|
||||
expanded_df = expanded_df[
|
||||
(expanded_df["is_cavity_wall"] == expanded_df["is_cavity_wall_ending"])
|
||||
& (
|
||||
expanded_df["is_solid_brick"]
|
||||
== expanded_df["is_solid_brick_ending"]
|
||||
)
|
||||
& (
|
||||
expanded_df["is_timber_frame"]
|
||||
== expanded_df["is_timber_frame_ending"]
|
||||
)
|
||||
& (
|
||||
expanded_df["is_granite_or_whinstone"]
|
||||
== expanded_df["is_granite_or_whinstone_ending"]
|
||||
)
|
||||
& (expanded_df["is_cob"] == expanded_df["is_cob_ending"])
|
||||
& (
|
||||
expanded_df["is_sandstone_or_limestone"]
|
||||
== expanded_df["is_sandstone_or_limestone_ending"]
|
||||
)
|
||||
]
|
||||
elif component == "floor":
|
||||
expanded_df = expanded_df[
|
||||
(expanded_df["is_suspended"] == expanded_df["is_suspended_ending"])
|
||||
& (expanded_df["is_solid"] == expanded_df["is_solid_ending"])
|
||||
& (
|
||||
expanded_df["another_property_below"]
|
||||
== expanded_df["another_property_below_ending"]
|
||||
)
|
||||
& (
|
||||
expanded_df["is_to_unheated_space"]
|
||||
== expanded_df["is_to_unheated_space_ending"]
|
||||
)
|
||||
& (
|
||||
expanded_df["is_to_external_air"]
|
||||
== expanded_df["is_to_external_air_ending"]
|
||||
)
|
||||
]
|
||||
elif component == "roof":
|
||||
expanded_df = expanded_df[
|
||||
(expanded_df["is_pitched"] == expanded_df["is_pitched_ending"])
|
||||
& (expanded_df["is_roof_room"] == expanded_df["is_roof_room_ending"])
|
||||
& (expanded_df["is_loft"] == expanded_df["is_loft_ending"])
|
||||
& (expanded_df["is_flat"] == expanded_df["is_flat_ending"])
|
||||
& (expanded_df["is_thatched"] == expanded_df["is_thatched_ending"])
|
||||
& (expanded_df["is_at_rafters"] == expanded_df["is_at_rafters_ending"])
|
||||
& (
|
||||
expanded_df["has_dwelling_above"]
|
||||
== expanded_df["has_dwelling_above_ending"]
|
||||
)
|
||||
]
|
||||
|
||||
return expanded_df
|
||||
|
||||
def _expand_description_to_features(self, cleaned_lookup: dict):
    """
    Expand the free-text component descriptions into the cleaned lookup features.

    For every component in `cols_to_drop` the lookup table for that component is
    left-merged onto the starting description column and then onto the ending
    description column (overlapping lookup columns from the second merge get an
    "_ending" suffix). Rows are then filtered through
    `_drop_inconsistent_properties`: if, for example, a home appears to go from
    a cavity wall to a solid wall, the record is removed, since that indicates
    a low-quality EPC. Finally the listed helper columns and the original
    description columns are dropped, and the generic lookup column names are
    renamed to component-specific ones.

    The lighting description columns are dropped at the end without expansion.

    :param cleaned_lookup: Mapping from "<component>-description" (or "main-fuel")
        to the data used to build that component's lookup DataFrame.
    """

    # Columns removed after each component's merge, keyed by component
    cols_to_drop = {
        "walls": [
            # clean_description is deliberately not dropped for walls: the cleaned
            # descriptions are needed later for pulling out u-values
            "original_description",
            "thermal_transmittance_unit",
            "original_description_ending",
            "thermal_transmittance_unit_ending",
            "is_cavity_wall_ending",
            "is_solid_brick_ending",
            "is_system_built_ending",
            "is_timber_frame_ending",
            "is_granite_or_whinstone_ending",
            "is_as_built_ending",
            "is_cob_ending",
            "is_assumed_ending",
            "is_sandstone_or_limestone_ending",
            # Remove the is_assumed columns
            "is_assumed",
            "is_assumed_ending",
        ],
        "floor": [
            "original_description",
            "clean_description",
            "thermal_transmittance_unit",
            "no_data",
            "no_data_ending",
            "original_description_ending",
            "clean_description_ending",
            "thermal_transmittance_unit_ending",
            "is_suspended_ending",
            "is_solid_ending",
            "another_property_below_ending",
            "is_to_unheated_space_ending",
            "is_to_external_air_ending",
            "is_assumed",
            "is_assumed_ending",
        ],
        "roof": [
            "original_description",
            "clean_description",
            "thermal_transmittance_unit",
            "is_assumed",
            "is_valid",
            "original_description_ending",
            "clean_description_ending",
            "thermal_transmittance_unit_ending",
            "is_pitched_ending",
            "is_roof_room_ending",
            "is_loft_ending",
            "is_flat_ending",
            "is_thatched_ending",
            "has_dwelling_above_ending",
            "is_assumed_ending",
            "is_valid_ending",
        ],
        "hotwater": [
            "original_description",
            "clean_description",
            "assumed",
            "original_description_ending",
            "clean_description_ending",
            "assumed_ending",
        ],
        "mainheat": [
            "original_description",
            "clean_description",
            "original_description_ending",
            "has_assumed",
            "original_description_ending",
            "clean_description_ending",
            "has_assumed_ending",
        ],
        "mainheatcont": [
            "original_description",
            "clean_description",
            "original_description_ending",
            "clean_description_ending",
        ],
        "windows": [
            "original_description",
            "clean_description",
            "original_description_ending",
            "clean_description_ending",
            # We don't need many of the glazing coverage features because we have the multi_glaze_proportion feature
            "has_glazing",
            "glazing_coverage",
            "no_data",
            "has_glazing_ending",
            "glazing_coverage_ending",
            "no_data_ending",
        ],
        "main-fuel": [
            "original_description",
            "clean_description",
            "original_description_ending",
            "clean_description_ending",
        ],
    }

    components_to_expand = cols_to_drop.keys()

    for component in components_to_expand:
        # TODO: change cleaned dataframe to have underscores instead of dashes
        if component == "main-fuel":
            # main-fuel is the one component whose lookup key and frame columns
            # do not follow the "<component>-description" naming
            cleaned_key = "main-fuel"
            left_on_starting = "main_fuel_starting"
            left_on_ending = "main_fuel_ending"
            original_cols = ["main_fuel_starting", "main_fuel_ending"]
        else:
            cleaned_key = f"{component}-description"
            left_on_starting = f"{component}_description_starting"
            left_on_ending = f"{component}_description_ending"
            original_cols = [
                f"{component}_description_starting",
                f"{component}_description_ending",
            ]

        cleaned_lookup_df_for_key = pd.DataFrame(cleaned_lookup[cleaned_key])

        # First merge attaches the starting features (no suffix); the second
        # attaches the ending features, suffixed with "_ending" on name clashes
        expanded_df = self.df.merge(
            cleaned_lookup_df_for_key,
            how="left",
            left_on=left_on_starting,
            right_on="original_description",
        ).merge(
            cleaned_lookup_df_for_key,
            how="left",
            left_on=left_on_ending,
            right_on="original_description",
            suffixes=("", "_ending"),
        )

        # Drop properties where key material types have changed
        expanded_df = self._drop_inconsistent_properties(expanded_df, component)

        # Drop original cols and cols to drop
        expanded_df = expanded_df.drop(
            columns=cols_to_drop[component] + original_cols
        )

        # Rename columns to component specific names, if they have not been dropped
        expanded_df = expanded_df.rename(
            columns={
                "insulation_thickness": f"{component}_insulation_thickness",
                "insulation_thickness_ending": f"{component}_insulation_thickness_ending",
                "thermal_transmittance": f"{component}_thermal_transmittance",
                "thermal_transmittance_ending": f"{component}_thermal_transmittance_ending",
                "tariff_type": f"{component}_tariff_type",
                "tariff_type_ending": f"{component}_tariff_type_ending",
                "clean_description": f"{component}_clean_description",
                "clean_description_ending": f"{component}_clean_description_ending",
            }
        )
        self.df = expanded_df

    # We don't need any lighting specific cleaning, we just drop the original description as we use
    # LOW_ENERGY_LIGHTING_STARTING, LOW_ENERGY_LIGHTING_ENDING
    self.df = self.df.drop(
        columns=["lighting_description_starting", "lighting_description_ending"]
    )
|
||||
|
||||
def _clean_missing_values(self, ignore_cols=None):
|
||||
missings = pd.isnull(self.df).sum()
|
||||
missings = missings[missings > 0]
|
||||
|
||||
if ignore_cols:
|
||||
missings = missings[~missings.index.isin(ignore_cols)]
|
||||
|
||||
for col in missings.index:
|
||||
unique_values = self.df[col].unique()
|
||||
if (
|
||||
(True in unique_values)
|
||||
or (False in unique_values)
|
||||
or (col in BOOLEAN_VARIABLES)
|
||||
):
|
||||
self.df[col] = self.df[col].fillna(False)
|
||||
if "none" in unique_values:
|
||||
self.df[col] = self.df[col].fillna("none")
|
||||
else:
|
||||
self.df[col] = self.df[col].fillna("Unknown")
|
||||
|
||||
def _null_validation(self, information: str):
|
||||
print(f"Null validation after {information}")
|
||||
if pd.isnull(self.df).sum().sum():
|
||||
raise ValueError(f"Null values found in dataset, after step {information}")
|
||||
|
||||
def _drop_features(self):
|
||||
"""
|
||||
Drop features that are not needed for modelling
|
||||
"""
|
||||
self.df = self.df.drop(
|
||||
columns=["lodgement_date_starting", "lodgement_date_ending"]
|
||||
)
|
||||
|
||||
def _feature_generation(self):
|
||||
"""
|
||||
Generate features for modelling
|
||||
"""
|
||||
self.df["days_to_starting"] = self._calculate_days_to(
|
||||
self.df["lodgement_date_starting"]
|
||||
)
|
||||
self.df["days_to_ending"] = self._calculate_days_to(
|
||||
self.df["lodgement_date_ending"]
|
||||
)
|
||||
|
||||
def _clean_efficiency_variables(self):
|
||||
"""
|
||||
These is scope to clean this by the model per corresponding description.
|
||||
E.g. for WALLS_ENG_EFF we could look at the mode efficiency rating by description and
|
||||
fill in the missing values with this.
|
||||
When looking at this initially, there are a large volume of records with missing energy efficiency
|
||||
values and therefore a simpler approach was taken just to test including these variables
|
||||
:param df:
|
||||
:return:
|
||||
"""
|
||||
|
||||
missings = pd.isnull(self.df).sum()
|
||||
missings = missings[missings >= 1]
|
||||
|
||||
if len(missings) == 0:
|
||||
return
|
||||
|
||||
# Make sure they are all efficiency columns
|
||||
if any(~missings.index.str.contains("energy_eff")):
|
||||
raise ValueError("Non efficiency columns are missing")
|
||||
|
||||
for m in missings.index:
|
||||
self.df[m] = self.df[m].fillna("NO_RATING")
|
||||
|
||||
@staticmethod
def _calculate_days_to(lodgement_date):
    """
    Number of days between EARLIEST_EPC_DATE and the given lodgement date(s).

    :param lodgement_date: A single date string, or a Series of dates.
    :return: The day count for a string input; otherwise the `.dt.days` of the
        elementwise difference.
    """
    elapsed = pd.to_datetime(lodgement_date) - pd.to_datetime(EARLIEST_EPC_DATE)

    # A scalar difference exposes .days directly; a Series needs the .dt accessor
    if isinstance(lodgement_date, str):
        return elapsed.days
    return elapsed.dt.days
|
||||
|
||||
# def __add__(self, other) -> "TrainingDataset":
|
||||
# if not isinstance(other, TrainingDataset):
|
||||
# raise TypeError("Addition can only be performed with another instance of TrainingDataset")
|
||||
# return TrainingDataset(self.datasets + other.datasets)
|
||||
|
||||
# def __radd__(self, other):
|
||||
# """
|
||||
# Required for sum() to work
|
||||
# """
|
||||
# if isinstance(other, int):
|
||||
# return self
|
||||
# else:
|
||||
# return self.__add__(other)
|
||||
|
||||
|
||||
class NewDataset(BaseDataset):
    """
    A collection of EPCDifferenceRecords combined into a NewDataset.

    Instances can be added together, and `sum()` over instances works.
    """

    def __init__(self, datasets: List[EPCDifferenceRecord]) -> None:
        """
        :param datasets: The records held by this dataset.
        """
        # Run the base initialiser so that `pipeline_steps` exists on the instance
        super().__init__()
        # self.pipeline_steps = self.pipeline_factory("newdata")
        self.datasets = datasets

    def __add__(self, other) -> "NewDataset":
        """
        Return a new NewDataset holding the records of both operands.

        :raises TypeError: If `other` is not a NewDataset.
        """
        if not isinstance(other, NewDataset):
            # The message names this class (it previously referred to "ScoringDataset")
            raise TypeError(
                "Addition can only be performed with another instance of NewDataset"
            )
        return NewDataset(self.datasets + other.datasets)

    def __radd__(self, other):
        """
        Required for sum() to work
        """
        # sum() starts from the int 0, so an int on the left is treated as the identity
        if isinstance(other, int):
            return self
        else:
            return self.__add__(other)
|
||||
410
etl/epc/Pipeline.py
Normal file
410
etl/epc/Pipeline.py
Normal file
|
|
@ -0,0 +1,410 @@
|
|||
import msgpack
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
|
||||
from typing import List
|
||||
from pathlib import Path
|
||||
from tqdm import tqdm
|
||||
import multiprocessing as mp
|
||||
|
||||
from etl.epc.DataProcessor import EPCDataProcessor
|
||||
from etl.epc.Record import EPCRecord, EPCDifferenceRecord
|
||||
from etl.epc.Dataset import TrainingDataset
|
||||
from utils.s3 import save_dataframe_to_s3_parquet, read_from_s3
|
||||
from etl.epc.settings import (
|
||||
MANDATORY_FIXED_FEATURES,
|
||||
LATEST_FIELD,
|
||||
COMPONENT_FEATURES,
|
||||
RDSAP_RESPONSE,
|
||||
HEAT_DEMAND_RESPONSE,
|
||||
CARBON_RESPONSE,
|
||||
CORE_COMPONENT_FEATURES,
|
||||
EFFICIENCY_FEATURES,
|
||||
POTENTIAL_COLUMNS,
|
||||
ROOM_FEATURES,
|
||||
)
|
||||
|
||||
# The settings constants imported above are rebound here to lower-cased
# versions, so the rest of this module works with lower-case column names.
# TODO: change in setting file
MANDATORY_FIXED_FEATURES = [x.lower() for x in MANDATORY_FIXED_FEATURES]
# LATEST_FIELD = [x.lower() for x in LATEST_FIELD if x.lower() not in ROOM_FEATURES]
LATEST_FIELD = [x.lower() for x in LATEST_FIELD]
COMPONENT_FEATURES = [x.lower() for x in COMPONENT_FEATURES]
RDSAP_RESPONSE = RDSAP_RESPONSE.lower()
HEAT_DEMAND_RESPONSE = HEAT_DEMAND_RESPONSE.lower()
CARBON_RESPONSE = CARBON_RESPONSE.lower()
CORE_COMPONENT_FEATURES = [x.lower() for x in CORE_COMPONENT_FEATURES]
EFFICIENCY_FEATURES = [x.lower() for x in EFFICIENCY_FEATURES]
POTENTIAL_COLUMNS = [x.lower() for x in POTENTIAL_COLUMNS]
# Concatenation of the feature lists above plus the lodgement date and the
# three response columns. ROOM_FEATURES is used as imported (not lower-cased here).
VARIABLE_DATA_FEATURES = (
    COMPONENT_FEATURES
    + ROOM_FEATURES
    + EFFICIENCY_FEATURES
    + POTENTIAL_COLUMNS
    + ["lodgement_date", RDSAP_RESPONSE, HEAT_DEMAND_RESPONSE, CARBON_RESPONSE]
)
|
||||
|
||||
|
||||
def get_cleaned_description_mapping():
    """
    Fetch the cleaned EPC description lookup from S3 and decode it.

    The stored object is MessagePack-encoded, so the raw payload is unpacked
    before being returned.

    :return: The decoded lookup object.
    """
    packed = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson", bucket_name="retrofit-data-dev"
    )
    return msgpack.unpackb(packed, raw=False)
|
||||
|
||||
|
||||
# Loaded once, when this module is imported (the call reads from S3).
clean_lookup = get_cleaned_description_mapping()
|
||||
|
||||
|
||||
class EPCPipeline:
|
||||
"""
|
||||
This class will take a list of directories and process them to create a dataset:
|
||||
- Load the data
|
||||
- Pre-process the data
|
||||
- Create a dataset
|
||||
- Clean the dataset
|
||||
- Store the dataset
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
epc_data_processor: EPCDataProcessor,
|
||||
api_epc_records: dict = None,
|
||||
directories: List[Path] | None = None,
|
||||
run_mode="training",
|
||||
epc_local_file="certificates.csv",
|
||||
epc_bucket_name="retrofit-data-dev",
|
||||
epc_cleaning_dataset_key="sap_change_model/{}/cleaning_dataset_rooms.parquet",
|
||||
epc_all_equal_rows_key="sap_change_model/{}/all_equal_rows_rooms.parquet",
|
||||
epc_compiled_dataset_key="sap_change_model/{}/dataset_rooms.parquet",
|
||||
use_parallel=False,
|
||||
):
|
||||
"""
|
||||
:param directories: List of directories to process
|
||||
:param epc_data_processor: EPCDataProcessor object
|
||||
:param run_mode: Either training or newdata
|
||||
:param epc_local_file: Local file name of the EPC data
|
||||
:param epc_bucket_name: S3 bucket name
|
||||
:param epc_cleaning_dataset_key: S3 key for the cleaning dataset
|
||||
:param epc_all_equal_rows_key: S3 key for the all equal rows dataset
|
||||
:param epc_compiled_dataset_key: S3 key for the compiled dataset
|
||||
"""
|
||||
self.compiled_dataset: pd.DataFrame = pd.DataFrame()
|
||||
self.compiled_all_equal_rows: list = []
|
||||
self.compiled_cleaning_averages: list = []
|
||||
|
||||
self.directories = directories
|
||||
self.epc_data_processor = epc_data_processor
|
||||
self.api_epc_records = api_epc_records
|
||||
self.run_mode = run_mode
|
||||
self.epc_local_file = epc_local_file
|
||||
self.epc_bucket_name = epc_bucket_name
|
||||
|
||||
self.use_parallel = use_parallel
|
||||
self.timeprefix = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
|
||||
|
||||
self.epc_cleaning_dataset_key = epc_cleaning_dataset_key.format(self.timeprefix)
|
||||
self.epc_all_equal_rows_key = epc_all_equal_rows_key.format(self.timeprefix)
|
||||
self.epc_compiled_dataset_key = epc_compiled_dataset_key.format(self.timeprefix)
|
||||
|
||||
def run(self):
|
||||
"""
|
||||
Entrypoint to run the pipeline
|
||||
"""
|
||||
if self.run_mode == "training":
|
||||
self.run_training_dataset_pipeline()
|
||||
elif self.run_mode == "newdata":
|
||||
self.run_newdata_dataset_pipeline()
|
||||
else:
|
||||
raise ValueError("Run mode defined needs to be in 'training' or 'newdata'")
|
||||
|
||||
def run_newdata_dataset_pipeline(self):
|
||||
"""
|
||||
Main function to run the newdata pipeline
|
||||
"""
|
||||
prepared_epc = EPCRecord(
|
||||
self.api_epc_records, run_mode="newdata"
|
||||
) # This uses all the epc records to clean the data
|
||||
|
||||
self.epc_data_processor.insert_data(prepared_epc)
|
||||
self.epc_data_processor.prepare_data()
|
||||
|
||||
data = self.epc_data_processor.data
|
||||
|
||||
epc_records = [
|
||||
EPCRecord(**x, run_mode="newdata") for x in data.to_dict(orient="records")
|
||||
]
|
||||
|
||||
def run_training_dataset_pipeline(self):
|
||||
"""
|
||||
Main function to run the training dataset generation pipeline
|
||||
"""
|
||||
if self.directories is None:
|
||||
raise ValueError(
|
||||
"Directories not specified - Unable to run Training pipeline"
|
||||
)
|
||||
|
||||
if self.use_parallel:
|
||||
self.run_training_dataset_parallel_pipeline()
|
||||
else:
|
||||
for directory in tqdm(self.directories):
|
||||
self.process_directory(directory)
|
||||
|
||||
save_dataframe_to_s3_parquet(
|
||||
df=self.compiled_dataset,
|
||||
bucket_name=self.epc_bucket_name,
|
||||
file_key=self.epc_compiled_dataset_key,
|
||||
)
|
||||
|
||||
save_dataframe_to_s3_parquet(
|
||||
df=pd.DataFrame(self.compiled_all_equal_rows),
|
||||
bucket_name=self.epc_bucket_name,
|
||||
file_key=self.epc_all_equal_rows_key,
|
||||
)
|
||||
|
||||
save_dataframe_to_s3_parquet(
|
||||
df=pd.concat(self.compiled_cleaning_averages),
|
||||
bucket_name=self.epc_bucket_name,
|
||||
file_key=self.epc_cleaning_dataset_key,
|
||||
)
|
||||
|
||||
def run_training_dataset_parallel_pipeline(self):
|
||||
"""
|
||||
Run the training pipeline in parallel
|
||||
"""
|
||||
|
||||
with mp.Pool() as pool:
|
||||
results = list(
|
||||
tqdm(
|
||||
pool.imap(self.process_directory_task, self.directories),
|
||||
total=len(self.directories),
|
||||
),
|
||||
)
|
||||
|
||||
for result in tqdm(results):
|
||||
self.compiled_dataset = pd.concat(
|
||||
[self.compiled_dataset, result["dataset"]]
|
||||
)
|
||||
self.compiled_cleaning_averages.append(result["cleaning_averages"])
|
||||
self.compiled_all_equal_rows.extend(result["all_equal_rows"])
|
||||
|
||||
def process_directory_task(self, directory: str) -> pd.DataFrame:
|
||||
"""
|
||||
Task to enable parallel processing
|
||||
"""
|
||||
|
||||
self.process_directory(directory=directory)
|
||||
|
||||
output = {
|
||||
"dataset": self.compiled_dataset,
|
||||
"cleaning_averages": self.epc_data_processor.cleaning_averages,
|
||||
"all_equal_rows": self.compiled_all_equal_rows,
|
||||
}
|
||||
|
||||
return output
|
||||
|
||||
def process_directory(self, directory: Path):
|
||||
"""
|
||||
Process a single directory
|
||||
:param directory:
|
||||
:return:
|
||||
"""
|
||||
filepath = directory / self.epc_local_file
|
||||
|
||||
self.epc_data_processor.prepare_data(filepath=filepath)
|
||||
|
||||
constituency_data = self.epc_data_processor.data
|
||||
|
||||
self.compiled_cleaning_averages.append(
|
||||
self.epc_data_processor.cleaning_averages
|
||||
)
|
||||
|
||||
constituency_difference_records = []
|
||||
|
||||
for uprn, property_data in constituency_data.groupby("uprn", observed=True):
|
||||
difference_records = self.process_uprn(
|
||||
uprn=str(uprn), property_data=property_data, directory=directory
|
||||
)
|
||||
if difference_records is not None:
|
||||
constituency_difference_records.extend(difference_records)
|
||||
|
||||
constituency_dataset = TrainingDataset(
|
||||
datasets=constituency_difference_records, cleaned_lookup=clean_lookup
|
||||
)
|
||||
|
||||
self.compiled_dataset = pd.concat(
|
||||
[self.compiled_dataset, constituency_dataset.df]
|
||||
)
|
||||
|
||||
def process_uprn(self, uprn: str, property_data: pd.DataFrame, directory: Path):
|
||||
"""
|
||||
Process a single UPRN, which may have multiple different EPCs
|
||||
:param uprn: UPRN
|
||||
:param property_data: pd.DataFrame, Data for a single UPRN
|
||||
:param directory: Path, Directory of the UPRN
|
||||
:return:
|
||||
"""
|
||||
# If a property has changed building type, we can ignore the epc rating i.e. this should be 1 unique row
|
||||
if any(property_data[MANDATORY_FIXED_FEATURES].nunique() > 1) or (
|
||||
pd.isnull(property_data[MANDATORY_FIXED_FEATURES]).sum().sum() > 0
|
||||
):
|
||||
return None
|
||||
|
||||
# Fixed features - these are property attributes that shouldn't change over time
|
||||
# Take the latest row for both the LATEST_FEILDS and MANDATORY FIELDS and combine all fields together
|
||||
fixed_data = (
|
||||
property_data[MANDATORY_FIXED_FEATURES + LATEST_FIELD].iloc[-1].to_dict()
|
||||
)
|
||||
|
||||
# We include the lodgement date here as we probably need to factor time into the
|
||||
# model, since EPC standards and rigour have changed over time
|
||||
variable_data = property_data[VARIABLE_DATA_FEATURES]
|
||||
|
||||
uprn = str(uprn)
|
||||
epc_records = [
|
||||
EPCRecord(uprn, **x, run_mode="training")
|
||||
for x in variable_data.to_dict(orient="records")
|
||||
]
|
||||
|
||||
# TODO: We want to be able to provide value for the u values in the main pipeline so this will need to be part of the EPCRecord
|
||||
|
||||
# We can use multiple types of comparison datasets - i.e. Compare consecutive records, or compare all permutations of records
|
||||
property_difference_records = self._generate_property_difference_records(
|
||||
epc_records, uprn, directory, fixed_data
|
||||
)
|
||||
|
||||
return property_difference_records
|
||||
|
||||
def _generate_property_difference_records(
|
||||
self, epc_records: List[EPCRecord], uprn: str, directory: Path, fixed_data: dict
|
||||
):
|
||||
"""
|
||||
We can use multiple types of comparison datasets, for example:
|
||||
- First vs second
|
||||
- Second vs third
|
||||
- First vs third
|
||||
:param epc_records:
|
||||
:return:
|
||||
"""
|
||||
|
||||
property_difference_records: list = []
|
||||
|
||||
# property_difference_records = self._compare_consecutive_epcs(epc_records, uprn, directory, fixed_data, property_difference_records)
|
||||
|
||||
property_difference_records = self._compare_all_permutation_epcs(
|
||||
epc_records, uprn, directory, fixed_data, property_difference_records
|
||||
)
|
||||
|
||||
return property_difference_records
|
||||
|
||||
def _compare_all_permutation_epcs(
|
||||
self,
|
||||
epc_records: List[EPCRecord],
|
||||
uprn: str,
|
||||
directory: Path,
|
||||
fixed_data: dict,
|
||||
property_difference_records: list,
|
||||
):
|
||||
"""
|
||||
Compare all permutations of EPCs for a given UPRN
|
||||
:param epc_records:
|
||||
:return:
|
||||
"""
|
||||
|
||||
for idx in range(0, len(epc_records) - 1):
|
||||
for idx2 in range(idx + 1, len(epc_records)):
|
||||
earliest_record: EPCRecord = epc_records[idx]
|
||||
latest_record: EPCRecord = epc_records[idx2]
|
||||
|
||||
# Auto sort the records so that the record with highest RDSAP score is always record1
|
||||
difference_record: EPCDifferenceRecord = (
|
||||
latest_record.create_EPCDifferenceRecord(
|
||||
other=earliest_record, fixed_data=fixed_data
|
||||
)
|
||||
)
|
||||
# difference_record: EPCDifferenceRecord = latest_record - earliest_record
|
||||
# # TODO: Use method above instead of overloading operator
|
||||
# difference_record.append_fixed_data(fixed_data)
|
||||
|
||||
# TODO: Pull out RDSAP_CHANGE to a variable
|
||||
if difference_record.get("rdsap_change") == 0:
|
||||
if not difference_record.ensure_adequate_data():
|
||||
# Rdsap hasn't changed but we have enough data to use this record
|
||||
# i.e. all fields aside from mechnical ventilation are the same]
|
||||
# self.check_records.append({"uprn": uprn, "directory_name": directory.name, "difference_record": difference_record, "earliest_record": earliest_record, "latest_record": latest_record})
|
||||
continue
|
||||
|
||||
all_equal = difference_record.compare_fields_in_records(
|
||||
fields=[x.lower() for x in CORE_COMPONENT_FEATURES]
|
||||
)
|
||||
|
||||
if all_equal:
|
||||
# Keep track of this for the moment so we can analyse
|
||||
self.compiled_all_equal_rows.append(
|
||||
{"uprn": uprn, "directory_name": directory.name}
|
||||
)
|
||||
continue
|
||||
|
||||
property_difference_records.append(difference_record)
|
||||
|
||||
return property_difference_records
|
||||
|
||||
def _compare_consecutive_epcs(
|
||||
self,
|
||||
epc_records: List[EPCRecord],
|
||||
uprn: str,
|
||||
directory: Path,
|
||||
fixed_data: dict,
|
||||
property_difference_records: list,
|
||||
):
|
||||
"""
|
||||
Compare consecutive EPCs for a given UPRN
|
||||
:param epc_records:
|
||||
:return:
|
||||
"""
|
||||
|
||||
for idx in range(0, len(epc_records) - 1):
|
||||
if idx >= len(epc_records) - 1:
|
||||
break
|
||||
|
||||
earliest_record: EPCRecord = epc_records[idx]
|
||||
latest_record: EPCRecord = epc_records[idx + 1]
|
||||
|
||||
# Auto sort the records so that the record with highest RDSAP score is always record1
|
||||
difference_record: EPCDifferenceRecord = latest_record - earliest_record
|
||||
# TODO: Use method above instead of overloading operator
|
||||
difference_record.append_fixed_data(fixed_data)
|
||||
|
||||
# TODO: Pull out RDSAP_CHANGE to a variable
|
||||
if difference_record.get("rdsap_change") == 0:
|
||||
if not difference_record.ensure_adequate_data():
|
||||
# Rdsap hasn't changed but we have enough data to use this record
|
||||
# i.e. all fields aside from mechnical ventilation are the same]
|
||||
# self.check_records.append({"uprn": uprn, "directory_name": directory.name, "difference_record": difference_record, "earliest_record": earliest_record, "latest_record": latest_record})
|
||||
continue
|
||||
|
||||
all_equal = difference_record.compare_fields_in_records(
|
||||
fields=[x.lower() for x in CORE_COMPONENT_FEATURES]
|
||||
)
|
||||
|
||||
if all_equal:
|
||||
# Keep track of this for the moment so we can analyse
|
||||
self.compiled_all_equal_rows.append(
|
||||
{"uprn": uprn, "directory_name": directory.name}
|
||||
)
|
||||
continue
|
||||
|
||||
# difference_record.append_fixed_data(fixed_data)
|
||||
|
||||
property_difference_records.append(difference_record)
|
||||
|
||||
return property_difference_records
|
||||
1167
etl/epc/Record.py
Normal file
1167
etl/epc/Record.py
Normal file
File diff suppressed because it is too large
Load diff
61
etl/epc/ValidationConfiguration.py
Normal file
61
etl/epc/ValidationConfiguration.py
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
"""
|
||||
Specify the validation rules for each field in the different records.
|
||||
"""
|
||||
|
||||
def validate_walls_description(value):
    """
    Check that a walls description is one of the recognised categories.

    :param value: walls description to check
    :return: None when the value is recognised
    :raises ValueError: when the value is not one of the recognised categories
    """
    recognised = (
        "Cavity",
        "Solid",
        "System built",
        "Timber frame",
        "Suspended timber",
        "Other",
    )
    if value in recognised:
        return None
    raise ValueError("Walls description is not valid")
|
||||
|
||||
# Validation rules for EPC record fields, keyed by field name. Each rule states
# the expected "type" together with either the list of "acceptable_values" or a
# numeric "range"; "function" optionally names a validator callable.
# NOTE(review): nothing in this module consumes these rules - confirm how
# "type" and "range" are interpreted (e.g. whether the range bounds are inclusive).
EPCRecordValidationConfiguration = {
    "WALLS_DESCRIPTION": {
        "type": "string",
        "acceptable_values": ["Cavity", "Solid", "System built", "Timber frame", "Suspended timber", "Other"],
        "function": validate_walls_description
    },
    "FLOOR_DESCRIPTION": {
        "type": "string",
        "acceptable_values": ["Solid", "Suspended", "Other"]
    },
    "ENERGY_CONSUMPTION_CURRENT": {
        "type": "float",
        "range": [0, 100]
    }
}

# No rules are defined yet for difference records.
EPCDifferenceRecordValidationConfiguration = {
}

# Rules for the fixed (per-property) data attached to a difference record.
EPCDifferenceRecordFixedDataValidationConfiguration = {
    "PROPERTY_TYPE": {
        "type": "string",
        "acceptable_values": ["House", "Flat", "Bungalow", "Maisonette", "Park home", "Other"]
    },
    "BUILT_FORM": {
        "type": "string",
        "acceptable_values": ["Detached", "Semi-Detached", "End-Terrace", "Mid-Terrace", "Enclosed Mid-Terrace", "Enclosed End-Terrace", "Enclosed Detached", "Not applicable"]
    },
    # NOTE(review): the key is spelled "CONSITUENCY" (probably meant
    # "CONSTITUENCY") and the accepted values are UK nations - confirm against
    # the field name the consumers of this configuration look up.
    "CONSITUENCY": {
        "type": "string",
        "acceptable_values": ["England", "Wales", "Scotland", "Northern Ireland"]
    },
    "NUMBER_HABITABLE_ROOMS": {
        "type": "integer",
        "range": [0, 100]
    },
    "NUMBER_HEATED_ROOMS": {
        "type": "integer",
        "range": [0, 100]
    },
    "FIXED_LIGHTING_OUTLETS_COUNT": {
        "type": "integer",
        "range": [0, 100]
    },
    # The list of accepted age bands has not been filled in yet.
    "CONSTRUCTION_AGE_BAND": {
        "type": "string",
        "acceptable_values": []
    }
}

# No dataset-level rules are defined yet.
DatasetValidationConfiguration = {

}
|
||||
289
etl/epc/generate_scenarios_data.py
Normal file
289
etl/epc/generate_scenarios_data.py
Normal file
|
|
@ -0,0 +1,289 @@
|
|||
from datetime import datetime
|
||||
import itertools
|
||||
|
||||
import pandas as pd
|
||||
from etl.epc.Record import EPCRecord
|
||||
from backend.SearchEpc import SearchEpc
|
||||
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
from backend.app.config import get_settings
|
||||
from backend.app.db.connection import db_engine
|
||||
from backend.app.db.functions.materials_functions import get_materials
|
||||
|
||||
from backend.app.plan.utils import get_cleaned
|
||||
|
||||
from backend.Property import Property
|
||||
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
|
||||
|
||||
from recommendations.Recommendations import Recommendations
|
||||
from utils.logger import setup_logger
|
||||
from utils.s3 import read_dataframe_from_s3_parquet, save_dataframe_to_s3_parquet
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
# Timestamp used to namespace this run's output under scenario_data/ in S3.
now = datetime.now().strftime("%d-%m-%Y-%H-%M-%S")

logger = setup_logger()

logger.info("Connecting to db")
session = sessionmaker(bind=db_engine)()
# ISO timestamp handed to ModelApi further down to identify this run.
created_at = datetime.now().isoformat()

# NOTE(review): the session is begun here but this script never commits, rolls
# back or closes it - confirm that is intended.
session.begin()
logger.info("Getting the inputs")

# Dataset passed to EPCRecord as cleaning_data when the scenario EPCs are prepared.
cleaning_data = read_dataframe_from_s3_parquet(
    bucket_name=get_settings().DATA_BUCKET,
    file_key="sap_change_model/cleaning_dataset.parquet",
)

# Inputs for the recommendation engine: materials from the database and the
# cleaned lookup returned by get_cleaned().
materials = get_materials(session)
cleaned = get_cleaned()

# Inputs for Property.get_spatial_data and Property.get_components.
uprn_filenames = read_dataframe_from_s3_parquet(
    bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
)
photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(
    bucket=get_settings().DATA_BUCKET
)
|
||||
|
||||
# Properties to generate scenario data for. Each entry identifies a property by
# address and postcode, pins the EPC to start from via its "lmk-key", and lists
# the measure scenarios to score. Every scenario is a 4-item list:
#   [0] names of the measures to combine
#   [1] impact value recorded against the scenario (a string, cast to int later)
#   [2] column overrides written onto every scoring record of the scenario
#   [3] a list of indices that the processing loop in this script does not read
#       NOTE(review): purpose of [3] is unclear - confirm or remove
scenario_properties = [
    {
        "address": "2 South Terrace",
        "postcode": "NN1 5JY",
        "lmk-key": "1459796789102016070507274146560098",
        "measures": [
            [
                ["internal_wall_insulation"],
                "11",
                {"walls_insulation_thickness_ending": "average"},
                [0],
            ],
            [
                ["external_wall_insulation"],
                "10",
                {"walls_insulation_thickness_ending": "average"},
                [0],
            ],
            [["solar", "windows"], "15", {"photo_supply_ending": 50}, [0, 1]],
        ],
    },
    {
        "address": "8 Lindlings",
        "postcode": "HP1 2HA",
        "lmk-key": "c14029235739827d5f627dc8aa9bb567d026b267e851e0db0001db24638667b1",
        "measures": [
            [
                ["cavity_wall_insulation", "loft_insulation"],
                "15",
                {"walls_insulation_thickness_ending": "average"},
                [0, 1],
            ],
        ],
    },
    {
        "address": "44 Lindlings",
        "postcode": "HP1 2HE",
        "lmk-key": "99296a6dda21314fef3a61cda59e441e9a2aacf115eb96f4a0fa85696bf7b117",
        "measures": [
            [
                ["cavity_wall_insulation", "loft_insulation"],
                "15",
                {"walls_insulation_thickness_ending": "average"},
                [0, 1],
            ],
        ],
    },
    {
        "address": "46 Chaulden Terrace",
        "postcode": "HP1 2AN",
        "lmk-key": "d1e0534be3a44c33003323b21d0e322e3daddc65b5ee71936f89c59ddab96b50",
        "measures": [
            [
                ["cavity_wall_insulation", "loft_insulation"],
                "15",
                {"walls_insulation_thickness_ending": "average"},
                [0, 1],
            ],
        ],
    },
    {
        "address": "73 Long Chaulden",
        "postcode": "HP1 2HX",
        "lmk-key": "1eae354db522a95188018d9cd0502ed8c609910b6c88f8797d3a25f59b11770a",
        "measures": [
            [
                ["cavity_wall_insulation", "loft_insulation"],
                "15",
                {"walls_insulation_thickness_ending": "average"},
                [0, 1],
            ],
        ],
    },
]
|
||||
|
||||
|
||||
# One scoring record per (property, measure scenario, recommendation combination).
recommendations_scoring_data = []

for scenario_property in scenario_properties:
    # We validate each record in the file. If the record is NOT valid, we need to handle this accordingly

    epc_searcher = SearchEpc(
        address1=scenario_property["address"],
        postcode=scenario_property["postcode"],
        auth_token=get_settings().EPC_AUTH_TOKEN,
        os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY,
    )
    epc_searcher.find_property()

    # Find the epc with the same LMK key
    all_epcs = epc_searcher.older_epcs.copy()
    all_epcs.extend([epc_searcher.newest_epc, epc_searcher.full_sap_epc])
    target_lmk_key = scenario_property.get("lmk-key")
    # Missing EPCs (e.g. a property without a full SAP EPC) are skipped instead
    # of failing on `.get`, and an unmatched key gets an explicit error rather
    # than a bare IndexError.
    original_epc = next(
        (
            epc
            for epc in all_epcs
            if epc and epc.get("lmk-key", None) == target_lmk_key
        ),
        None,
    )
    if original_epc is None:
        raise LookupError(
            f"No EPC with lmk-key {target_lmk_key!r} found for "
            f"{scenario_property['address']}, {scenario_property['postcode']}"
        )

    epc_records = {
        "original_epc": original_epc,
        "full_sap_epc": {},
        "old_data": [],
    }

    prepared_epc = EPCRecord(
        epc_records=epc_records, run_mode="newdata", cleaning_data=cleaning_data
    )

    p = Property(
        id=prepared_epc.uprn,
        address=epc_searcher.address_clean,
        postcode=epc_searcher.postcode_clean,
        epc_record=prepared_epc,
    )

    p.get_spatial_data(uprn_filenames)
    p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)

    recommender = Recommendations(property_instance=p, materials=materials)
    # The return value is not used below; the per-component recommenders are
    # read directly after this call.
    property_recommendations = recommender.recommend("0")

    # "wall_recomender" is the attribute name as spelled on the recommender.
    wall_recommendations = recommender.wall_recomender.recommendations
    loft_recommendations = recommender.roof_recommender.recommendations
    solar_recommendations = recommender.solar_recommender.recommendation
    windows_recommendations = recommender.windows_recommender.recommendation

    p.create_base_difference_epc_record(cleaned_lookup=cleaned)

    # (candidate recommendations, {measure name: recommendation "type"}) for
    # each component. Recommendations of one component are alternatives; the
    # components are combined with each other below.
    recommendation_groups = (
        (
            wall_recommendations,
            {
                "internal_wall_insulation": "internal_wall_insulation",
                "external_wall_insulation": "external_wall_insulation",
                "cavity_wall_insulation": "cavity_wall_insulation",
            },
        ),
        (loft_recommendations, {"loft_insulation": "loft_insulation"}),
        (solar_recommendations, {"solar": "solar_pv"}),
        (windows_recommendations, {"windows": "windows_glazing"}),
    )

    scoring_list = []

    # Create the record for each of the different measures
    for measure_impact_override in scenario_property["measures"]:
        # Only the first three items are used; see scenario_properties.
        measure, impact, override = measure_impact_override[:3]

        combi_list = []
        for candidates, type_by_measure in recommendation_groups:
            selected = []
            for measure_name, recommendation_type in type_by_measure.items():
                if measure_name not in measure:
                    continue
                for rec in candidates:
                    if rec["type"] == recommendation_type:
                        selected.append(rec)
            # Components without a matching recommendation are left out so
            # they do not empty the product below.
            if selected:
                combi_list.append(selected)

        all_combi_recommendations = list(itertools.product(*combi_list))

        for i, combi in enumerate(all_combi_recommendations):
            recommendation_record = p.base_difference_record.df.to_dict("records")[
                0
            ].copy()
            recommendation_record = p.create_recommendation_scoring_data(
                property_id=i,
                primary_recommendation_id=i,
                recommendation_record=recommendation_record,
                recommendations=combi,
            )

            if override is not None:
                for key, value in override.items():
                    recommendation_record[key] = value

            recommendation_record["id"] = "&".join(measure) + "+" + str(i)
            recommendation_record["impact"] = impact
            scoring_list.append(recommendation_record)

    recommendations_scoring_data.extend(scoring_list)
|
||||
|
||||
# Response / outcome columns that must not be part of the scoring input.
response_columns = [
    "rdsap_change",
    "heat_demand_change",
    "carbon_change",
    "sap_ending",
    "heat_demand_ending",
    "carbon_ending",
]

# Build the scoring frame: the impact was recorded as a string, so cast it to
# int, then remove the response columns.
recommendations_scoring_data = (
    pd.DataFrame(recommendations_scoring_data)
    .astype({"impact": int})
    .drop(columns=response_columns)
)

# Move "impact" and then "id" to the front, so the frame starts with id, impact.
impact_col = recommendations_scoring_data.pop("impact")
recommendations_scoring_data.insert(0, "impact", impact_col)

id_col = recommendations_scoring_data.pop("id")
recommendations_scoring_data.insert(0, "id", id_col)

from backend.ml_models.api import ModelApi

model_api = ModelApi(portfolio_id="generate-scenarios-data", timestamp=created_at)

# The predictions themselves are not used further in this script.
all_predictions = model_api.predict_all(
    df=recommendations_scoring_data,
    bucket=get_settings().DATA_BUCKET,
    prediction_buckets={
        "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
        "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
        "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET,
    },
)

# Store the scoring input under this run's timestamp.
save_dataframe_to_s3_parquet(
    recommendations_scoring_data,
    "retrofit-data-dev",
    f"scenario_data/{now}/recommendations_scoring_data.parquet",
)
|
||||
|
|
@ -1,636 +1,39 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
import msgpack
|
||||
|
||||
from pathlib import Path
|
||||
from etl.epc.settings import (
|
||||
MANDATORY_FIXED_FEATURES,
|
||||
LATEST_FIELD,
|
||||
COMPONENT_FEATURES,
|
||||
RDSAP_RESPONSE,
|
||||
HEAT_DEMAND_RESPONSE,
|
||||
COLUMNS_TO_MERGE_ON,
|
||||
CARBON_RESPONSE,
|
||||
CORE_COMPONENT_FEATURES,
|
||||
EFFICIENCY_FEATURES,
|
||||
POTENTIAL_COLUMNS,
|
||||
MINIMUM_FLOOR_HEIGHT
|
||||
)
|
||||
from etl.epc.DataProcessor import DataProcessor
|
||||
from utils.s3 import save_dataframe_to_s3_parquet, read_from_s3
|
||||
from recommendations.rdsap_tables import england_wales_age_band_lookup
|
||||
from recommendations.recommendation_utils import (
|
||||
get_wall_u_value, get_roof_u_value, get_floor_u_value, estimate_perimeter,
|
||||
get_wall_type
|
||||
)
|
||||
from etl.epc.DataProcessor import EPCDataProcessor
|
||||
from etl.epc.Pipeline import EPCPipeline
|
||||
|
||||
DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
|
||||
|
||||
|
||||
def get_cleaned():
|
||||
def main():
|
||||
"""
|
||||
This function will retrieve the cleaned dataset from s3 which has the cleaned
|
||||
descriptions for the epc dataset
|
||||
|
||||
This data is stored in MessagePack format and therefore needs to be decoded
|
||||
:return:
|
||||
Orchestration function
|
||||
"""
|
||||
|
||||
cleaned = read_from_s3(
|
||||
s3_file_name="cleaned_epc_data/cleaned.bson",
|
||||
bucket_name="retrofit-data-dev"
|
||||
)
|
||||
|
||||
cleaned = msgpack.unpackb(cleaned, raw=False)
|
||||
|
||||
return cleaned
|
||||
|
||||
|
||||
def process_and_prune_desriptions(df, cleaned_lookup):
    """
    This method will merge on the cleaned lookup table and ensure that the building fabric in the
    starting and ending EPC is consistent, to ensure that we are performing our modelling on the cleanest
    possible dataset.

    For every component the cleaned lookup is merged on twice (once on the
    starting description, once on the ending description, the latter with an
    "_ENDING" suffix). For walls, floor and roof only rows whose construction
    flags are equal at the start and the end are kept. The columns listed in
    ``cols_to_drop`` and the original description columns are then dropped and
    the generic lookup columns are renamed with the component as prefix.

    :param df: dataframe with one "<COMPONENT>_DESCRIPTION_STARTING" and
        "<COMPONENT>_DESCRIPTION_ENDING" column per component
        ("MAIN_FUEL_STARTING" / "MAIN_FUEL_ENDING" for the main fuel) plus the
        two LIGHTING_DESCRIPTION columns
    :param cleaned_lookup: mapping from "<component>-description" (or
        "main-fuel") to a list of records, each with an "original_description" key
    :return: the merged, filtered and pruned dataframe
    """

    # Lookup columns (and their "_ENDING" twins) that are removed again after the
    # merge of each component.
    cols_to_drop = {
        "walls": [
            # We need the cleaned descriptions for pulling out u-values
            'original_description', 'thermal_transmittance_unit',
            'original_description_ENDING',
            'thermal_transmittance_unit_ENDING',
            'is_cavity_wall_ENDING', 'is_filled_cavity_ENDING',
            'is_solid_brick_ENDING', 'is_system_built_ENDING',
            'is_timber_frame_ENDING', 'is_granite_or_whinstone_ENDING',
            'is_as_built_ENDING', 'is_cob_ENDING', 'is_assumed_ENDING',
            'is_sandstone_or_limestone_ENDING',
            # We remove the is_assumed columns
            "is_assumed", "is_assumed_ENDING"
        ],
        "floor": [
            "original_description", "clean_description", "thermal_transmittance_unit",
            "no_data", "no_data_ENDING", "original_description_ENDING",
            "clean_description_ENDING", "thermal_transmittance_unit_ENDING",
            "is_suspended_ENDING", "is_solid_ENDING", "another_property_below_ENDING",
            "is_to_unheated_space_ENDING", "is_to_external_air_ENDING", "is_assumed",
            "is_assumed_ENDING"
        ],
        "roof": [
            "original_description", "clean_description", "thermal_transmittance_unit",
            "is_assumed", "is_valid", "original_description_ENDING", "clean_description_ENDING",
            "thermal_transmittance_unit_ENDING", "is_pitched_ENDING", "is_roof_room_ENDING",
            "is_loft_ENDING", "is_flat_ENDING", "is_thatched_ENDING", "is_at_rafters_ENDING",
            "has_dwelling_above_ENDING", "is_assumed_ENDING", "is_valid_ENDING"
        ],
        "hotwater": [
            "original_description", "clean_description", "assumed", "original_description_ENDING",
            "clean_description_ENDING", "assumed_ENDING"
        ],
        "mainheat": [
            "original_description", "clean_description", "original_description_ENDING",
            "has_assumed", "original_description_ENDING", "clean_description_ENDING",
            "has_assumed_ENDING",
        ],
        "mainheatcont": [
            "original_description", "clean_description", "original_description_ENDING", "clean_description_ENDING"
        ],
        "windows": [
            "original_description", "clean_description", "original_description_ENDING", "clean_description_ENDING",
            # We don't need many of the glazing coverage features because we have the multi_glaze_proportion feature
            "has_glazing", "glazing_coverage", "no_data", "has_glazing_ENDING", "glazing_coverage_ENDING",
            "no_data_ENDING"
        ],
        "main-fuel": [
            "original_description", "clean_description", "original_description_ENDING", "clean_description_ENDING"
        ],
    }

    for component in ["walls", "floor", "roof", "hotwater", "mainheat", "mainheatcont", "windows", "main-fuel"]:
        component_upper = component.upper()
        if component == "main-fuel":
            # The dataframe columns use an underscore (MAIN_FUEL_*)
            component_upper = component_upper.replace("-", "_")

        # main-fuel has no "-description" suffix in the lookup or "_DESCRIPTION" in the column names
        cleaned_key = "main-fuel" if component == "main-fuel" else f"{component}-description"
        left_on_starting = (
            f"{component_upper}_STARTING" if component == "main-fuel" else f"{component_upper}_DESCRIPTION_STARTING"
        )

        left_on_ending = (
            f"{component_upper}_ENDING" if component == "main-fuel" else f"{component_upper}_DESCRIPTION_ENDING"
        )

        # First merge adds the lookup columns for the starting description, the
        # second adds them again for the ending description with an "_ENDING" suffix
        df = df.merge(
            pd.DataFrame(cleaned_lookup[cleaned_key]),
            how="left",
            left_on=left_on_starting,
            right_on="original_description",
        ).merge(
            pd.DataFrame(cleaned_lookup[cleaned_key]),
            how="left",
            left_on=left_on_ending,
            right_on="original_description",
            suffixes=("", "_ENDING")
        )

        # NOTE(review): rows whose description has no match in the lookup get
        # missing flags from the left merge and so presumably fail the equality
        # filters below - confirm that dropping them is intended.
        if component == "walls":
            # We make sure the wall construction hasn't changed
            df = df[
                (df["is_cavity_wall"] == df["is_cavity_wall_ENDING"]) &
                (df["is_solid_brick"] == df["is_solid_brick_ENDING"]) &
                (df["is_timber_frame"] == df["is_timber_frame_ENDING"]) &
                (df["is_granite_or_whinstone"] == df["is_granite_or_whinstone_ENDING"]) &
                (df["is_cob"] == df["is_cob_ENDING"]) &
                (df["is_sandstone_or_limestone"] == df["is_sandstone_or_limestone_ENDING"])
            ]
        elif component == "floor":
            # We make sure the floor construction hasn't changed
            df = df[
                (df["is_suspended"] == df["is_suspended_ENDING"]) &
                (df["is_solid"] == df["is_solid_ENDING"]) &
                (df["another_property_below"] == df["another_property_below_ENDING"]) &
                (df["is_to_unheated_space"] == df["is_to_unheated_space_ENDING"]) &
                (df["is_to_external_air"] == df["is_to_external_air_ENDING"])
            ]
        elif component == "roof":
            # We make sure the roof construction hasn't changed
            df = df[
                (df["is_pitched"] == df["is_pitched_ENDING"]) &
                (df["is_roof_room"] == df["is_roof_room_ENDING"]) &
                (df["is_loft"] == df["is_loft_ENDING"]) &
                (df["is_flat"] == df["is_flat_ENDING"]) &
                (df["is_thatched"] == df["is_thatched_ENDING"]) &
                (df["is_at_rafters"] == df["is_at_rafters_ENDING"]) &
                (df["has_dwelling_above"] == df["has_dwelling_above_ENDING"])
            ]

        # Drop the binary indicators and replace the original description with the cleaned version

        # Drop original cols
        original_cols = [
            f"{component_upper}_DESCRIPTION_STARTING", f"{component_upper}_DESCRIPTION_ENDING"
        ] if component != "main-fuel" else [
            f"{component_upper}_STARTING", f"{component_upper}_ENDING"
        ]

        df = df.drop(columns=cols_to_drop[component] + original_cols)

        # The generic lookup column names are prefixed with the component so they
        # do not clash with the columns merged on for the next component. The
        # first lookup record is used to tell which columns this lookup provides.

        # If we have an insulation_thickness column, rename it
        if "insulation_thickness" in cleaned_lookup[cleaned_key][0]:
            df = df.rename(
                columns={
                    "insulation_thickness": f"{component}_insulation_thickness",
                    "insulation_thickness_ENDING": f"{component}_insulation_thickness_ENDING",
                }
            )
        # If we have thermal transmittance, rename it
        if "thermal_transmittance" in cleaned_lookup[cleaned_key][0]:
            df = df.rename(
                columns={
                    "thermal_transmittance": f"{component}_thermal_transmittance",
                    "thermal_transmittance_ENDING": f"{component}_thermal_transmittance_ENDING",
                }
            )

        # If we have tariff, rename it
        if "tariff_type" in cleaned_lookup[cleaned_key][0]:
            df = df.rename(
                columns={
                    "tariff_type": f"{component}_tariff_type",
                    "tariff_type_ENDING": f"{component}_tariff_type_ENDING",
                }
            )

        # We need the walls descriptions so we rename them to distinguish them
        if component == "walls":
            df = df.rename(
                columns={
                    "clean_description": f"{component}_clean_description",
                    "clean_description_ENDING": f"{component}_clean_description_ENDING",
                }
            )

    # We don't need any lighting specific cleaning, we just drop the original description as we use
    # LOW_ENERGY_LIGHTING_STARTING, LOW_ENERGY_LIGHTING_ENDING

    df = df.drop(columns=["LIGHTING_DESCRIPTION_STARTING", "LIGHTING_DESCRIPTION_ENDING"])

    return df
|
||||
|
||||
|
||||
def _wall_uvalues(x, age_band):
    """
    Return the (starting, ending) wall u-values for a single record.

    Recorded thermal transmittance values are used where present; otherwise the value
    is looked up with get_wall_u_value from the cleaned wall description.
    """
    starting_wall_uvalue = x["walls_thermal_transmittance"]
    if pd.isnull(starting_wall_uvalue):
        starting_wall_uvalue = get_wall_u_value(
            clean_description=x["walls_clean_description"],
            age_band=age_band,
            is_granite_or_whinstone=x["is_granite_or_whinstone"],
            is_sandstone_or_limestone=x["is_sandstone_or_limestone"],
        )

    ending_wall_uvalue = x["walls_thermal_transmittance_ENDING"]
    if pd.isnull(ending_wall_uvalue):
        if x["walls_clean_description"] != x["walls_clean_description_ENDING"]:
            ending_wall_uvalue = get_wall_u_value(
                clean_description=x["walls_clean_description_ENDING"],
                age_band=age_band,
                is_granite_or_whinstone=x["is_granite_or_whinstone"],
                is_sandstone_or_limestone=x["is_sandstone_or_limestone"],
            )
        else:
            # Same wall description at both ends, so the wall u-value is unchanged
            ending_wall_uvalue = starting_wall_uvalue

    return starting_wall_uvalue, ending_wall_uvalue


def _roof_uvalues(x, age_band):
    """
    Return the (starting, ending) roof u-values for a single record.

    Raises ValueError if the record has a dwelling above but a non-zero roof u-value.
    """
    if x["has_dwelling_above"]:
        if x["roof_thermal_transmittance"] != 0:
            raise ValueError("Should have 0 u-value for roof")

        if x["roof_thermal_transmittance_ENDING"] != 0:
            raise ValueError("Should have 0 u-value for roof")

    def estimate(insulation_thickness_col):
        # Only the insulation thickness differs between the starting and ending estimate
        return get_roof_u_value(
            insulation_thickness=x[insulation_thickness_col],
            has_dwelling_above=x["has_dwelling_above"],
            is_loft=x["is_loft"],
            is_roof_room=x["is_roof_room"],
            is_thatched=x["is_thatched"],
            is_flat=x["is_flat"],
            is_pitched=x["is_pitched"],
            is_at_rafters=x["is_at_rafters"],
            age_band=age_band
        )

    starting_roof_uvalue = x["roof_thermal_transmittance"]
    if pd.isnull(starting_roof_uvalue):
        starting_roof_uvalue = estimate("roof_insulation_thickness")

    ending_roof_uvalue = x["roof_thermal_transmittance_ENDING"]
    if pd.isnull(ending_roof_uvalue):
        ending_roof_uvalue = estimate("roof_insulation_thickness_ENDING")

    return starting_roof_uvalue, ending_roof_uvalue


def _floor_uvalues(x, age_band, perimeters):
    """
    Return the (starting, ending) floor u-values for a single record.

    Raises ValueError if the record has another property below but a non-zero floor u-value.
    """
    floor_type = "suspended" if x["is_suspended"] else "solid"
    wall_type = get_wall_type(**x)

    if x["another_property_below"]:
        if x["floor_thermal_transmittance"] != 0:
            raise ValueError("Should have 0 u-value for floor")

        if x["floor_thermal_transmittance_ENDING"] != 0:
            raise ValueError("Should have 0 u-value for floor")
        return 0, 0

    starting_floor_uvalue = x["floor_thermal_transmittance"]
    ending_floor_uvalue = x["floor_thermal_transmittance_ENDING"]

    if pd.isnull(starting_floor_uvalue):
        starting_floor_uvalue = get_floor_u_value(
            floor_type=floor_type,
            perimeter=perimeters["estimated_perimeter_STARTING"],
            area=x["TOTAL_FLOOR_AREA_STARTING"],
            insulation_thickness=x["floor_insulation_thickness"],
            wall_type=wall_type,
            age_band=age_band
        )

    if pd.isnull(ending_floor_uvalue):
        ending_floor_uvalue = get_floor_u_value(
            floor_type=floor_type,
            perimeter=perimeters["estimated_perimeter_ENDING"],
            area=x["TOTAL_FLOOR_AREA_ENDING"],
            insulation_thickness=x["floor_insulation_thickness_ENDING"],
            wall_type=wall_type,
            age_band=age_band
        )

    return starting_floor_uvalue, ending_floor_uvalue


def make_uvalues(df):
    """
    Fill missing wall, roof and floor thermal transmittance (u-value) columns and add
    estimated perimeters.

    For every row, a u-value that is already recorded is kept; a missing one is estimated
    with get_wall_u_value / get_roof_u_value / get_floor_u_value. The estimates are merged
    back onto the data and used to fill the nulls in the
    "<component>_thermal_transmittance" and "<component>_thermal_transmittance_ENDING"
    columns. The "estimated_perimeter_STARTING" and "estimated_perimeter_ENDING" columns
    are added to the result.

    The input dataframe is not modified.

    :param df: pd.DataFrame holding the starting and ending features of each record
    :return: pd.DataFrame with the thermal transmittance columns filled
    :raises ValueError: if a record with a dwelling above / another property below has a
        non-zero roof / floor u-value
    """
    # Use assign rather than in-place assignment so the temporary merge key does not
    # leak into the caller's dataframe
    df = df.assign(row_index=df.index)

    uvalues = []
    for _, x in df.iterrows():
        age_band = england_wales_age_band_lookup[x["CONSTRUCTION_AGE_BAND"]]

        # ~~~~~~~~~~~~~~~~~~
        # Walls
        # ~~~~~~~~~~~~~~~~~~
        starting_wall_uvalue, ending_wall_uvalue = _wall_uvalues(x, age_band)

        # ~~~~~~~~~~~~~~~~~~
        # Roof
        # ~~~~~~~~~~~~~~~~~~
        starting_roof_uvalue, ending_roof_uvalue = _roof_uvalues(x, age_band)

        # ~~~~~~~~~~~~~~~~~~
        # Floor
        # ~~~~~~~~~~~~~~~~~~
        perimeters = {}
        for suffix in ["_STARTING", "_ENDING"]:
            floor_area = x[f"TOTAL_FLOOR_AREA{suffix}"]
            n_rooms = x["NUMBER_HABITABLE_ROOMS"]

            perimeters[f"estimated_perimeter{suffix}"] = estimate_perimeter(floor_area, n_rooms)

        starting_floor_uvalue, ending_floor_uvalue = _floor_uvalues(x, age_band, perimeters)

        uvalues.append(
            {
                "UPRN": x["UPRN"],
                "row_index": x["row_index"],
                "starting_walls_uvalue": starting_wall_uvalue,
                "ending_walls_uvalue": ending_wall_uvalue,
                "starting_roof_uvalue": starting_roof_uvalue,
                "ending_roof_uvalue": ending_roof_uvalue,
                "starting_floor_uvalue": starting_floor_uvalue,
                "ending_floor_uvalue": ending_floor_uvalue,
                **perimeters
            }
        )

    uvalues = pd.DataFrame(uvalues)

    # row_index makes the merge key unique even when a UPRN appears on several rows
    df = df.merge(
        uvalues, how="left", on=["UPRN", "row_index"]
    ).drop(columns="row_index")

    # Fill missings
    for component in ["walls", "floor", "roof"]:
        for suffix in ["", "_ENDING"]:
            fill_col = f"starting_{component}_uvalue" if suffix == "" else f"ending_{component}_uvalue"

            df[f"{component}_thermal_transmittance{suffix}"] = np.where(
                pd.isnull(df[f"{component}_thermal_transmittance{suffix}"]),
                df[fill_col],
                df[f"{component}_thermal_transmittance{suffix}"]
            )

    df = df.drop(
        columns=[
            "starting_walls_uvalue", "ending_walls_uvalue", "starting_roof_uvalue",
            "ending_roof_uvalue", "starting_floor_uvalue", "ending_floor_uvalue"
        ]
    )

    return df
|
||||
|
||||
|
||||
def compare_records(earliest_record: pd.Series, latest_record: pd.Series, columns: list):
    """
    For a list of columns, check if the earliest and latest record are the same
    If they are the same, we indicate this, because we have example of SAP scores changing
    without any feature changes

    A column that is missing (null) in both records counts as unchanged; a plain `!=`
    would report it as a difference because NaN never compares equal to itself.

    :param earliest_record: pd.Series
    :param latest_record: pd.Series
    :param columns: list of columns to compare
    :return: boolean indicating whether or not all features are the same
    """

    for col in columns:
        earliest_value = earliest_record[col]
        latest_value = latest_record[col]

        # Missing on both sides means the feature has not changed
        if pd.isnull(earliest_value) and pd.isnull(latest_value):
            continue

        if earliest_value != latest_value:
            return False

    return True
|
||||
|
||||
|
||||
def app():
|
||||
# Get all the files in the directory
|
||||
|
||||
# Data glossary:
|
||||
# https://epc.opendatacommunities.org/docs/guidance#glossary
|
||||
|
||||
cleaned_lookup = get_cleaned()
|
||||
|
||||
# List all subdirectories
|
||||
directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()]
|
||||
# directories = directories[0:3]
|
||||
|
||||
dataset = []
|
||||
cleaning_dataset = []
|
||||
# Keep track of the all equals
|
||||
all_equal_rows = []
|
||||
|
||||
for directory in tqdm(directories):
|
||||
filepath = directory / "certificates.csv"
|
||||
|
||||
data_processor = DataProcessor(filepath=filepath)
|
||||
|
||||
df = data_processor.pre_process()
|
||||
|
||||
cleaning_averages = data_processor.make_cleaning_averages()
|
||||
|
||||
# We have some odd cases with missing constituency so we fill
|
||||
df = df.fillna({"CONSTITUENCY": df["CONSTITUENCY"].mode().values[0]})
|
||||
|
||||
df = DataProcessor.apply_averages_cleaning(
|
||||
data_to_clean=df,
|
||||
cleaning_data=cleaning_averages,
|
||||
cols_to_merge_on=COLUMNS_TO_MERGE_ON
|
||||
)
|
||||
|
||||
data_by_urpn = []
|
||||
for uprn, property_data in df.groupby("UPRN", observed=True):
|
||||
|
||||
# Fixed features - these are property attributes that shouldn't change over time
|
||||
fixed_data = {}
|
||||
|
||||
# If a property has changed building type, we can ignore the epc rating i.e. this should be 1 unique row
|
||||
if any(property_data[MANDATORY_FIXED_FEATURES].nunique() > 1) or (
|
||||
pd.isnull(property_data[MANDATORY_FIXED_FEATURES]).sum().sum() > 0
|
||||
):
|
||||
continue
|
||||
|
||||
# Take the latest row for both the LATEST_FIELD and MANDATORY_FIXED_FEATURES columns
|
||||
latest_field_data = property_data[LATEST_FIELD].iloc[-1].to_dict()
|
||||
mandatory_field_data = (
|
||||
property_data[MANDATORY_FIXED_FEATURES].iloc[-1].to_dict()
|
||||
)
|
||||
|
||||
# Combine all fields together
|
||||
fixed_data.update(mandatory_field_data)
|
||||
fixed_data.update(latest_field_data)
|
||||
|
||||
# We include the lodgement date here as we probably need to factor time into the
|
||||
# model, since EPC standards and rigour have changed over time
|
||||
variable_data = property_data[
|
||||
COMPONENT_FEATURES + EFFICIENCY_FEATURES + POTENTIAL_COLUMNS + [
|
||||
"LODGEMENT_DATE", RDSAP_RESPONSE, HEAT_DEMAND_RESPONSE, CARBON_RESPONSE
|
||||
]
|
||||
]
|
||||
|
||||
# Note: we look at changes between subsequent EPCS, however we could look at other permutations
|
||||
# e.g. first vs second, second vs third and also first vs third
|
||||
property_model_data = []
|
||||
for idx in range(0, property_data.shape[0] - 1):
|
||||
|
||||
if idx >= property_data.shape[0] - 1:
|
||||
break
|
||||
|
||||
earliest_record = variable_data.iloc[idx]
|
||||
latest_record = variable_data.iloc[idx + 1]
|
||||
|
||||
# Check if the sap gets better or worse
|
||||
gets_better = earliest_record[RDSAP_RESPONSE] <= latest_record[RDSAP_RESPONSE]
|
||||
|
||||
component_variables = COMPONENT_FEATURES + EFFICIENCY_FEATURES
|
||||
|
||||
if gets_better:
|
||||
starting_sap = earliest_record[RDSAP_RESPONSE]
|
||||
starting_heat_demand = earliest_record[HEAT_DEMAND_RESPONSE]
|
||||
starting_carbon = earliest_record[CARBON_RESPONSE]
|
||||
|
||||
ending_sap = latest_record[RDSAP_RESPONSE]
|
||||
ending_heat_demand = latest_record[HEAT_DEMAND_RESPONSE]
|
||||
ending_carbon = latest_record[CARBON_RESPONSE]
|
||||
|
||||
rdsap_change = latest_record[RDSAP_RESPONSE] - starting_sap
|
||||
heat_demand_change = latest_record[HEAT_DEMAND_RESPONSE] - starting_heat_demand
|
||||
carbon_change = latest_record[CARBON_RESPONSE] - starting_carbon
|
||||
|
||||
starting_record = earliest_record[component_variables + ["LODGEMENT_DATE"]].add_suffix("_STARTING")
|
||||
ending_record = latest_record[component_variables + ["LODGEMENT_DATE"]].add_suffix("_ENDING")
|
||||
else:
|
||||
starting_sap = latest_record[RDSAP_RESPONSE]
|
||||
starting_heat_demand = latest_record[HEAT_DEMAND_RESPONSE]
|
||||
starting_carbon = latest_record[CARBON_RESPONSE]
|
||||
|
||||
ending_sap = earliest_record[RDSAP_RESPONSE]
|
||||
ending_heat_demand = earliest_record[HEAT_DEMAND_RESPONSE]
|
||||
ending_carbon = earliest_record[CARBON_RESPONSE]
|
||||
|
||||
rdsap_change = earliest_record[RDSAP_RESPONSE] - starting_sap
|
||||
heat_demand_change = earliest_record[HEAT_DEMAND_RESPONSE] - starting_heat_demand
|
||||
carbon_change = earliest_record[CARBON_RESPONSE] - starting_carbon
|
||||
|
||||
starting_record = latest_record[component_variables + ["LODGEMENT_DATE"]].add_suffix("_STARTING")
|
||||
ending_record = earliest_record[component_variables + ["LODGEMENT_DATE"]].add_suffix("_ENDING")
|
||||
|
||||
if rdsap_change == 0:
|
||||
continue
|
||||
|
||||
all_equal = compare_records(
|
||||
earliest_record=earliest_record,
|
||||
latest_record=latest_record,
|
||||
columns=CORE_COMPONENT_FEATURES
|
||||
)
|
||||
|
||||
if all_equal:
|
||||
# Keep track of this for the moment so we can analyse
|
||||
all_equal_rows.append({"uprn": uprn, "directory_name": directory.name})
|
||||
continue
|
||||
|
||||
features = pd.concat([starting_record, ending_record])
|
||||
|
||||
property_model_data.append(
|
||||
{
|
||||
"UPRN": uprn,
|
||||
"RDSAP_CHANGE": rdsap_change,
|
||||
"HEAT_DEMAND_CHANGE": heat_demand_change,
|
||||
"CARBON_CHANGE": carbon_change,
|
||||
"SAP_STARTING": starting_sap,
|
||||
"SAP_ENDING": ending_sap,
|
||||
"HEAT_DEMAND_STARTING": starting_heat_demand,
|
||||
"HEAT_DEMAND_ENDING": ending_heat_demand,
|
||||
"CARBON_STARTING": starting_carbon,
|
||||
"CARBON_ENDING": ending_carbon,
|
||||
"POTENTIAL_ENERGY_EFFICIENCY": earliest_record["POTENTIAL_ENERGY_EFFICIENCY"],
|
||||
"ENVIRONMENT_IMPACT_POTENTIAL": earliest_record["ENVIRONMENT_IMPACT_POTENTIAL"],
|
||||
"ENERGY_CONSUMPTION_POTENTIAL": earliest_record["ENERGY_CONSUMPTION_POTENTIAL"],
|
||||
"CO2_EMISSIONS_POTENTIAL": earliest_record["CO2_EMISSIONS_POTENTIAL"],
|
||||
**fixed_data,
|
||||
**features.to_dict(),
|
||||
}
|
||||
)
|
||||
|
||||
data_by_urpn.extend(property_model_data)
|
||||
|
||||
data_by_urpn_df = pd.DataFrame(data_by_urpn)
|
||||
|
||||
data_by_urpn_df["DAYS_TO_STARTING"] = DataProcessor.calculate_days_to(
|
||||
data_by_urpn_df["LODGEMENT_DATE_STARTING"]
|
||||
)
|
||||
|
||||
data_by_urpn_df["DAYS_TO_ENDING"] = DataProcessor.calculate_days_to(
|
||||
data_by_urpn_df["LODGEMENT_DATE_ENDING"]
|
||||
)
|
||||
|
||||
data_by_urpn_df = data_by_urpn_df.drop(columns=["LODGEMENT_DATE_STARTING", "LODGEMENT_DATE_ENDING"])
|
||||
|
||||
data_by_urpn_df = DataProcessor.clean_efficiency_variables(data_by_urpn_df)
|
||||
|
||||
# We look for key building fabric features that have changed from one EPC to the next.
|
||||
# if, for example, we see that a home has gone from being a cavity wall to a solid wall, we
|
||||
# remove this record, as it indicates that the quality of the EPC conducted in the first instance
|
||||
# is low
|
||||
# We also replace descriptions with their cleaned variants
|
||||
|
||||
if pd.isnull(data_by_urpn_df).sum().sum():
|
||||
raise ValueError("Null values found in dataset")
|
||||
|
||||
data_by_urpn_df = process_and_prune_desriptions(data_by_urpn_df, cleaned_lookup)
|
||||
|
||||
# Apply u-values
|
||||
for col in ["walls_clean_description", "walls_clean_description_ENDING"]:
|
||||
data_by_urpn_df[col] = data_by_urpn_df[col].str.replace("(assumed)", "").str.rstrip()
|
||||
|
||||
data_by_urpn_df = make_uvalues(data_by_urpn_df).drop(
|
||||
columns=["walls_clean_description", "walls_clean_description_ENDING"]
|
||||
)
|
||||
|
||||
# TODO: For some of the features that we clean, we have either a true, false or possibly null value
|
||||
# Those nulls should be False. clean_missings_after_description_process handles this but shouldn't
|
||||
# need to
|
||||
|
||||
data_by_urpn_df = DataProcessor.clean_missings_after_description_process(data_by_urpn_df)
|
||||
|
||||
if pd.isnull(data_by_urpn_df).sum().sum():
|
||||
raise ValueError("Null values found in dataset after process_and_prune_desriptions")
|
||||
|
||||
dataset.append(data_by_urpn_df)
|
||||
|
||||
cleaning_averages["LOCAL_AUTHORITY"] = df["LOCAL_AUTHORITY"].values[0]
|
||||
cleaning_dataset.append(cleaning_averages)
|
||||
|
||||
print("Final all equal count: %s" % str(len(all_equal_rows)))
|
||||
|
||||
# Store cleaning dataset in s3 as a parquet file
|
||||
cleaning_dataset = pd.concat(cleaning_dataset)
|
||||
save_dataframe_to_s3_parquet(
|
||||
df=cleaning_dataset,
|
||||
bucket_name="retrofit-data-dev",
|
||||
file_key="sap_change_model/cleaning_dataset.parquet",
|
||||
epc_pipeline = EPCPipeline(
|
||||
directories=directories,
|
||||
use_parallel=True,
|
||||
epc_data_processor=EPCDataProcessor(run_mode="training"),
|
||||
)
|
||||
|
||||
output = pd.concat(dataset)
|
||||
epc_pipeline.run()
|
||||
|
||||
# Remove any records that have huge swings in their floor area
|
||||
output["tfa_diff_abs"] = abs(output["TOTAL_FLOOR_AREA_ENDING"] - output["TOTAL_FLOOR_AREA_STARTING"])
|
||||
output["tfa_diff_prop"] = output["tfa_diff_abs"] / output["TOTAL_FLOOR_AREA_STARTING"]
|
||||
output = output[output["tfa_diff_prop"] < 0.5]
|
||||
output = output.drop(columns=["tfa_diff_abs", "tfa_diff_prop"])
|
||||
# For testing
|
||||
# dataset_df = epc_pipeline.compiled_dataset
|
||||
# dataset_df.to_parquet("refactor_datasets/dataset_with0perm_all.parquet")
|
||||
# pd.DataFrame(epc_pipeline.compiled_all_equal_rows).to_parquet("refactor_datasets/all_equal_rows_with0perm_all.parquet")
|
||||
# pd.concat(epc_pipeline.compiled_cleaning_averages).to_parquet("refactor_datasets/cleaning_averages_with0perm_all.parquet")
|
||||
|
||||
uvalue_columns = [col for col in output.columns if "thermal_transmittance" in col]
|
||||
for uvalue_col in uvalue_columns:
|
||||
output[uvalue_col] = pd.to_numeric(output[uvalue_col])
|
||||
|
||||
save_dataframe_to_s3_parquet(
|
||||
df=output,
|
||||
bucket_name="retrofit-data-dev",
|
||||
file_key="sap_change_model/dataset.parquet",
|
||||
)
|
||||
|
||||
# Store all_equal_rows
|
||||
all_equal_rows = pd.DataFrame(all_equal_rows)
|
||||
save_dataframe_to_s3_parquet(
|
||||
df=all_equal_rows,
|
||||
bucket_name="retrofit-data-dev",
|
||||
file_key="sap_change_model/all_equal_rows.parquet",
|
||||
)
|
||||
# from utils.s3 import read_dataframe_from_s3_parquet
|
||||
# dataset = read_dataframe_from_s3_parquet(
|
||||
# bucket_name="retrofit-data-dev",
|
||||
# file_key="sap_change_model/dataset_test.parquet",
|
||||
# )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app()
|
||||
main()
|
||||
|
|
|
|||
|
|
@ -0,0 +1,5 @@
|
|||
pandas==2.1.3
|
||||
tqdm==4.66.1
|
||||
msgpack==1.0.7
|
||||
boto3==1.29.6
|
||||
pyarrow==15.0.2
|
||||
|
|
@ -2,6 +2,63 @@
|
|||
# TODO: migrate to dynaconf
|
||||
from pathlib import Path
|
||||
|
||||
DATA_ANOMALY_MATCHES = {
|
||||
# Invalid reports are where the value provided is out of bounds, e.g. a negative energy rating of -1199 or a
|
||||
# non-integer, there is no valid energy band for this, so it is marked as INVALID!
|
||||
"INVALID",
|
||||
"INVALID!",
|
||||
# When the energy certificate was first lodged on the register there was no requirement to lodge this data
|
||||
# item, i.e. a non-mandatory item.
|
||||
"NO DATA!",
|
||||
"NODATA!",
|
||||
# When the energy certificate was first lodged on the register there was no requirement to lodge this data item,
|
||||
# i.e.a non - mandatory item.
|
||||
"N/A",
|
||||
# A value generated by the register to account for a data item that was not mandatory when the lodgement of
|
||||
# the energy certificate occurred. When the data item became mandatory the register operator, for backwards
|
||||
# compatibility purposes, populated the data field with a value of ‘not recorded’ to ensure that the energy
|
||||
# certificate retrieval process is successfully completed. Mandatory data items cannot be applied
|
||||
# retrospectively to energy certificates lodged before the date of the change.
|
||||
"Not recorded",
|
||||
# The data also contains DECs with an operational rating of ‘9999’ (a ‘default’ DEC). The production of a
|
||||
# ‘default’ DEC value was allowed to enable building occupiers, with poor quality or no energy data,
|
||||
# the opportunity to comply with the regulations. From April 2011 the ability to lodge a ‘default’ DEC was no
|
||||
# longer allowed.
|
||||
"9999",
|
||||
# The Building Emission Rate (BER) data field for non-domestic buildings may contain a ‘blank’ value. The BER
|
||||
# was only lodged on the register from 7 March 2010.
|
||||
"Blank"
|
||||
# There are currently just over 8,600 records where the local authority identifier is ‘null’. This is due to
|
||||
# the Register Operator not being able to match the building address in the Markermap Ordinance Survey (GB)
|
||||
# lookup tables or OS MasterMap Address Layer 2 data. The majority of these addresses have been requested
|
||||
# manually by energy assessors for inclusion by the Register Operator in the registers (e.g. new builds,
|
||||
# etc). These records are being published for completeness. An ongoing process to manage these manually added
|
||||
# addresses will take time to develop to deal with these and future anomalies.
|
||||
#
|
||||
# There are several fields within the lodged data where it is possible to enter multiple entries to cater for
|
||||
# different data_types of build within a single property, i.e. extensions. This results in multiple entries for
|
||||
# the description fields for floor, roof and wall. For the purposes of this data release only the information
|
||||
# contained within the first of these multiple entries is being provided. As there are no restrictions on the
|
||||
# value in this first field it means that sometimes the first field in a multiple entry description field may
|
||||
# contain a ‘null’ value. A resolution to correct these anomalies will be considered for future data releases.
|
||||
"NULL",
|
||||
# We sometimes see fields populated with just an empty string.
|
||||
"",
|
||||
# We sometimes find None values - particulatly when we produce an estimated EPC
|
||||
None,
|
||||
# An older value which rarely shows up but has been seen in the data.
|
||||
"UNKNOWN",
|
||||
}
|
||||
|
||||
# Marker phrases for anomalous values (presumably matched as substrings rather than
# whole values, going by the name - confirm against the code that consumes this set).
DATA_ANOMALY_SUBSTRINGS = {
    # Some ‘pick’ list values have been superseded by another value, e.g. a single
    # ‘pitched roof’ value replaced by three sub-categories of pitched roof. The original
    # value is retained, with ‘for backward compatibility only’ appended, so that the
    # energy certificate retrieval process can still complete. Replacement data items
    # cannot be applied retrospectively to certificates lodged on the register before
    # the date of the change.
    "for backward compatibility only",
}
|
||||
|
||||
METRIC_FILENAME = "metrics.csv"
|
||||
|
||||
OPTIMISE_METRIC = "mean_absolute_error"
|
||||
|
|
@ -106,17 +163,20 @@ CORE_COMPONENT_FEATURES = [
|
|||
]
|
||||
|
||||
EFFICIENCY_FEATURES = [
|
||||
'HOT_WATER_ENERGY_EFF',
|
||||
'FLOOR_ENERGY_EFF',
|
||||
'WINDOWS_ENERGY_EFF',
|
||||
'WALLS_ENERGY_EFF',
|
||||
'SHEATING_ENERGY_EFF',
|
||||
'ROOF_ENERGY_EFF',
|
||||
'MAINHEAT_ENERGY_EFF',
|
||||
'MAINHEATC_ENERGY_EFF',
|
||||
'LIGHTING_ENERGY_EFF'
|
||||
"HOT_WATER_ENERGY_EFF",
|
||||
"FLOOR_ENERGY_EFF",
|
||||
"WINDOWS_ENERGY_EFF",
|
||||
"WALLS_ENERGY_EFF",
|
||||
"SHEATING_ENERGY_EFF",
|
||||
"ROOF_ENERGY_EFF",
|
||||
"MAINHEAT_ENERGY_EFF",
|
||||
"MAINHEATC_ENERGY_EFF",
|
||||
"LIGHTING_ENERGY_EFF",
|
||||
]
|
||||
|
||||
ROOM_FEATURES = ["number_habitable_rooms", "number_heated_rooms"]
|
||||
|
||||
|
||||
COMPONENT_FEATURES = CORE_COMPONENT_FEATURES + [
|
||||
"TRANSACTION_TYPE",
|
||||
"ENERGY_TARIFF", # Not sure if this is relevant
|
||||
|
|
@ -127,10 +187,10 @@ COMPONENT_FEATURES = CORE_COMPONENT_FEATURES + [
|
|||
]
|
||||
|
||||
POTENTIAL_COLUMNS = [
|
||||
'POTENTIAL_ENERGY_EFFICIENCY',
|
||||
'ENVIRONMENT_IMPACT_POTENTIAL',
|
||||
'ENERGY_CONSUMPTION_POTENTIAL',
|
||||
'CO2_EMISSIONS_POTENTIAL',
|
||||
"POTENTIAL_ENERGY_EFFICIENCY",
|
||||
"ENVIRONMENT_IMPACT_POTENTIAL",
|
||||
"ENERGY_CONSUMPTION_POTENTIAL",
|
||||
"CO2_EMISSIONS_POTENTIAL",
|
||||
# We don't include cost features for the moment
|
||||
# 'LIGHTING_COST_POTENTIAL',
|
||||
# 'HEATING_COST_POTENTIAL',
|
||||
|
|
@ -155,6 +215,14 @@ MANDATORY_FIXED_FEATURES = ["PROPERTY_TYPE", "BUILT_FORM", "CONSTITUENCY"]
|
|||
# and Wales from 31 July 2014
|
||||
EARLIEST_EPC_DATE = "2014-08-01"
|
||||
|
||||
IGNORED_TRANSACTION_TYPES = "new dwelling"
|
||||
IGNORED_FLOOR_LEVELS = ["top floor", "mid floor"]
|
||||
IGNORED_PROPERTY_TYPES = "Park home"
|
||||
IGNORED_TENURES = [
|
||||
"Not defined - use in the case of a new dwelling for which the intended tenure in not known. It is not to be used "
|
||||
"for an existing dwelling"
|
||||
]
|
||||
|
||||
RDSAP_RESPONSE = "CURRENT_ENERGY_EFFICIENCY"
|
||||
HEAT_DEMAND_RESPONSE = "ENERGY_CONSUMPTION_CURRENT"
|
||||
CARBON_RESPONSE = "CO2_EMISSIONS_CURRENT"
|
||||
|
|
@ -172,30 +240,55 @@ DATA_PROCESSOR_SETTINGS = {
|
|||
|
||||
# This has a manual mapping of the column types required
|
||||
COLUMNTYPES = {
|
||||
'UPRN': 'object', 'TOTAL_FLOOR_AREA': 'float64', 'FLOOR_HEIGHT': 'float64', 'PROPERTY_TYPE': 'object',
|
||||
'BUILT_FORM': 'object', 'CONSTITUENCY': 'object', 'NUMBER_HABITABLE_ROOMS': 'float64',
|
||||
'NUMBER_HEATED_ROOMS': 'float64', 'FIXED_LIGHTING_OUTLETS_COUNT': 'float64',
|
||||
'CONSTRUCTION_AGE_BAND': 'object',
|
||||
'TRANSACTION_TYPE': 'object',
|
||||
'WALLS_DESCRIPTION': 'object',
|
||||
'FLOOR_DESCRIPTION': 'object',
|
||||
'LIGHTING_DESCRIPTION': 'object',
|
||||
'ROOF_DESCRIPTION': 'object',
|
||||
'MAINHEAT_DESCRIPTION': 'object',
|
||||
'HOTWATER_DESCRIPTION': 'object', 'MAIN_FUEL': 'object',
|
||||
'MECHANICAL_VENTILATION': 'object',
|
||||
'SECONDHEAT_DESCRIPTION': 'object', 'ENERGY_TARIFF': 'object',
|
||||
'SOLAR_WATER_HEATING_FLAG': 'object', 'PHOTO_SUPPLY': 'float64',
|
||||
'WINDOWS_DESCRIPTION': 'object',
|
||||
'GLAZED_TYPE': 'object',
|
||||
'MULTI_GLAZE_PROPORTION': 'float64',
|
||||
'LOW_ENERGY_LIGHTING': 'float64',
|
||||
'NUMBER_OPEN_FIREPLACES': 'float64',
|
||||
'MAINHEATCONT_DESCRIPTION': 'object',
|
||||
'EXTENSION_COUNT': 'float64',
|
||||
'LODGEMENT_DATE': 'object',
|
||||
**dict(zip(EFFICIENCY_FEATURES, ['object', ] * len(EFFICIENCY_FEATURES))),
|
||||
**dict(zip(POTENTIAL_COLUMNS, ['float64', ] * len(POTENTIAL_COLUMNS)))
|
||||
"UPRN": "object",
|
||||
"TOTAL_FLOOR_AREA": "float64",
|
||||
"FLOOR_HEIGHT": "float64",
|
||||
"PROPERTY_TYPE": "object",
|
||||
"BUILT_FORM": "object",
|
||||
"CONSTITUENCY": "object",
|
||||
"NUMBER_HABITABLE_ROOMS": "float64",
|
||||
"NUMBER_HEATED_ROOMS": "float64",
|
||||
"FIXED_LIGHTING_OUTLETS_COUNT": "float64",
|
||||
"CONSTRUCTION_AGE_BAND": "object",
|
||||
"TRANSACTION_TYPE": "object",
|
||||
"WALLS_DESCRIPTION": "object",
|
||||
"FLOOR_DESCRIPTION": "object",
|
||||
"LIGHTING_DESCRIPTION": "object",
|
||||
"ROOF_DESCRIPTION": "object",
|
||||
"MAINHEAT_DESCRIPTION": "object",
|
||||
"HOTWATER_DESCRIPTION": "object",
|
||||
"MAIN_FUEL": "object",
|
||||
"MECHANICAL_VENTILATION": "object",
|
||||
"SECONDHEAT_DESCRIPTION": "object",
|
||||
"ENERGY_TARIFF": "object",
|
||||
"SOLAR_WATER_HEATING_FLAG": "object",
|
||||
"PHOTO_SUPPLY": "float64",
|
||||
"WINDOWS_DESCRIPTION": "object",
|
||||
"GLAZED_TYPE": "object",
|
||||
"MULTI_GLAZE_PROPORTION": "float64",
|
||||
"LOW_ENERGY_LIGHTING": "float64",
|
||||
"NUMBER_OPEN_FIREPLACES": "float64",
|
||||
"MAINHEATCONT_DESCRIPTION": "object",
|
||||
"EXTENSION_COUNT": "float64",
|
||||
"LODGEMENT_DATE": "object",
|
||||
**dict(
|
||||
zip(
|
||||
EFFICIENCY_FEATURES,
|
||||
[
|
||||
"object",
|
||||
]
|
||||
* len(EFFICIENCY_FEATURES),
|
||||
)
|
||||
),
|
||||
**dict(
|
||||
zip(
|
||||
POTENTIAL_COLUMNS,
|
||||
[
|
||||
"float64",
|
||||
]
|
||||
* len(POTENTIAL_COLUMNS),
|
||||
)
|
||||
),
|
||||
}
|
||||
|
||||
# For modelling, we don't allow records with more than 100 SAP points
|
||||
|
|
@ -215,7 +308,7 @@ fill_na_map = {
|
|||
"LOW_ENERGY_LIGHTING": 0,
|
||||
"MAINHEATCONT_DESCRIPTION": "Unknown",
|
||||
"EXTENSION_COUNT": 0,
|
||||
"NUMBER_OPEN_FIREPLACES": 0
|
||||
"NUMBER_OPEN_FIREPLACES": 0,
|
||||
}
|
||||
|
||||
################################################################################################
|
||||
|
|
@ -224,62 +317,212 @@ fill_na_map = {
|
|||
################################################################################################
|
||||
|
||||
STARTING_SUFFIX_COMPONENT_COLS = [
|
||||
"SAP", "HEAT_DEMAND", "CARBON", "TRANSACTION_TYPE", "MECHANICAL_VENTILATION",
|
||||
"SECONDHEAT_DESCRIPTION", "ENERGY_TARIFF", "SOLAR_WATER_HEATING_FLAG", "PHOTO_SUPPLY",
|
||||
"GLAZED_TYPE", "MULTI_GLAZE_PROPORTION", "LOW_ENERGY_LIGHTING", "NUMBER_OPEN_FIREPLACES",
|
||||
"EXTENSION_COUNT", "TOTAL_FLOOR_AREA", "FLOOR_HEIGHT", "DAYS_TO", "estimated_perimeter"
|
||||
"SAP",
|
||||
"HEAT_DEMAND",
|
||||
"CARBON",
|
||||
"TRANSACTION_TYPE",
|
||||
"MECHANICAL_VENTILATION",
|
||||
"SECONDHEAT_DESCRIPTION",
|
||||
"ENERGY_TARIFF",
|
||||
"SOLAR_WATER_HEATING_FLAG",
|
||||
"PHOTO_SUPPLY",
|
||||
"GLAZED_TYPE",
|
||||
"MULTI_GLAZE_PROPORTION",
|
||||
"LOW_ENERGY_LIGHTING",
|
||||
"NUMBER_OPEN_FIREPLACES",
|
||||
"EXTENSION_COUNT",
|
||||
"TOTAL_FLOOR_AREA",
|
||||
"FLOOR_HEIGHT",
|
||||
"DAYS_TO",
|
||||
"estimated_perimeter",
|
||||
]
|
||||
NO_SUFFIX_COMPONENT_COLS = [
|
||||
"walls_thermal_transmittance",
|
||||
"is_cavity_wall",
|
||||
"is_filled_cavity",
|
||||
"is_solid_brick",
|
||||
"is_system_built",
|
||||
"is_timber_frame",
|
||||
"is_granite_or_whinstone",
|
||||
"is_as_built",
|
||||
"is_cob",
|
||||
"is_sandstone_or_limestone",
|
||||
"is_park_home",
|
||||
"walls_insulation_thickness",
|
||||
"external_insulation",
|
||||
"internal_insulation",
|
||||
"floor_thermal_transmittance",
|
||||
"is_to_unheated_space",
|
||||
"is_to_external_air",
|
||||
"is_suspended",
|
||||
"is_solid",
|
||||
"another_property_below",
|
||||
"floor_insulation_thickness",
|
||||
"roof_thermal_transmittance",
|
||||
"is_pitched",
|
||||
"is_roof_room",
|
||||
"is_loft",
|
||||
"is_flat",
|
||||
"is_thatched",
|
||||
"is_at_rafters",
|
||||
"has_dwelling_above",
|
||||
"roof_insulation_thickness",
|
||||
"heater_type",
|
||||
"system_type",
|
||||
"thermostat_characteristics",
|
||||
"heating_scope",
|
||||
"energy_recovery",
|
||||
"hotwater_tariff_type",
|
||||
"extra_features",
|
||||
"chp_systems",
|
||||
"distribution_system",
|
||||
"no_system_present",
|
||||
"appliance",
|
||||
"has_radiators",
|
||||
"has_fan_coil_units",
|
||||
"has_pipes_in_screed_above_insulation",
|
||||
"has_pipes_in_insulated_timber_floor",
|
||||
"has_pipes_in_concrete_slab",
|
||||
"has_boiler",
|
||||
"has_air_source_heat_pump",
|
||||
"has_room_heaters",
|
||||
"has_electric_storage_heaters",
|
||||
"has_warm_air",
|
||||
"has_electric_underfloor_heating",
|
||||
"has_electric_ceiling_heating",
|
||||
"has_community_scheme",
|
||||
"has_ground_source_heat_pump",
|
||||
"has_no_system_present",
|
||||
"has_portable_electric_heaters",
|
||||
"has_water_source_heat_pump",
|
||||
"has_electric_heat_pump",
|
||||
"has_micro-cogeneration",
|
||||
"has_solar_assisted_heat_pump",
|
||||
"has_exhaust_source_heat_pump",
|
||||
"has_community_heat_pump",
|
||||
"has_electric",
|
||||
"has_mains_gas",
|
||||
"has_wood_logs",
|
||||
"has_coal",
|
||||
"has_oil",
|
||||
"has_wood_pellets",
|
||||
"has_anthracite",
|
||||
"has_dual_fuel_mineral_and_wood",
|
||||
"has_smokeless_fuel",
|
||||
"has_lpg",
|
||||
"has_b30k",
|
||||
"has_electricaire",
|
||||
"has_assumed_for_most_rooms",
|
||||
"has_underfloor_heating",
|
||||
"thermostatic_control",
|
||||
"charging_system",
|
||||
"switch_system",
|
||||
"no_control",
|
||||
"dhw_control",
|
||||
"community_heating",
|
||||
"multiple_room_thermostats",
|
||||
"auxiliary_systems",
|
||||
"trvs",
|
||||
"rate_control",
|
||||
"glazing_type",
|
||||
"fuel_type",
|
||||
"main-fuel_tariff_type",
|
||||
"is_community",
|
||||
"no_individual_heating_or_community_network",
|
||||
"complex_fuel_type",
|
||||
]
|
||||
# Column names that make up the "no suffix" component group.
# NOTE(review): how (or whether) a suffix is applied to these names is decided by
# the code that consumes this list, which is not visible here - confirm there.
# The order of the entries is preserved exactly.
NO_SUFFIX_COMPONENT_COLS = [
    "walls_thermal_transmittance",
    "is_cavity_wall", "is_filled_cavity", "is_solid_brick", "is_system_built",
    "is_timber_frame", "is_granite_or_whinstone", "is_as_built", "is_cob",
    "is_sandstone_or_limestone", "is_park_home",
    "walls_insulation_thickness", "external_insulation", "internal_insulation",
    "floor_thermal_transmittance",
    "is_to_unheated_space", "is_to_external_air", "is_suspended", "is_solid",
    "another_property_below",
    "floor_insulation_thickness",
    "roof_thermal_transmittance",
    "is_pitched", "is_roof_room", "is_loft", "is_flat", "is_thatched",
    "is_at_rafters", "has_dwelling_above",
    "roof_insulation_thickness",
    "heater_type", "system_type", "thermostat_characteristics", "heating_scope",
    "energy_recovery",
    "hotwater_tariff_type", "extra_features", "chp_systems",
    "distribution_system", "no_system_present", "appliance",
    "has_radiators", "has_fan_coil_units",
    "has_pipes_in_screed_above_insulation",
    "has_pipes_in_insulated_timber_floor",
    "has_pipes_in_concrete_slab",
    "has_boiler", "has_air_source_heat_pump", "has_room_heaters",
    "has_electric_storage_heaters", "has_warm_air",
    "has_electric_underfloor_heating", "has_electric_ceiling_heating",
    "has_community_scheme", "has_ground_source_heat_pump",
    "has_no_system_present", "has_portable_electric_heaters",
    "has_water_source_heat_pump", "has_electric_heat_pump",
    "has_micro-cogeneration", "has_solar_assisted_heat_pump",
    "has_exhaust_source_heat_pump", "has_community_heat_pump",
    "has_electric", "has_mains_gas", "has_wood_logs", "has_coal", "has_oil",
    "has_wood_pellets", "has_anthracite", "has_dual_fuel_mineral_and_wood",
    "has_smokeless_fuel", "has_lpg", "has_b30k", "has_electricaire",
    "has_assumed_for_most_rooms", "has_underfloor_heating",
    "thermostatic_control", "charging_system", "switch_system", "no_control",
    "dhw_control", "community_heating", "multiple_room_thermostats",
    "auxiliary_systems", "trvs", "rate_control",
    "glazing_type", "fuel_type", "main-fuel_tariff_type", "is_community",
    "no_individual_heating_or_community_network", "complex_fuel_type",
]
|
||||
|
||||
# Column names that make up the "ending suffix" component group.
# NOTE(review): the suffix handling itself lives in the consumer of this list,
# which is not visible here - confirm there.
# Each column is listed exactly once, one per line with a trailing comma, so a
# missing comma can never silently concatenate two adjacent string literals.
ENDING_SUFFIX_COMPONENT_COLS = [
    "SAP",
    "HEAT_DEMAND",
    "CARBON",
    "TRANSACTION_TYPE",
    "MECHANICAL_VENTILATION",
    "SECONDHEAT_DESCRIPTION",
    "ENERGY_TARIFF",
    "SOLAR_WATER_HEATING_FLAG",
    "PHOTO_SUPPLY",
    "GLAZED_TYPE",
    "MULTI_GLAZE_PROPORTION",
    "LOW_ENERGY_LIGHTING",
    "NUMBER_OPEN_FIREPLACES",
    "EXTENSION_COUNT",
    "TOTAL_FLOOR_AREA",
    "FLOOR_HEIGHT",
    "DAYS_TO",
    "walls_thermal_transmittance",
    "is_park_home",
    "walls_insulation_thickness",
    "external_insulation",
    "internal_insulation",
    "floor_thermal_transmittance",
    "floor_insulation_thickness",
    "roof_thermal_transmittance",
    "roof_insulation_thickness",
    "heater_type",
    "system_type",
    "thermostat_characteristics",
    "heating_scope",
    "energy_recovery",
    "hotwater_tariff_type",
    "extra_features",
    "chp_systems",
    "distribution_system",
    "no_system_present",
    "appliance",
    "has_radiators",
    "has_fan_coil_units",
    "has_pipes_in_screed_above_insulation",
    "has_pipes_in_insulated_timber_floor",
    "has_pipes_in_concrete_slab",
    "has_boiler",
    "has_air_source_heat_pump",
    "has_room_heaters",
    "has_electric_storage_heaters",
    "has_warm_air",
    "has_electric_underfloor_heating",
    "has_electric_ceiling_heating",
    "has_community_scheme",
    "has_ground_source_heat_pump",
    "has_no_system_present",
    "has_portable_electric_heaters",
    "has_water_source_heat_pump",
    "has_electric_heat_pump",
    "has_micro-cogeneration",
    "has_solar_assisted_heat_pump",
    "has_exhaust_source_heat_pump",
    "has_community_heat_pump",
    "has_electric",
    "has_mains_gas",
    "has_wood_logs",
    "has_coal",
    "has_oil",
    "has_wood_pellets",
    "has_anthracite",
    "has_dual_fuel_mineral_and_wood",
    "has_smokeless_fuel",
    "has_lpg",
    "has_b30k",
    "has_electricaire",
    "has_assumed_for_most_rooms",
    "has_underfloor_heating",
    "thermostatic_control",
    "charging_system",
    "switch_system",
    "no_control",
    "dhw_control",
    "community_heating",
    "multiple_room_thermostats",
    "auxiliary_systems",
    "trvs",
    "rate_control",
    "glazing_type",
    "fuel_type",
    "main-fuel_tariff_type",
    "is_community",
    "no_individual_heating_or_community_network",
    "complex_fuel_type",
    "estimated_perimeter",
]
|
||||
|
||||
# We found that without performing any filtering, the bottom 0.5% of homes had a floor height of 1.65m. We'll therefore
|
||||
|
|
|
|||
10001
etl/epc/testfile.csv
Normal file
10001
etl/epc/testfile.csv
Normal file
File diff suppressed because it is too large
Load diff
358
etl/epc/tests/test_epcrecord.py
Normal file
358
etl/epc/tests/test_epcrecord.py
Normal file
|
|
@ -0,0 +1,358 @@
|
|||
import pytest
|
||||
from utils.s3 import read_dataframe_from_s3_parquet
|
||||
from etl.epc.Record import EPCRecord
|
||||
from etl.epc.settings import DATA_ANOMALY_MATCHES
|
||||
import random
|
||||
|
||||
|
||||
class TestEpcRecord:
    """Unit tests for the individual ``_clean_*`` steps of ``EPCRecord``.

    Each test presets ``prepared_epc`` with a small dict, runs a single
    cleaning step and checks the value that step leaves behind.
    """

    @staticmethod
    def _make_record(cleaning_data, prepared_epc, **record_kwargs):
        """Build an ``EPCRecord`` and preset its ``prepared_epc`` to *prepared_epc*.

        :param cleaning_data: value passed straight through to ``EPCRecord``.
        :param prepared_epc: dict assigned to ``record.prepared_epc`` after construction.
        :param record_kwargs: extra keyword arguments forwarded to ``EPCRecord``.
        :return: the constructed record.
        """
        record = EPCRecord(cleaning_data=cleaning_data, **record_kwargs)
        record.prepared_epc = prepared_epc
        return record

    @pytest.fixture()
    def cleaning_data(self):
        """Read the cleaning dataset parquet file from S3."""
        # NOTE(review): the fixture is function-scoped, so every test performs its
        # own S3 read. A wider scope would be cheaper, but is only safe if
        # EPCRecord never mutates the frame - confirm before changing.
        return read_dataframe_from_s3_parquet(
            bucket_name="retrofit-data-dev",
            file_key="sap_change_model/cleaning_dataset.parquet",
        )

    @pytest.fixture()
    def epc_records_1(self):
        """A sample ``epc_records`` payload: one original EPC, no full-SAP EPC, no old data."""
        return {
            'original_epc': {
                'low-energy-fixed-light-count': '', 'address': '139 School Road, Hall Green',
                'uprn-source': 'Energy Assessor', 'floor-height': '2.6', 'heating-cost-potential': '1138',
                'unheated-corridor-length': '', 'hot-water-cost-potential': '175',
                'construction-age-band': 'England and Wales: 1900-1929', 'potential-energy-rating': 'B',
                'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Very Good',
                'environment-impact-potential': '82', 'glazed-type': 'double glazing, unknown install date',
                'heating-cost-current': '2711', 'address3': '',
                'mainheatcont-description': 'Programmer, TRVs and bypass',
                'sheating-energy-eff': 'N/A', 'property-type': 'House', 'local-authority-label': 'Birmingham',
                'fixed-lighting-outlets-count': '11', 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural',
                'hot-water-cost-current': '310', 'county': '', 'postcode': 'B28 8JF', 'solar-water-heating-flag': 'N',
                'constituency': 'E14000562', 'co2-emissions-potential': '2.0', 'number-heated-rooms': '4',
                'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '107',
                'local-authority': 'E08000025', 'built-form': 'Semi-Detached', 'number-open-fireplaces': '0',
                'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2023-07-05',
                'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '65', 'address1': '139 School Road',
                'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Birmingham, Hall Green',
                'roof-energy-eff': 'Average', 'total-floor-area': '103.0', 'building-reference-number': '10004697322',
                'environment-impact-current': '43', 'co2-emissions-current': '6.7',
                'roof-description': 'Pitched, 100 mm loft insulation', 'floor-energy-eff': 'N/A',
                'number-habitable-rooms': '4', 'address2': 'Hall Green', 'hot-water-env-eff': 'Good',
                'posttown': 'BIRMINGHAM', 'mainheatc-energy-eff': 'Average', 'main-fuel': 'mains gas (not community)',
                'lighting-env-eff': 'Very Good', 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A',
                'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in 82% of fixed outlets',
                'roof-env-eff': 'Average', 'walls-energy-eff': 'Very Poor', 'photo-supply': '0.0',
                'lighting-cost-potential': '182', 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100',
                'main-heating-controls': '', 'lodgement-datetime': '2023-07-13 08:23:07', 'flat-top-storey': '',
                'current-energy-rating': 'E', 'secondheat-description': 'None', 'walls-env-eff': 'Very Poor',
                'transaction-type': 'rental', 'uprn': '100070505235', 'current-energy-efficiency': '51',
                'energy-consumption-current': '366', 'mainheat-description': 'Boiler and radiators, mains gas',
                'lighting-cost-current': '182', 'lodgement-date': '2023-07-13', 'extension-count': '0',
                'mainheatc-env-eff': 'Average',
                'lmk-key': 'c1d137711da433fb3cced74b1a6848da8bbc1159d076455d26d7b4668982601e',
                'wind-turbine-count': '0',
                'tenure': 'Rented (social)', 'floor-level': '', 'potential-energy-efficiency': '84',
                'hot-water-energy-eff': 'Good', 'low-energy-lighting': '82',
                'walls-description': 'Solid brick, as built, no insulation (assumed)',
                'hotwater-description': 'From main system'}, 'full_sap_epc': {}, 'old_data': []
        }

    def test_clean_mechanical_ventilation(self, cleaning_data):
        """Natural ventilation is kept; empty, missing and invalid values become None."""
        key = "mechanical-ventilation"

        # An EPC with natural ventilation should still have natural ventilation afterwards.
        record = self._make_record(cleaning_data, {key: "natural"})
        record._clean_ventilation()
        assert record.prepared_epc[key] == "natural"

        # Empty string, None and an unrecognised value are all expected to end up as None.
        for raw_value in ("", None, "INVALID"):
            record = self._make_record(cleaning_data, {key: raw_value})
            record._clean_ventilation()
            assert record.prepared_epc[key] is None

    def test_clean_energy_valid_values(self, cleaning_data):
        """Numeric strings are converted to their numeric values."""
        record = self._make_record(
            cleaning_data,
            {
                "energy-consumption-current": "200",
                "co2-emissions-current": "5.5",
            },
        )
        record._clean_energy()

        assert record.prepared_epc["energy-consumption-current"] == 200.0
        assert record.prepared_epc["co2-emissions-current"] == 5.5

    def test_clean_energy_empty_values(self, cleaning_data):
        """Empty energy values are not acceptable, so cleaning must raise."""
        record = self._make_record(
            cleaning_data,
            {
                "energy-consumption-current": "",
                "co2-emissions-current": "",
            },
        )

        with pytest.raises(ValueError):
            record._clean_energy()

    def test_clean_built_form_valid_remap(self, cleaning_data):
        """A built form that is already valid is left unchanged."""
        record = self._make_record(
            cleaning_data,
            {
                "built-form": "Semi-Detached",
                # NOTE(review): presumably consulted by the cleaner - confirm in EPCRecord.
                "property-type": "Flat",
            },
        )
        record._clean_built_form()

        assert record.prepared_epc["built-form"] == "Semi-Detached"

    def test_clean_built_form_anomaly(self, cleaning_data):
        """An empty built form on a flat is expected to be filled in as "End-Terrace"."""
        record = self._make_record(
            cleaning_data,
            {
                "built-form": "",
                "property-type": "Flat",
            },
        )
        record._clean_built_form()

        assert record.prepared_epc["built-form"] == "End-Terrace"

    def test_clean_floor_area_valid(self, cleaning_data):
        """A numeric floor-area string is converted to a number."""
        record = self._make_record(cleaning_data, {"total-floor-area": "120.5"})
        record._clean_floor_area()

        assert record.prepared_epc["total-floor-area"] == 120.5

    def test_clean_floor_area_empty(self, cleaning_data):
        """There is no known case of a missing floor area, so an empty value must raise."""
        record = self._make_record(cleaning_data, {"total-floor-area": ""})

        with pytest.raises(ValueError):
            record._clean_floor_area()

    def test_clean_heat_loss_corridor_valid(self, cleaning_data):
        """A valid corridor value is kept, with either an empty or a missing length."""
        record = self._make_record(
            cleaning_data,
            {
                "heat-loss-corridor": "unheated corridor",
                "unheated-corridor-length": "",
            },
        )
        record._clean_heat_loss_corridor()

        assert record.prepared_epc["heat-loss-corridor"] == "unheated corridor"

        record = self._make_record(
            cleaning_data,
            {
                "heat-loss-corridor": "unheated corridor",
                "unheated-corridor-length": None,
            },
        )
        record._clean_heat_loss_corridor()

        assert record.prepared_epc["heat-loss-corridor"] == "unheated corridor"
        assert record.prepared_epc["unheated-corridor-length"] is None

    def test_clean_heat_loss_corridor_anomaly(self, cleaning_data):
        """An unrecognised corridor value is expected to be cleaned to "no corridor"."""
        record = self._make_record(
            cleaning_data,
            {
                "heat-loss-corridor": "InvalidCorridor",
                "unheated-corridor-length": "",
            },
        )
        record._clean_heat_loss_corridor()

        assert record.prepared_epc["heat-loss-corridor"] == "no corridor"

    def test_clean_mains_gas_valid(self, cleaning_data):
        """"Y" is converted to True."""
        record = self._make_record(cleaning_data, {"mains-gas-flag": "Y"})
        record._clean_mains_gas()

        assert record.prepared_epc["mains-gas-flag"] is True

    def test_clean_mains_gas_anomaly(self, cleaning_data):
        """Unknown values raise; every known anomaly marker is cleaned to None."""
        # The flag should always be Y, N or one of the known anomaly values.
        record = self._make_record(cleaning_data, {"mains-gas-flag": "InvalidValue"})
        with pytest.raises(KeyError):
            record._clean_mains_gas()

        # Check every anomaly marker rather than a randomly chosen one, so the test
        # is deterministic and a single badly handled marker cannot slip through.
        for anomaly in DATA_ANOMALY_MATCHES:
            record = self._make_record(cleaning_data, {"mains-gas-flag": anomaly})
            record._clean_mains_gas()

            assert record.prepared_epc["mains-gas-flag"] is None, f"anomaly value {anomaly!r}"

    def test_clean_solar_hot_water_valid(self, cleaning_data):
        """"Y" is kept and the boolean flag is set to True."""
        record = self._make_record(cleaning_data, {"solar-water-heating-flag": "Y"})
        record._clean_solar_hot_water()

        assert record.prepared_epc["solar-water-heating-flag"] == "Y"
        assert record.solar_water_heating_flag_bool is True

    def test_clean_solar_hot_water_empty(self, cleaning_data):
        """An empty flag is treated as "N"."""
        record = self._make_record(cleaning_data, {"solar-water-heating-flag": ""})
        record._clean_solar_hot_water()

        assert record.prepared_epc["solar-water-heating-flag"] == "N"
        assert record.solar_water_heating_flag_bool is False

    def test_clean_number_lighting_outlets_valid(self, cleaning_data, epc_records_1):
        """A numeric outlet count string is converted to a number."""
        record = self._make_record(
            cleaning_data,
            {"fixed-lighting-outlets-count": "5"},
            epc_records=epc_records_1,
        )
        record._clean_number_lighting_outlets()

        assert record.prepared_epc["fixed-lighting-outlets-count"] == 5.0

    def test_clean_number_lighting_outlets_empty(self, cleaning_data):
        """A missing outlet count is filled in (expected value 8.0 for this property)."""
        # Attributes are assigned in the same order as before, so any setter side
        # effects on the record are unchanged.
        record = EPCRecord(cleaning_data=cleaning_data)
        record.run_mode = "newdata"
        record.prepared_epc = {
            "fixed-lighting-outlets-count": "",
            "property-type": "Flat",
            "built-form": "Semi-Detached",
            "construction-age-band": "England and Wales: 1900-1929",
            "local-authority": "E08000025",
            "number-habitable-rooms": "4",
            "number-heated-rooms": "4",
        }
        record.old_data = []
        record.full_sap_epc = []
        record._clean_number_lighting_outlets()

        assert record.prepared_epc["fixed-lighting-outlets-count"] == 8.0

    def test_clean_count_variables(self, cleaning_data):
        """Count fields: numeric strings convert; None, empty and invalid values get defaults."""
        record = self._make_record(
            cleaning_data,
            {
                "number-open-fireplaces": "1",
                "extension-count": None,
                "flat-storey-count": "",
                "number-habitable-rooms": "INVALID!",
            },
        )
        record._clean_count_variables()

        assert record.prepared_epc["number-open-fireplaces"] == 1.0
        assert record.prepared_epc["extension-count"] == 0
        assert record.prepared_epc["flat-storey-count"] is None
        assert record.prepared_epc["number-habitable-rooms"] is None

    def test_clean_floor_level(self, cleaning_data):
        """A numeric floor level converts to a number; empty and None become None."""
        record = self._make_record(cleaning_data, {"floor-level": "1"})
        record._clean_floor_level()

        assert record.prepared_epc["floor-level"] == 1.0

        for raw_value in ("", None):
            record = self._make_record(cleaning_data, {"floor-level": raw_value})
            record._clean_floor_level()

            assert record.prepared_epc["floor-level"] is None

    def test_clean_solar_hot_water(self, cleaning_data):
        """"Y" stays "Y"/True; "N", empty and None all end up as "N"/False."""
        cases = (
            # (raw flag, expected cleaned flag, expected boolean)
            ("Y", "Y", True),
            ("N", "N", False),
            ("", "N", False),
            (None, "N", False),
        )

        for raw_value, expected_flag, expected_bool in cases:
            record = self._make_record(cleaning_data, {"solar-water-heating-flag": raw_value})
            record._clean_solar_hot_water()

            assert record.prepared_epc["solar-water-heating-flag"] == expected_flag
            assert record.solar_water_heating_flag_bool is expected_bool
|
||||
|
|
@ -35,9 +35,12 @@ def app():
|
|||
|
||||
cleaned_data = {}
|
||||
epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
|
||||
for directory in tqdm(epc_directories):
|
||||
|
||||
WALLS = []
|
||||
for directory in tqdm(epc_directories):
|
||||
data = pd.read_csv(directory / "certificates.csv", low_memory=False)
|
||||
z = data["WALLS_DESCRIPTION"].unique().tolist()
|
||||
WALLS.extend(z)
|
||||
# Rename the columns to the same format as the api returns
|
||||
data.columns = [c.replace("_", "-").lower() for c in data.columns]
|
||||
# Take just date before the date threshold
|
||||
|
|
|
|||
|
|
@ -61,7 +61,8 @@ class MainHeatAttributes(Definitions):
|
|||
REMAP = {
|
||||
"electric ceiling": "electric ceiling heating",
|
||||
"electric heat pumps": "electric heat pump",
|
||||
"solar-assisted heat pump": "solar assisted heat pump"
|
||||
"solar-assisted heat pump": "solar assisted heat pump",
|
||||
"portable electric heating": "portable electric heaters",
|
||||
}
|
||||
|
||||
edge_case_result = {}
|
||||
|
|
@ -138,6 +139,8 @@ class MainHeatAttributes(Definitions):
|
|||
result.update({f'has_{ft.replace(" ", "_")}': False for ft in self.FUEL_TYPES})
|
||||
result.update({f'has_{ot.replace(" ", "_")}': False for ot in self.OTHERS})
|
||||
result['has_underfloor_heating'] = False
|
||||
# We re-map entries that are the same
|
||||
# We just drop those keys
|
||||
|
||||
if self.nodata:
|
||||
return result
|
||||
|
|
|
|||
|
|
@ -33,6 +33,12 @@ class RoofAttributes(Definitions):
|
|||
"ystafell(oedd) to, dim inswleiddio": "roof room(s), no insulation",
|
||||
}
|
||||
|
||||
DEFAULT_KEYS = [
|
||||
'thermal_transmittance', 'thermal_transmittance_unit', 'is_pitched', 'is_roof_room',
|
||||
'is_loft', 'is_flat', 'is_thatched', 'is_at_rafters', 'is_assumed', 'has_dwelling_above',
|
||||
'is_valid', 'insulation_thickness'
|
||||
]
|
||||
|
||||
def __init__(self, description: str):
|
||||
"""
|
||||
:param description: Description of the roof.
|
||||
|
|
@ -95,6 +101,8 @@ class RoofAttributes(Definitions):
|
|||
result: Dict[str, Union[float, str, bool, None]] = {}
|
||||
|
||||
if self.nodata:
|
||||
for key in self.DEFAULT_KEYS:
|
||||
result[key] = False
|
||||
return result
|
||||
|
||||
description = self.description
|
||||
|
|
@ -114,6 +122,13 @@ class RoofAttributes(Definitions):
|
|||
result["is_valid"] = "invalid" not in description
|
||||
description = description.replace("invalid", "")
|
||||
|
||||
# We handle an edge case where the description is "pitched, 150 loft insulation" and is missing the mm
|
||||
if result["is_pitched"] or result["is_loft"]:
|
||||
# Search for a regular expression that matches 150 insulation
|
||||
match = re.search(r"(\d+\+?)\s*insulation", description)
|
||||
if match:
|
||||
result['insulation_thickness'] = match.group(1)
|
||||
|
||||
# insulation thickness
|
||||
thickness_map = {
|
||||
"ceiling insulated": "average",
|
||||
|
|
@ -129,11 +144,11 @@ class RoofAttributes(Definitions):
|
|||
# Remove the match from the description
|
||||
# description = description.replace(key, "")
|
||||
break
|
||||
else:
|
||||
# Extract insulation thickness in mm, if present
|
||||
match = re.search(r'(\d+\+?)\s*mm', description)
|
||||
if match:
|
||||
result['insulation_thickness'] = match.group(1)
|
||||
|
||||
# Extract insulation thickness in mm, if present
|
||||
match = re.search(r'(\d+\+?)\s*mm', description)
|
||||
if match:
|
||||
result['insulation_thickness'] = match.group(1)
|
||||
|
||||
if "insulation_thickness" not in result:
|
||||
result['insulation_thickness'] = None
|
||||
|
|
|
|||
|
|
@ -68,6 +68,13 @@ class WallAttributes(Definitions):
|
|||
'Cowith external insulation': 'Cob, with external insulation',
|
||||
}
|
||||
|
||||
DEFAULT_KEYS = [
|
||||
'thermal_transmittance', 'thermal_transmittance_unit', 'is_cavity_wall', 'is_filled_cavity',
|
||||
'is_solid_brick', 'is_system_built', 'is_timber_frame', 'is_granite_or_whinstone',
|
||||
'is_as_built', 'is_cob', 'is_assumed', 'is_sandstone_or_limestone',
|
||||
'insulation_thickness', 'external_insulation', 'internal_insulation'
|
||||
]
|
||||
|
||||
def __init__(self, description: str):
|
||||
"""
|
||||
:param description: Description of the walls.
|
||||
|
|
@ -98,6 +105,9 @@ class WallAttributes(Definitions):
|
|||
def process(self) -> Dict[str, Union[float, str, bool, None]]:
|
||||
result: Dict[str, Union[float, str, bool, None]] = {}
|
||||
if self.nodata:
|
||||
for key in self.DEFAULT_KEYS:
|
||||
result[key] = False
|
||||
|
||||
return result
|
||||
|
||||
description = self.description.lower()
|
||||
|
|
@ -142,4 +152,7 @@ class WallAttributes(Definitions):
|
|||
else:
|
||||
result["insulation_thickness"] = "average"
|
||||
|
||||
if result["is_cavity_wall"] & result["is_as_built"] & (result["insulation_thickness"] == "average"):
|
||||
result["is_filled_cavity"] = True
|
||||
|
||||
return result
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ class WindowAttributes(Definitions):
|
|||
raise ValueError('Invalid description')
|
||||
|
||||
def process(self) -> Dict[str, Union[str, bool]]:
|
||||
result: Dict[str, Union[str, bool]] = {
|
||||
result: Dict[str, Union[str, bool, None]] = {
|
||||
"has_glazing": False,
|
||||
"glazing_coverage": None,
|
||||
"glazing_type": None,
|
||||
|
|
@ -80,7 +80,11 @@ class WindowAttributes(Definitions):
|
|||
break
|
||||
|
||||
# If we didn't find any coverage or type, we assume full coverage
|
||||
if not result["glazing_coverage"]:
|
||||
if (not result["glazing_coverage"]) & (result["glazing_type"] != "single"):
|
||||
result["glazing_coverage"] = "full"
|
||||
|
||||
# We reset some values if the glazing is single
|
||||
if result["glazing_type"] == "single":
|
||||
result["has_glazing"] = False
|
||||
|
||||
return result
|
||||
|
|
|
|||
|
|
@ -1652,4 +1652,17 @@ mainheat_cases = [
|
|||
'has_electricaire': False, 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False,
|
||||
"has_electric_heat_pumps": False,
|
||||
"has_micro-cogeneration": False},
|
||||
{'original_description': 'Portable electric heating assumed for most rooms', 'has_radiators': False,
|
||||
'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False,
|
||||
'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': False,
|
||||
'has_air_source_heat_pump': False, 'has_room_heaters': False, 'has_electric_storage_heaters': False,
|
||||
'has_warm_air': False, 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False,
|
||||
'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False,
|
||||
'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False, 'has_electric_heat_pump': False,
|
||||
'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False, 'has_exhaust_source_heat_pump': False,
|
||||
'has_community_heat_pump': False, 'has_portable_electric_heating': True, 'has_electric': True,
|
||||
'has_mains_gas': False, 'has_wood_logs': False, 'has_coal': False, 'has_oil': False, 'has_wood_pellets': False,
|
||||
'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False, 'has_lpg': False,
|
||||
'has_b30k': False, 'has_assumed': True, 'has_electricaire': False, 'has_assumed_for_most_rooms': True,
|
||||
'has_underfloor_heating': False}
|
||||
]
|
||||
|
|
|
|||
|
|
@ -550,7 +550,7 @@ wall_cases = [
|
|||
'is_as_built': False, 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False,
|
||||
'insulation_thickness': None, 'external_insulation': False, 'internal_insulation': False},
|
||||
{'original_description': 'Cavity wall, as built, insulated (assumed)', 'thermal_transmittance': None,
|
||||
'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': False, 'is_solid_brick': False,
|
||||
'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': True, 'is_solid_brick': False,
|
||||
'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True,
|
||||
'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average',
|
||||
'external_insulation': False, 'internal_insulation': False},
|
||||
|
|
@ -727,7 +727,7 @@ wall_cases = [
|
|||
'external_insulation': False, 'internal_insulation': False},
|
||||
{'original_description': 'Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)',
|
||||
'thermal_transmittance': None,
|
||||
'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': False, 'is_solid_brick': False,
|
||||
'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': True, 'is_solid_brick': False,
|
||||
'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True,
|
||||
'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average',
|
||||
'external_insulation': False, 'internal_insulation': False},
|
||||
|
|
|
|||
|
|
@ -30,7 +30,8 @@ windows_cases = [
|
|||
'glazing_type': 'triple', 'no_data': False},
|
||||
{'original_description': 'Gwydrau triphlyg rhannol', 'has_glazing': True, 'glazing_coverage': 'partial',
|
||||
'glazing_type': 'triple', 'no_data': False},
|
||||
{'original_description': 'Single glazed', 'has_glazing': True, 'glazing_coverage': 'full', 'glazing_type': 'single',
|
||||
{'original_description': 'Single glazed', 'has_glazing': False, 'glazing_coverage': None,
|
||||
'glazing_type': 'single',
|
||||
'no_data': False},
|
||||
{'original_description': 'Some double glazing', 'has_glazing': True, 'glazing_coverage': 'partial',
|
||||
'glazing_type': 'double', 'no_data': False},
|
||||
|
|
@ -46,7 +47,8 @@ windows_cases = [
|
|||
'glazing_type': 'double', 'no_data': False},
|
||||
{'original_description': 'Gwydrau dwbl gan mwyaf', 'has_glazing': True, 'glazing_coverage': 'most',
|
||||
'glazing_type': 'double', 'no_data': False},
|
||||
{'original_description': 'Gwydrau sengl', 'has_glazing': True, 'glazing_coverage': 'full', 'glazing_type': 'single',
|
||||
{'original_description': 'Gwydrau sengl', 'has_glazing': False, 'glazing_coverage': None,
|
||||
'glazing_type': 'single',
|
||||
'no_data': False},
|
||||
{'original_description': 'Ffenestri perfformiad uchel', 'has_glazing': True, 'glazing_coverage': 'full',
|
||||
'glazing_type': 'high performance', 'no_data': False},
|
||||
|
|
|
|||
|
|
@ -3,12 +3,13 @@ from pathlib import Path
|
|||
from etl.epc_clean.tests.test_data.test_roof_attributes_cases import clean_roof_test_cases
|
||||
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
|
||||
|
||||
|
||||
# For local testing
|
||||
if __file__ == "<input>":
|
||||
input_data_path = Path("./model_data/tests/test_data/EpcClean_inputs.obj")
|
||||
else:
|
||||
current_file_path = Path(__file__)
|
||||
input_data_path = current_file_path.parent / 'test_data' / 'EpcClean_inputs.obj'
|
||||
# if __file__ == "<input>":
|
||||
# input_data_path = Path("./model_data/tests/test_data/EpcClean_inputs.obj")
|
||||
# else:
|
||||
# current_file_path = Path(__file__)
|
||||
# input_data_path = current_file_path.parent / 'test_data' / 'EpcClean_inputs.obj'
|
||||
|
||||
|
||||
class TestRoofAttributes:
|
||||
|
|
@ -88,7 +89,12 @@ class TestRoofAttributes:
|
|||
|
||||
def test_clean_roof_no_description(self):
|
||||
roof = RoofAttributes('').process()
|
||||
assert roof == {}
|
||||
assert roof == {
|
||||
'thermal_transmittance': False, 'thermal_transmittance_unit': False, 'is_pitched': False,
|
||||
'is_roof_room': False, 'is_loft': False, 'is_flat': False, 'is_thatched': False,
|
||||
'is_at_rafters': False, 'is_assumed': False, 'has_dwelling_above': False, 'is_valid': False,
|
||||
'insulation_thickness': False
|
||||
}
|
||||
|
||||
def test_clean_roof_edge_cases(self):
|
||||
# Insulation thickness edge case
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ from pathlib import Path
|
|||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
from etl.epc.settings import EARLIEST_EPC_DATE
|
||||
from etl.epc.DataProcessor import DataProcessor
|
||||
from etl.epc.DataProcessor import EPCDataProcessor
|
||||
from BaseUtility import Definitions
|
||||
from utils.s3 import save_dataframe_to_s3_parquet
|
||||
|
||||
|
|
@ -21,24 +21,31 @@ BUCKET = os.environ.get("BUCKET", "retrofit-data-dev")
|
|||
def app():
|
||||
directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()]
|
||||
|
||||
sample = []
|
||||
for directory in tqdm(directories):
|
||||
|
||||
data = pd.read_csv(directory / "certificates.csv", low_memory=False)
|
||||
|
||||
data = data[data["LODGEMENT_DATE"] >= EARLIEST_EPC_DATE]
|
||||
data = data[~pd.isnull(data["UPRN"])]
|
||||
data["TOTAL_FLOOR_AREA"] = data["TOTAL_FLOOR_AREA"].astype(float)
|
||||
|
||||
data["CONSTRUCTION_AGE_BAND"] = data["CONSTRUCTION_AGE_BAND"].apply(
|
||||
lambda x: DataProcessor.clean_construction_age_band(x)
|
||||
lambda x: EPCDataProcessor.clean_construction_age_band(x)
|
||||
)
|
||||
data = data[~pd.isnull(data["CONSTRUCTION_AGE_BAND"])]
|
||||
data = data[~data["CONSTRUCTION_AGE_BAND"].isin(Definitions.DATA_ANOMALY_MATCHES)]
|
||||
data = data[~pd.isnull(data["TOTAL_FLOOR_AREA"])]
|
||||
data = data[~pd.isnull(data["NUMBER_HABITABLE_ROOMS"])]
|
||||
data = data[~pd.isnull(data["FLOOR_HEIGHT"])]
|
||||
data = data[~pd.isnull(data["NUMBER_HEATED_ROOMS"])]
|
||||
|
||||
df = (
|
||||
data.groupby(GROUPBY)
|
||||
.agg({"NUMBER_HABITABLE_ROOMS": "median", "TOTAL_FLOOR_AREA": "mean", "FLOOR_HEIGHT": "mean"})
|
||||
.agg(
|
||||
{"NUMBER_HEATED_ROOMS": "median", "NUMBER_HABITABLE_ROOMS": "median", "TOTAL_FLOOR_AREA": "mean",
|
||||
"FLOOR_HEIGHT": "mean"}
|
||||
)
|
||||
.reset_index()
|
||||
)
|
||||
|
||||
|
|
|
|||
244
etl/solar/SolarPhotoSupply.py
Normal file
244
etl/solar/SolarPhotoSupply.py
Normal file
|
|
@ -0,0 +1,244 @@
|
|||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
|
||||
from utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class SolarPhotoSupply:
    """
    Build, store and query a lookup of observed PHOTO_SUPPLY values taken from EPC certificate data.

    ``create_dataset`` reads ``certificates.csv`` from every configured directory, keeps the newest certificate per
    UPRN that reports a non-zero PHOTO_SUPPLY and aggregates the median/mean PHOTO_SUPPLY by property
    characteristics, roof shape and floor-area decile. ``save`` / ``load`` move the outputs to and from S3, and
    ``filter_photo_supply_lookup`` selects the lookup rows that best match a single property.
    """

    DATASET_COLUMNS = [
        "UPRN", "PROPERTY_TYPE", "TENURE", "BUILT_FORM", "ROOF_DESCRIPTION", "PHOTO_SUPPLY", "TOTAL_FLOOR_AREA",
        "CONSTRUCTION_AGE_BAND", "SOLAR_WATER_HEATING_FLAG"
    ]

    # Certificates with a missing value in any of these columns are discarded
    REQUIRED_COLUMNS = ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]

    # Roof lookup columns that are not needed for the photo supply aggregation
    ROOF_LOOKUP_COLUMNS_TO_DROP = [
        "clean_description", "thermal_transmittance", "thermal_transmittance_unit", "insulation_thickness",
        "is_assumed"
    ]

    # Storage locations, shared by save() and load() so the two cannot drift apart
    DEFAULT_BUCKET = "retrofit-data-dev"
    PHOTO_SUPPLY_LOOKUP_KEY = "solar_pv_supply/photo_supply_lookup.parquet"
    FLOOR_AREA_DECILE_THRESHOLDS_KEY = "solar_pv_supply/floor_area_decile_thresholds.parquet"

    # The lookup holds this shortened form of the long "not defined" tenure description, so the full description
    # is remapped onto it before matching
    TENURE_REMAP = {
        "not defined - use in the case of a new dwelling for which the intended tenure in not known. it is not to "
        "be used for an existing dwelling":
            "not defined - use in the case of a new dwelling for which the intended tenure in not known. it is no"
    }

    def __init__(self, file_directories, cleaned_lookup):
        """
        Initialize the SolarPhotoSupply class with file directories and a cleaned lookup. Currently, this class
        just works with locally stored data, but this could be extended to work with data stored in S3.

        :param file_directories: A list of directories where files are stored. Each one is expected to contain a
            "certificates.csv" file.
        :param cleaned_lookup: A dictionary containing cleaned lookup data. Only the "roof-description" entry is
            used.
        """
        self.file_directories = file_directories

        # Replaced by the concatenated certificate DataFrame once create_dataset has run
        self.results = []
        self.decile_thresholds = None

        # pd.DataFrame(None) is an empty frame, so a missing "roof-description" entry is detected in create_dataset
        self.roof_lookup = pd.DataFrame(cleaned_lookup.get("roof-description"))

        self.photo_supply_lookup = pd.DataFrame()
        self.floor_area_decile_thresholds = pd.DataFrame()

    def create_dataset(self):
        """
        Create a dataset from the provided file directories. This method processes the data files,
        applies transformations, and aggregates data into a useful format.

        Populates ``results``, ``decile_thresholds``, ``photo_supply_lookup`` and ``floor_area_decile_thresholds``.

        :raises ValueError: If there is no roof lookup data or no certificate data could be read.
        """

        if self.roof_lookup.empty:
            raise ValueError("No roof lookup data")

        results = []

        logger.info("Creating solar photo supply dataset")
        for directory in tqdm(self.file_directories):
            filepath = directory / "certificates.csv"
            df = pd.read_csv(filepath, low_memory=False)
            df = df[~pd.isnull(df["UPRN"])]
            df["UPRN"] = df["UPRN"].astype(int).astype(str)
            # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA
            for col in self.REQUIRED_COLUMNS:
                df = df[~pd.isnull(df[col])]
            # Take newest LODGEMENT_DATE per UPRN
            df = df.sort_values(by="LODGEMENT_DATE", ascending=False).drop_duplicates(subset=["UPRN"])

            data = df[self.DATASET_COLUMNS].copy()
            # A missing PHOTO_SUPPLY is treated the same as 0, and only properties with some supply are kept
            data["PHOTO_SUPPLY"] = data["PHOTO_SUPPLY"].fillna(0)
            data = data[data["PHOTO_SUPPLY"] != 0]
            results.append(data)

        if not results:
            # pd.concat raises an unhelpful error on an empty list, so fail with a clear message instead
            raise ValueError("No certificate data to process - no file directories were provided")

        self.results = pd.concat(results)

        # Convert total floor area to deciles
        self.decile_thresholds = self.results["TOTAL_FLOOR_AREA"].quantile(
            [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
        ).values

        # Bins are closed on the right, which matches the "<=" comparison used in classify_floor_area
        self.results["floor_area_decile"] = pd.cut(
            self.results["TOTAL_FLOOR_AREA"],
            bins=[0] + list(self.decile_thresholds) + [float('inf')],
            labels=False,
            include_lowest=True
        )

        # Convert tenure to lower
        self.results["TENURE"] = self.results["TENURE"].str.lower()

        # Attach the cleaned roof attributes (is_pitched, is_flat, ...) to each certificate
        self.results = self.results.merge(
            self.roof_lookup.drop(columns=self.ROOF_LOOKUP_COLUMNS_TO_DROP),
            left_on="ROOF_DESCRIPTION",
            right_on="original_description",
            how="left"
        )

        self.photo_supply_lookup = self.results.groupby(
            [
                "PROPERTY_TYPE", "BUILT_FORM", "TENURE", "is_pitched", "is_roof_room", "is_flat",
                "CONSTRUCTION_AGE_BAND", "floor_area_decile"
            ],
            observed=True
        ).agg(
            {
                "PHOTO_SUPPLY": ["median", "mean"],
            }
        ).reset_index()

        # Flatten the two-level column index produced by agg, e.g. ("PHOTO_SUPPLY", "median") ->
        # "PHOTO_SUPPLY_median"
        self.photo_supply_lookup.columns = ['_'.join(col).strip() for col in self.photo_supply_lookup.columns.values]
        # Remove trailing underscore from columns
        self.photo_supply_lookup.columns = [
            col[:-1] if col.endswith("_") else col for col in self.photo_supply_lookup.columns.values
        ]
        # Convert columns to lowercase
        self.photo_supply_lookup.columns = [col.lower() for col in self.photo_supply_lookup.columns.values]

        self.floor_area_decile_thresholds = pd.DataFrame(
            self.decile_thresholds,
            columns=["floor_area_decile_thresholds"]
        )

    @staticmethod
    def classify_floor_area(new_area, thresholds):
        """
        Classify a given floor area into a decile based on provided thresholds.

        :param new_area: The new floor area to be classified.
        :param thresholds: A list of thresholds used for classification, in ascending order.
        :return: An integer representing the decile index: the position of the first threshold that is greater
            than or equal to the area, or len(thresholds) if the area is above every threshold.
        """

        for i, threshold in enumerate(thresholds):
            if new_area <= threshold:
                return i
        return len(thresholds)

    def save(self, bucket=DEFAULT_BUCKET):
        """
        Save the processed data to an S3 bucket in the parquet format. This method also handles
        logging and validation to ensure data is present before saving.

        :param bucket: The name of the S3 bucket to store the outputs in.
        :raises ValueError: If create_dataset has not produced a photo supply lookup.
        """
        if self.photo_supply_lookup.empty:
            raise ValueError("No data to save")

        logger.info("Storing outputs to S3")

        # Store this data in s3 as parquet files
        save_dataframe_to_s3_parquet(
            df=self.photo_supply_lookup,
            bucket_name=bucket,
            file_key=self.PHOTO_SUPPLY_LOOKUP_KEY,
        )

        save_dataframe_to_s3_parquet(
            df=self.floor_area_decile_thresholds,
            bucket_name=bucket,
            file_key=self.FLOOR_AREA_DECILE_THRESHOLDS_KEY,
        )

    @staticmethod
    def load(bucket):
        """
        Load datasets from an S3 bucket.

        :param bucket: The name of the S3 bucket to load data from.
        :return: A tuple containing photo supply lookup and floor area decile thresholds dataframes.
        """
        photo_supply_lookup = read_dataframe_from_s3_parquet(
            bucket_name=bucket, file_key=SolarPhotoSupply.PHOTO_SUPPLY_LOOKUP_KEY,
        )
        floor_area_decile_thresholds = read_dataframe_from_s3_parquet(
            bucket_name=bucket, file_key=SolarPhotoSupply.FLOOR_AREA_DECILE_THRESHOLDS_KEY,
        )

        return photo_supply_lookup, floor_area_decile_thresholds

    @classmethod
    def filter_photo_supply_lookup(
        cls,
        photo_supply_lookup: pd.DataFrame,
        floor_area_decile_thresholds: pd.DataFrame,
        tenure: str,
        built_form: str,
        property_type: str,
        construction_age_band: str,
        is_flat: bool,
        is_pitched: bool,
        is_roof_room: bool,
        floor_area: float
    ):
        """
        Filter the photo supply lookup to find the most appropriate photo supply for a given property.

        A full match on every attribute is tried first. If that finds nothing, the match is relaxed to tenure,
        built form and property type, narrowed by construction age band when that band is available. Finally the
        rows are narrowed to the property's floor area decile when that decile is present.

        :param photo_supply_lookup: The photo supply lookup dataframe.
        :param floor_area_decile_thresholds: The floor area decile thresholds dataframe.
        :param tenure: The tenure of the property.
        :param built_form: The built form of the property.
        :param property_type: The property type of the property.
        :param construction_age_band: The construction age band of the property.
        :param is_flat: Whether the property has a flat roof.
        :param is_pitched: Whether the property has a pitched roof.
        :param is_roof_room: Whether the property has a roof room.
        :param floor_area: The floor area of the property.
        :return: The matching rows of photo_supply_lookup (one or more rows).
        :raises ValueError: If no rows match even the relaxed criteria.
        """

        # Convert the tenure to lower case, as is done in the creation of the dataset
        tenure = tenure.lower()
        # We remap the "not defined" tenure onto the form held in the lookup
        tenure = cls.TENURE_REMAP.get(tenure, tenure)

        photo_supply_matched = photo_supply_lookup[
            (photo_supply_lookup["tenure"] == tenure) &
            (photo_supply_lookup["built_form"] == built_form) &
            (photo_supply_lookup["property_type"] == property_type) &
            (photo_supply_lookup["construction_age_band"] == construction_age_band) &
            (photo_supply_lookup["is_flat"] == is_flat) &
            (photo_supply_lookup["is_pitched"] == is_pitched) &
            (photo_supply_lookup["is_roof_room"] == is_roof_room)
        ]

        if photo_supply_matched.empty:
            # There are a small number of cases where we don't get a full match so try again with a more aggregated
            # average
            photo_supply_matched = photo_supply_lookup[
                (photo_supply_lookup["tenure"] == tenure) &
                (photo_supply_lookup["built_form"] == built_form) &
                (photo_supply_lookup["property_type"] == property_type)
            ]
            if construction_age_band in photo_supply_matched["construction_age_band"].values:
                photo_supply_matched = photo_supply_matched[
                    photo_supply_matched["construction_age_band"] == construction_age_band
                ]

        if photo_supply_matched.empty:
            raise ValueError("No photo supply matches")

        floor_area_decile = cls.classify_floor_area(
            floor_area, floor_area_decile_thresholds["floor_area_decile_thresholds"].values
        )

        # Only narrow by decile when it is available, otherwise keep the broader set of matches
        if floor_area_decile in photo_supply_matched["floor_area_decile"].values:
            photo_supply_matched = photo_supply_matched[
                photo_supply_matched["floor_area_decile"] == floor_area_decile
            ]

        return photo_supply_matched
|
||||
31
etl/solar/app.py
Normal file
31
etl/solar/app.py
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
from pathlib import Path
|
||||
from etl.epc.property_change_app import get_cleaned
|
||||
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
|
||||
|
||||
DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
|
||||
|
||||
|
||||
def app():
    """
    Build the solar photo-supply lookup from the locally stored EPC data and store it.

    This code reads in the EPC data and attempts to produce a reasonable figure for the photo-supply variable,
    which is the following:
    "Percentage of photovoltaic area as a percentage of total roof area. 0% indicates that a Photovoltaic Supply
    is not present in the property."

    When recommending solar, we want to simulate the retrofit by increasing this value from 0, so we need a sensible
    figure to increase this to. This script pulls the data for that, to allow us to deduce what a sensible figure
    would be.
    :return: None
    """

    # Every sub-directory of the data directory is handed to the dataset builder
    certificate_directories = []
    for entry in DATA_DIRECTORY.iterdir():
        if entry.is_dir():
            certificate_directories.append(entry)

    lookup = get_cleaned()

    client = SolarPhotoSupply(file_directories=certificate_directories, cleaned_lookup=lookup)
    client.create_dataset()
    client.save()
|
||||
109
etl/solar/tests/test_solar_photo_supply.py
Normal file
109
etl/solar/tests/test_solar_photo_supply.py
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
import unittest
|
||||
import pandas as pd
|
||||
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
|
||||
|
||||
|
||||
class TestSolarPhotoSupply(unittest.TestCase):
    """Tests for the lookup filtering and floor-area classification of SolarPhotoSupply."""

    def setUp(self):
        # Mock data for photo_supply_lookup and floor_area_decile_thresholds: two rows that differ in every
        # attribute
        lookup_rows = [
            {
                "tenure": "leasehold", "built_form": "detached", "property_type": "house",
                "construction_age_band": "pre-1900", "is_flat": False, "is_pitched": True, "is_roof_room": False,
                "floor_area_decile": 0, "photo_supply": 100,
            },
            {
                "tenure": "freehold", "built_form": "semi-detached", "property_type": "flat",
                "construction_age_band": "1900-1929", "is_flat": True, "is_pitched": False, "is_roof_room": True,
                "floor_area_decile": 1, "photo_supply": 200,
            },
        ]
        self.photo_supply_lookup = pd.DataFrame(lookup_rows)
        self.floor_area_decile_thresholds = pd.DataFrame({"floor_area_decile_thresholds": [50, 100]})
        self.solar_photo_supply = SolarPhotoSupply([], {})

    def _filter(self, *property_details):
        """
        Run filter_photo_supply_lookup against the mock lookup. The positional arguments are tenure, built form,
        property type, construction age band, is_flat, is_pitched, is_roof_room and floor area.
        """
        return self.solar_photo_supply.filter_photo_supply_lookup(
            self.photo_supply_lookup,
            self.floor_area_decile_thresholds,
            *property_details
        )

    def test_correct_filtering(self):
        matched = self._filter("leasehold", "detached", "house", "pre-1900", False, True, False, 45)
        self.assertEqual(len(matched), 1)
        self.assertEqual(matched.iloc[0]["photo_supply"], 100)

    def test_no_matches(self):
        # "unknown" is not a built form in the lookup, so even the relaxed match finds nothing
        with self.assertRaises(ValueError):
            self._filter("leasehold", "unknown", "house", "pre-1900", False, True, False, 45)

    def test_floor_area_decile_matching(self):
        matched = self._filter("freehold", "semi-detached", "flat", "1900-1929", True, False, True, 60)
        self.assertEqual(len(matched), 1)
        self.assertEqual(matched.iloc[0]["photo_supply"], 200)

    def test_invalid_parameters(self):
        # Invalid type for tenure
        with self.assertRaises(AttributeError):
            self._filter(123, "detached", "house", "pre-1900", False, True, False, 45)

    def test_classify_floor_area(self):
        # Setup
        thresholds = [10, 20, 30, 40, 50]
        solar_photo_supply = SolarPhotoSupply([], {})

        cases = [
            # Test Case 1: Valid floor area
            (25, 2, "Decile classification did not match expected result"),
            # Test Case 2: Out of range floor area
            (60, len(thresholds), "Decile classification for out of range value is incorrect"),
        ]
        for floor_area, expected_decile, failure_message in cases:
            result = solar_photo_supply.classify_floor_area(floor_area, thresholds)
            self.assertEqual(result, expected_decile, failure_message)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
179
etl/testing_data/birmingham_pilot.py
Normal file
179
etl/testing_data/birmingham_pilot.py
Normal file
|
|
@ -0,0 +1,179 @@
|
|||
"""
|
||||
This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
|
||||
testing
|
||||
"""
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from epc_api.client import EpcClient
|
||||
from utils.s3 import save_csv_to_s3
|
||||
|
||||
FILE_SIZE = 5
|
||||
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN", None)
|
||||
USER_ID = 8
|
||||
PORTFOLIO_ID = 54
|
||||
|
||||
|
||||
def app():
    """
    Pick example social-housing properties in Birmingham from the EPC API, print their details and upload a test
    input csv for the recommendation engine to S3.

    The addresses written to the csv are hard-coded from a previous run of the searches below; the searches
    themselves only print what they find.
    """
    # For this dataset, the original aim was 3 properties, all houses: a mid-terrace, an end-terrace and a
    # semi-detached. A flat is added as a final example.

    epc_client = EpcClient(auth_token=EPC_AUTH_TOKEN)

    # Birmingham has a Local Authority Code of E08000025
    birmingham = "E08000025"
    social_tenure = "Rented (social)"

    # ~~~~~~~~~~~~~~~~~~~~
    # First example
    # ~~~~~~~~~~~~~~~~~~~~
    # Let's take an EPC D property
    first_rows = epc_client.domestic.search(
        params={"local-authority": birmingham, "property-type": "house"},
        size=1000
    )["rows"]
    # Get a social housing property with a cavity wall and an uninsulated pitched roof
    first_matches = [
        row for row in first_rows
        if "cavity wall, as built, no insulation (assumed)" in row["walls-description"].lower()
        and "pitched, no insulation (assumed)" in row["roof-description"].lower()
        and row["tenure"] == social_tenure
    ]
    first = first_matches[0]

    print(first["postcode"])
    # B13 9LT
    print(first["address1"])
    # 113 Tenby Road
    print(first["built-form"])
    # Mid-Terrace
    print(first["current-energy-rating"])
    # 'D'

    # ~~~~~~~~~~~~~~~~~~~~
    # Second example
    # ~~~~~~~~~~~~~~~~~~~~

    # Let's take an EPC E property
    second_rows = epc_client.domestic.search(
        params={"local-authority": birmingham, "property-type": "house", "energy-band": "e"},
        size=1000
    )["rows"]
    # Get a social housing property with a solid wall and some existing loft insulation
    second_matches = [
        row for row in second_rows
        if "solid brick, as built, no insulation (assumed)" in row["walls-description"].lower()
        and "pitched, 100 mm loft insulation" in row["roof-description"].lower()
        and row["tenure"] == social_tenure
    ]
    second = second_matches[0]

    print(second["postcode"])
    # B28 8JF
    print(second["address1"])
    # 139 School Road
    print(second["built-form"])
    # Semi-Detached
    print(second["current-energy-rating"])
    # E

    # ~~~~~~~~~~~~~~~~~~~~
    # Third example
    # ~~~~~~~~~~~~~~~~~~~~
    third_rows = epc_client.domestic.search(
        params={"local-authority": birmingham, "property-type": "house", "energy-band": "f"},
        size=1000
    )["rows"]
    # Get a social housing property
    third_matches = [row for row in third_rows if row["tenure"] == social_tenure]
    # The fifth social housing result is the one used for this example
    third = third_matches[4]

    print(third["walls-description"])
    print(third["floor-description"])
    print(third["roof-description"])
    print(third["postcode"])
    # B32 1SL
    print(third["address1"])
    # 77 Simmons Drive
    print(third["built-form"])
    # Semi-Detached

    # ~~~~~~~~~~~~~~~~~~~~
    # Final example
    # ~~~~~~~~~~~~~~~~~~~~
    # Let's take a flat that is a D
    fourth_rows = epc_client.domestic.search(
        params={"local-authority": birmingham, "property-type": "flat", "energy-band": "d"},
        size=1000
    )["rows"]
    # Get a social housing property with a cavity wall
    fourth_matches = [
        row for row in fourth_rows
        if "cavity wall, as built, no insulation (assumed)" in row["walls-description"].lower()
        and row["tenure"] == social_tenure
    ]
    fourth = fourth_matches[0]

    print(fourth["postcode"])
    # B32 1LS
    print(fourth["address1"])
    # Flat 2

    print(fourth["floor-description"])
    print(fourth["property-type"])
    # Flat

    # New properties
    chosen_properties = [
        ("113 Tenby Road", "B13 9LT"),
        ("139 School Road", "B28 8JF"),
        ("77 Simmons Drive", "B32 1SL"),
        ("Flat 2, 54 Wedgewood Road", "B32 1LS"),
    ]
    test_file = pd.DataFrame(
        [{"address": address, "postcode": postcode, "Notes": None} for address, postcode in chosen_properties]
    )

    # Store the data in s3
    filename = f"{USER_ID}/{PORTFOLIO_ID}/test_inputs.csv"
    save_csv_to_s3(
        dataframe=test_file,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename
    )

    # Request body that can be used to trigger the recommendation engine for the uploaded file
    body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Social",
        "goal": "Increase EPC",
        "goal_value": "C",
        "trigger_file_path": filename
    }
    print(body)
|
||||
194
etl/testing_data/estimate_epc.py
Normal file
194
etl/testing_data/estimate_epc.py
Normal file
|
|
@ -0,0 +1,194 @@
|
|||
from pathlib import Path
|
||||
from random import choices, sample
|
||||
|
||||
import os
|
||||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
from dotenv import load_dotenv
|
||||
from utils.logger import setup_logger
|
||||
from backend.SearchEpc import SearchEpc, vartypes
|
||||
from BaseUtility import Definitions
|
||||
from etl.epc.settings import BUILT_FORM_REMAP
|
||||
|
||||
ENV_FILE = Path(__file__).parent / "backend" / ".env"

# Load the .env file before any environment variable is read, otherwise values that are only defined in the file
# (such as EPC_AUTH_TOKEN) would be missing when the constants below are evaluated
load_dotenv(ENV_FILE)

logger = setup_logger()

DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
# Number of UPRNs tested per sampled directory
DIR_SAMPLE_SIZE = 500
# Number of directories sampled from DATA_DIRECTORY
N_DIRECTORIES = 50

EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")

# String variables that are excluded from the categorical comparison
CATETORICALS_TO_IGNORE = [
    "postcode", "constituency", "local-authority", "built-form", "property-type", "address1", "constituency-label",
    "building-reference-number", "address2", "posttown", "transaction-type", "lmk-key", "address3",
    "local-authority-label", "county",
]
|
||||
|
||||
|
||||
def check_numeric_performance(estimated_value, actual_value):
    """
    Score how far an estimated numeric value is from the actual value, as a relative error.

    :param estimated_value: The value produced by the estimation process (may be null).
    :param actual_value: The value recorded on the real certificate (may be null).
    :return: None if there is no actual value to compare against, 1 if the estimate is missing or the actual
        value is 0 and the estimate is not, 0 if both are 0, otherwise |estimated - actual| / |actual|.
    """
    # If we don't have anything to compare against, return None
    if pd.isnull(actual_value):
        return None

    # A missing estimate counts as a complete miss
    if pd.isnull(estimated_value):
        return 1

    # A relative error is undefined for an actual value of 0, so score it as all or nothing
    if actual_value == 0:
        return 0 if estimated_value == 0 else 1

    # Divide by the magnitude so a negative actual value cannot produce a negative error
    return abs(estimated_value - actual_value) / abs(actual_value)
|
||||
|
||||
|
||||
def app():
    """
    This script is used to test the EPC estimation process.

    For a sample of certificate directories, the newest certificate of a fixed set of UPRNs is compared against
    the EPC that SearchEpc.estimate_epc produces when every certificate of that UPRN is withheld. Numeric variables
    are scored with check_numeric_performance and string variables by exact match.

    :return: A dictionary holding the overall median success figures and the figures grouped by tenure, by
        property type and by property type & built form.
    :raises ValueError: If a vartype is neither numeric nor a string.
    """

    numerical_vartypes = {key: value for key, value in vartypes.items() if value in ["float", "Int64"]}
    str_var_types = {key: value for key, value in vartypes.items() if value == "str"}
    # Make sure we haven't missed any keys
    if len(numerical_vartypes) + len(str_var_types) != len(vartypes):
        raise ValueError("Not all vartypes have been accounted for")

    # Drop some keys that aren't important
    for k in CATETORICALS_TO_IGNORE:
        str_var_types.pop(k, None)

    directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()]

    # Sample without replacement so that the same directory is never processed (and counted) twice
    directory_sample = sample(directories, k=min(N_DIRECTORIES, len(directories)))

    results = []

    for directory in tqdm(directory_sample):
        filepath = directory / "certificates.csv"
        df = pd.read_csv(filepath, low_memory=False)
        # Drop missing UPRNs before the string conversion, as afterwards they would be the string "<NA>" and no
        # longer be recognised as null
        df = df[~pd.isnull(df["UPRN"])]
        df["UPRN"] = df["UPRN"].astype("Int64").astype("str")

        # uprn_sample = sample(df["UPRN"].unique().tolist(), DIR_SAMPLE_SIZE)
        # Take a fixed sample based on the first DIR_SAMPLE_SIZE uprns
        uprn_sample = sorted(df["UPRN"].unique().tolist())[:DIR_SAMPLE_SIZE]
        df_sample = df[df["UPRN"].isin(uprn_sample)]
        # Take the record with the newest LODGEMENT_DATETIME by uprn
        df_sample = df_sample.sort_values("LODGEMENT_DATETIME", ascending=False).drop_duplicates("UPRN")
        # Convert the columns to lower case and replace underscores with hyphens, the same as the api
        df_sample.columns = df_sample.columns.str.lower().str.replace("_", "-")

        # For each epc, we test the estimation process
        for _, epc in df_sample.iterrows():
            epc = epc.to_dict()
            address1 = epc["address1"]
            postcode = epc["postcode"]

            # Get all EPCs for this uprn and we make sure they get dropped from the estimate_epc function
            epcs_for_uprn = df[df["UPRN"] == epc["uprn"]]
            lmks_to_drop = epcs_for_uprn["LMK_KEY"].tolist()
            searcher = SearchEpc(address1, postcode, auth_token=EPC_AUTH_TOKEN, os_api_key="")
            searcher.uprn = epc["uprn"]

            # Perform the same remapping for built-form as in the Property class for this test, in case we get (e.g.)
            # Enclosed End-Terrace
            built_form = BUILT_FORM_REMAP.get(epc["built-form"], epc["built-form"])
            if (epc["property-type"] == "Maisonette" and built_form == "Detached") or (
                built_form in Definitions.DATA_ANOMALY_MATCHES
            ):
                built_form = ""

            estimated_epc = searcher.estimate_epc(
                property_type=epc["property-type"], built_form=built_form, lmks_to_drop=lmks_to_drop
            )

            # We now compare the difference between the estimated and original
            # TODO: We can convert windows and lighting to numeric versions and estimate how close we are
            numeric_performance = {
                key: check_numeric_performance(estimated_epc[key], epc[key]) for key in numerical_vartypes
            }

            # Remove Nones
            numeric_performance = {key: value for key, value in numeric_performance.items() if value is not None}
            # Get an average. If no numeric value could be compared there is nothing to average, so the score is
            # left empty and is ignored by the medians below
            if numeric_performance:
                numeric_success = 1 - sum(numeric_performance.values()) / len(numeric_performance)
            else:
                numeric_success = None

            # categorical performance
            categorical_performance = {
                key: 0 if estimated_epc[key] != epc[key] else 1 for key in str_var_types
            }
            # Get an average
            if categorical_performance:
                categorical_success = sum(categorical_performance.values()) / len(categorical_performance)
            else:
                categorical_success = None

            results.append(
                {
                    "uprn": epc["uprn"],
                    "numeric_success": numeric_success,
                    "categorical_success": categorical_success,
                    "property_type": epc["property-type"],
                    "built_form": epc["built-form"],
                    "tenure": epc["tenure"],
                }
            )

    # Get aggregate performance figures
    results_df = pd.DataFrame(results)
    results_df["tenure"] = results_df["tenure"].replace("Rented (social)", "rental (social)")

    avg_numeric_success = results_df["numeric_success"].median()
    avg_categorical_success = results_df["categorical_success"].median()

    # With 20 nearest homes
    # 0.7718100840549558
    # 0.5116279069767442
    # 100 nearest homes
    # 0.7859617377809409
    # 0.5348837209302325

    # Fixed sample, sqrt weights

    # Group by tenure
    by_tenure = results_df.groupby("tenure").agg(
        {"numeric_success": "median", "categorical_success": "median", "uprn": "count"}
    )
    pd.set_option('display.max_rows', 500)
    pd.set_option('display.max_columns', 500)
    pd.set_option('display.width', 1000)

    # With 20 nearest homes
    # numeric_success categorical_success uprn
    # tenure
    # NO DATA! 0.847840 0.581395 278
    # Not defined - use in the case of a new dwelling... 0.930282 0.651163 617
    # Owner-occupied 0.770330 0.511628 2588
    # Rented (private) 0.791885 0.558140 1232
    # owner-occupied 0.741088 0.488372 10912
    # rental (private) 0.749064 0.488372 3252
    # rental (social) 0.822109 0.581395 3878
    # unknown 0.895840 0.627907 1820

    # 100 nearest homes
    # tenure
    # NO DATA! 0.899566 0.604651 233
    # Not defined - use in the case of a new dwelling... 0.927518 0.674419 608
    # Owner-occupied 0.777026 0.511628 3167
    # Rented (private) 0.805646 0.534884 1316
    # owner-occupied 0.762180 0.488372 10835
    # rental (private) 0.760503 0.511628 3181
    # rental (social) 0.830057 0.604651 3705
    # unknown 0.899948 0.627907 1571

    # By property type - we also want to see how many properties we have for each property type
    by_property_type = results_df.groupby("property_type").agg(
        {"numeric_success": "median", "categorical_success": "median", "uprn": "count"}
    )
    # By property_type & built form
    by_property_type_built_form = results_df.groupby(["property_type", "built_form"]).agg(
        {"numeric_success": "median", "categorical_success": "median", "uprn": "count"}
    )

    # Hand the figures back so they can be inspected by whoever runs the script
    return {
        "avg_numeric_success": avg_numeric_success,
        "avg_categorical_success": avg_categorical_success,
        "by_tenure": by_tenure,
        "by_property_type": by_property_type,
        "by_property_type_built_form": by_property_type_built_form,
    }
|
||||
38
etl/testing_data/livewest_pilot.py
Normal file
38
etl/testing_data/livewest_pilot.py
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
"""
|
||||
This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
|
||||
testing
|
||||
"""
|
||||
import os
|
||||
|
||||
import pandas as pd
|
||||
from utils.s3 import save_csv_to_s3
|
||||
|
||||
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN", None)
|
||||
USER_ID = 8
|
||||
PORTFOLIO_ID = 61
|
||||
|
||||
|
||||
def app():
    """
    Build the LiveWest pilot input csv, upload it to S3 and print the request body that points at it.

    The csv has the columns ``address``, ``postcode`` and ``Notes`` and is written to the
    ``retrofit-plan-inputs-dev`` bucket under ``<USER_ID>/<PORTFOLIO_ID>/livewest_pilot_file.csv``.

    :return: None - the request body is only printed
    """
    pilot_properties = [
        ("42, Foxes Field", "TR18 3RJ"),
        ("11, Cranley Gardens", "TQ13 8UT"),
    ]
    input_frame = pd.DataFrame(
        [{"address": address, "postcode": postcode, "Notes": None} for address, postcode in pilot_properties]
    )

    # The S3 key doubles as the trigger file path in the request body
    s3_key = f"{USER_ID}/{PORTFOLIO_ID}/livewest_pilot_file.csv"
    save_csv_to_s3(dataframe=input_frame, bucket_name="retrofit-plan-inputs-dev", file_name=s3_key)

    request_body = dict(
        portfolio_id=str(PORTFOLIO_ID),
        housing_type="Social",
        goal="Increase EPC",
        goal_value="C",
        trigger_file_path=s3_key,
    )
    print(request_body)
|
||||
42
etl/testing_data/no_epc_input.py
Normal file
42
etl/testing_data/no_epc_input.py
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
"""
|
||||
This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
|
||||
testing
|
||||
"""
|
||||
import pandas as pd
|
||||
from utils.s3 import save_csv_to_s3
|
||||
|
||||
USER_ID = 8
|
||||
PORTFOLIO_ID = 57
|
||||
|
||||
|
||||
def app():
    """
    Build the ``no_epc.csv`` input file, upload it to S3 and print the request body that points at it.

    NOTE(review): the previous docstring said this portfolio was for testing windows recommendations, which looks
    copied from windows_portfolio.py. Going by the script and file names, these are presumably properties without
    an EPC on record - confirm.

    The csv has the columns ``address``, ``postcode`` and ``Notes`` and is written to the
    ``retrofit-plan-inputs-dev`` bucket under ``<USER_ID>/<PORTFOLIO_ID>/no_epc.csv``.

    :return: None - the request body is only printed
    """
    test_properties = [
        ("21 Butler House", "E2 0PN"),
        ("22 Butler House", "E2 0PN"),
        ("23 Butler House", "E2 0PN"),
        ("24 Butler House", "E2 0PN"),
    ]
    input_frame = pd.DataFrame(
        [{"address": address, "postcode": postcode, "Notes": None} for address, postcode in test_properties]
    )

    # The S3 key doubles as the trigger file path in the request body
    s3_key = f"{USER_ID}/{PORTFOLIO_ID}/no_epc.csv"
    save_csv_to_s3(dataframe=input_frame, bucket_name="retrofit-plan-inputs-dev", file_name=s3_key)

    request_body = dict(
        portfolio_id=str(PORTFOLIO_ID),
        housing_type="Social",
        goal="Increase EPC",
        goal_value="A",
        trigger_file_path=s3_key,
    )
    print(request_body)
|
||||
61
etl/testing_data/retrofitted_properties.py
Normal file
61
etl/testing_data/retrofitted_properties.py
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
"""
|
||||
This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
|
||||
testing
|
||||
"""
|
||||
import pandas as pd
|
||||
from utils.s3 import save_csv_to_s3
|
||||
|
||||
USER_ID = 8
|
||||
PORTFOLIO_ID = 62
|
||||
|
||||
|
||||
def app():
    """
    Build the retrofitted-properties input csv, upload it to S3 and print the request body that points at it.

    This portfolio contains properties that we have demo'd in pilots, or properties that were provided to us
    as properties that are being treated under funding schemes and that we have pre/post EPRs for.

    The csv has the columns ``address``, ``postcode`` and ``Notes`` and is written to the
    ``retrofit-plan-inputs-dev`` bucket under ``<USER_ID>/<PORTFOLIO_ID>/eco4_shdf_retrofits.csv``.

    :return: None - the request body is only printed
    """
    # Each entry is (address, postcode, notes); the notes values are kept exactly as they were (None vs '')
    live_west_properties = [
        ("42, Foxes Field", "TR18 3RJ", None),
        ("11, Cranley Gardens", "TQ13 8UT", None),
    ]
    keyzy_properties = [
        ("2 South Terrace", "NN1 5JY", ""),
        ("25 Albert Street", "PO12 4TY", ""),
    ]
    pilot_properties = [
        ("113 Tenby Road", "B13 9LT", ""),
        ("139 School Road", "B28 8JF", ""),
        ("77 Simmons Drive", "B32 1SL", ""),
        ("Flat 2, 54 Wedgewood Road", "B32 1LS", ""),
    ]
    warmfront_eco4_properties = [
        ("73 Long Chaulden", "HP1 2HX", ""),
        ("8 Lindlings", "HP1 2HA", ""),
        ("44 Lindlings", "HP1 2HE", ""),
        ("46 Chaulden Terrace", "HP1 2AN", ""),
    ]
    osmosis_shdf_properties = [
        ("4, Heather Shaw", "BA14 7JS", ""),
        ("16 Glastonbury Road", "M32 9PE", ""),
        ("31 Loddon Way", "BA15 1HG", ""),
        ("62 Pearmain Drive", "NG3 3DJ", ""),
    ]

    all_properties = (
        live_west_properties
        + keyzy_properties
        + pilot_properties
        + warmfront_eco4_properties
        + osmosis_shdf_properties
    )
    input_frame = pd.DataFrame(
        [{"address": address, "postcode": postcode, "Notes": notes} for address, postcode, notes in all_properties]
    )

    # The S3 key doubles as the trigger file path in the request body
    s3_key = f"{USER_ID}/{PORTFOLIO_ID}/eco4_shdf_retrofits.csv"
    save_csv_to_s3(dataframe=input_frame, bucket_name="retrofit-plan-inputs-dev", file_name=s3_key)

    request_body = dict(
        portfolio_id=str(PORTFOLIO_ID),
        housing_type="Social",
        goal="Increase EPC",
        goal_value="A",
        trigger_file_path=s3_key,
    )
    print(request_body)
|
||||
2170
etl/testing_data/sap_model_simulation.py
Normal file
2170
etl/testing_data/sap_model_simulation.py
Normal file
File diff suppressed because it is too large
Load diff
38
etl/testing_data/the_guiness_partnership_pilot.py
Normal file
38
etl/testing_data/the_guiness_partnership_pilot.py
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
"""
|
||||
This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
|
||||
testing
|
||||
"""
|
||||
import os
|
||||
|
||||
import pandas as pd
|
||||
from utils.s3 import save_csv_to_s3
|
||||
|
||||
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN", None)
|
||||
USER_ID = 8
|
||||
PORTFOLIO_ID = 59
|
||||
|
||||
|
||||
def app():
    """
    Build the Guinness Partnership pilot input csv, upload it to S3 and print the request body that points at it.

    The csv has the columns ``address``, ``postcode`` and ``Notes`` and is written to the
    ``retrofit-plan-inputs-dev`` bucket under
    ``<USER_ID>/<PORTFOLIO_ID>/the_guiness_partnership_pilot_file.csv``.

    :return: None - the request body is only printed
    """
    pilot_properties = [
        ("10 Elm Close", "CV37 8XL"),
        ("21, Spring Lane", "MK17 0QP"),
    ]
    input_frame = pd.DataFrame(
        [{"address": address, "postcode": postcode, "Notes": None} for address, postcode in pilot_properties]
    )

    # The S3 key doubles as the trigger file path in the request body
    s3_key = f"{USER_ID}/{PORTFOLIO_ID}/the_guiness_partnership_pilot_file.csv"
    save_csv_to_s3(dataframe=input_frame, bucket_name="retrofit-plan-inputs-dev", file_name=s3_key)

    request_body = dict(
        portfolio_id=str(PORTFOLIO_ID),
        housing_type="Social",
        goal="Increase EPC",
        goal_value="C",
        trigger_file_path=s3_key,
    )
    print(request_body)
|
||||
43
etl/testing_data/windows_portfolio.py
Normal file
43
etl/testing_data/windows_portfolio.py
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
"""
|
||||
This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
|
||||
testing
|
||||
"""
|
||||
import pandas as pd
|
||||
from utils.s3 import save_csv_to_s3
|
||||
|
||||
USER_ID = 8
|
||||
PORTFOLIO_ID = 56
|
||||
|
||||
|
||||
def app():
    """
    Build the windows portfolio input csv, upload it to S3 and print the request body that points at it.

    This portfolio is for testing windows recommendations. The csv has the columns ``address``, ``postcode`` and
    ``Notes`` and is written to the ``retrofit-plan-inputs-dev`` bucket under
    ``<USER_ID>/<PORTFOLIO_ID>/windows_portfolio_inputs.csv``.

    :return: None - the request body is only printed
    """
    test_properties = [
        ("3 Church Terrace", "LE13 0PW"),
        ("3, Main Street, Redmile", "NG13 0GA"),
        ("Manor House, Kennel Lane, Reepham", "LN3 4DZ"),
        ("13 Main Street", "LE14 2JU"),
        ("8 The Crescent, Coston Road, Buckminster", "NG33 5SF"),
    ]
    input_frame = pd.DataFrame(
        [{"address": address, "postcode": postcode, "Notes": None} for address, postcode in test_properties]
    )

    # The S3 key doubles as the trigger file path in the request body
    s3_key = f"{USER_ID}/{PORTFOLIO_ID}/windows_portfolio_inputs.csv"
    save_csv_to_s3(dataframe=input_frame, bucket_name="retrofit-plan-inputs-dev", file_name=s3_key)

    request_body = dict(
        portfolio_id=str(PORTFOLIO_ID),
        housing_type="Social",
        goal="Increase EPC",
        goal_value="A",
        trigger_file_path=s3_key,
    )
    print(request_body)
|
||||
|
|
@ -181,4 +181,16 @@ module "lambda_carbon_prediction_ecr" {
|
|||
module "lambda_heat_prediction_ecr" {
|
||||
ecr_name = "lambda-heat-prediction-${var.stage}"
|
||||
source = "./modules/ecr"
|
||||
}
|
||||
|
||||
##############################################
|
||||
# CDN - Cloudfront
|
||||
##############################################
|
||||
module "cloudfront_distribution" {
|
||||
source = "./modules/cloudfront"
|
||||
bucket_name = module.s3.bucket_name
|
||||
bucket_id = module.s3.bucket_id
|
||||
bucket_arn = module.s3.bucket_arn
|
||||
bucket_domain_name = module.s3.bucket_domain_name
|
||||
stage = var.stage
|
||||
}
|
||||
65
infrastructure/terraform/modules/cloudfront/main.tf
Normal file
65
infrastructure/terraform/modules/cloudfront/main.tf
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
resource "aws_cloudfront_distribution" "s3_distribution" {
|
||||
origin {
|
||||
domain_name = var.bucket_domain_name
|
||||
origin_id = "S3-${var.bucket_name}"
|
||||
|
||||
s3_origin_config {
|
||||
origin_access_identity = aws_cloudfront_origin_access_identity.oai.cloudfront_access_identity_path
|
||||
}
|
||||
}
|
||||
|
||||
enabled = true
|
||||
|
||||
default_cache_behavior {
|
||||
allowed_methods = ["GET", "HEAD"]
|
||||
cached_methods = ["GET", "HEAD"]
|
||||
target_origin_id = "S3-${var.bucket_name}"
|
||||
viewer_protocol_policy = "redirect-to-https"
|
||||
compress = true
|
||||
|
||||
forwarded_values {
|
||||
query_string = false
|
||||
cookies {
|
||||
forward = "none"
|
||||
}
|
||||
}
|
||||
|
||||
min_ttl = 0
|
||||
default_ttl = 86400
|
||||
max_ttl = 31536000
|
||||
}
|
||||
|
||||
price_class = "PriceClass_All"
|
||||
|
||||
restrictions {
|
||||
geo_restriction {
|
||||
restriction_type = "none"
|
||||
}
|
||||
}
|
||||
|
||||
viewer_certificate {
|
||||
cloudfront_default_certificate = true
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_cloudfront_origin_access_identity" "oai" {
|
||||
comment = "OAI for ${var.bucket_name}"
|
||||
}
|
||||
|
||||
resource "aws_s3_bucket_policy" "bucket_policy" {
|
||||
bucket = var.bucket_id
|
||||
|
||||
policy = jsonencode({
|
||||
Version = "2012-10-17"
|
||||
Statement = [
|
||||
{
|
||||
Effect = "Allow"
|
||||
Principal = {
|
||||
AWS = "arn:aws:iam::cloudfront:user/CloudFront Origin Access Identity ${aws_cloudfront_origin_access_identity.oai.id}"
|
||||
}
|
||||
Action = "s3:GetObject"
|
||||
Resource = "${var.bucket_arn}/*"
|
||||
},
|
||||
]
|
||||
})
|
||||
}
|
||||
24
infrastructure/terraform/modules/cloudfront/variables.tf
Normal file
24
infrastructure/terraform/modules/cloudfront/variables.tf
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
variable "bucket_name" {
|
||||
description = "The name of the bucket"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "stage" {
|
||||
description = "The deployment stage"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "bucket_id" {
|
||||
description = "The ID of the S3 bucket"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "bucket_arn" {
|
||||
description = "The ARN of the S3 bucket"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "bucket_domain_name" {
|
||||
description = "The regional domain name of the S3 bucket"
|
||||
type = string
|
||||
}
|
||||
|
|
@ -2,3 +2,15 @@ output "bucket_name" {
|
|||
description = "The name of the S3 bucket"
|
||||
value = aws_s3_bucket.bucket.bucket
|
||||
}
|
||||
|
||||
output "bucket_id" {
|
||||
value = aws_s3_bucket.bucket.id
|
||||
}
|
||||
|
||||
output "bucket_arn" {
|
||||
value = aws_s3_bucket.bucket.arn
|
||||
}
|
||||
|
||||
output "bucket_domain_name" {
|
||||
value = aws_s3_bucket.bucket.bucket_regional_domain_name
|
||||
}
|
||||
3
keyzy_pilot.csv
Normal file
3
keyzy_pilot.csv
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
address,postcode,Notes,,,,
|
||||
2 South Terrace,NN1 5JY,,,,,
|
||||
25 Albert Street,PO12 4TY,,,,,
|
||||
|
|
|
@ -1,27 +1,96 @@
|
|||
import numpy as np
|
||||
from recommendations.county_to_region import county_to_region_map
|
||||
|
||||
# This data comes from SPONs
|
||||
# This data comes from SPONs 2023
|
||||
regional_labour_variations = [
|
||||
{"Region": "Outer London (Spon’s 2023)", "Adjustment_Factor": 1.00},
|
||||
{"Region": "Outer London", "Adjustment_Factor": 1.00},
|
||||
{"Region": "Inner London", "Adjustment_Factor": 1.05},
|
||||
{"Region": "South East", "Adjustment_Factor": 0.96},
|
||||
{"Region": "South West", "Adjustment_Factor": 0.90},
|
||||
{"Region": "South East England", "Adjustment_Factor": 0.96},
|
||||
{"Region": "South West England", "Adjustment_Factor": 0.90},
|
||||
{"Region": "East of England", "Adjustment_Factor": 0.93},
|
||||
{"Region": "East Midlands", "Adjustment_Factor": 0.88},
|
||||
{"Region": "West Midlands", "Adjustment_Factor": 0.87},
|
||||
{"Region": "North East", "Adjustment_Factor": 0.83},
|
||||
{"Region": "North West", "Adjustment_Factor": 0.88},
|
||||
{"Region": "Yorkshire and Humberside", "Adjustment_Factor": 0.86},
|
||||
{"Region": "North East England", "Adjustment_Factor": 0.83},
|
||||
{"Region": "North West England", "Adjustment_Factor": 0.88},
|
||||
{"Region": "Yorkshire and the Humber", "Adjustment_Factor": 0.86},
|
||||
{"Region": "Wales", "Adjustment_Factor": 0.88},
|
||||
{"Region": "Scotland", "Adjustment_Factor": 0.88},
|
||||
{"Region": "Northern Ireland", "Adjustment_Factor": 0.76}
|
||||
]
|
||||
|
||||
county_map = {
|
||||
"Northamptonshire": "East Midlands",
|
||||
"Hampshire": "South East",
|
||||
# This data is based on the MCS database
|
||||
MCS_SOLAR_PV_COST_DATA = {
|
||||
"last_updated": "2024-01-04",
|
||||
"average_cost_per_kwh": 2013.94,
|
||||
"average_cost_per_kwh-Outer London": 2618.75,
|
||||
"average_cost_per_kwh-Inner London": 2618.75,
|
||||
"average_cost_per_kwh-South East England": 2083.33,
|
||||
"average_cost_per_kwh-South West England": 2113,
|
||||
"average_cost_per_kwh-East of England": 1973.86,
|
||||
"average_cost_per_kwh-East Midlands": 1981.86,
|
||||
"average_cost_per_kwh-West Midlands": 1926.55,
|
||||
"average_cost_per_kwh-North East England": 2028.49,
|
||||
"average_cost_per_kwh-North West England": 1620.42,
|
||||
"average_cost_per_kwh-Yorkshire and the Humber": 2060.9,
|
||||
"average_cost_per_kwh-Wales": 1898.83,
|
||||
"average_cost_per_kwh-Scotland": 1967.97,
|
||||
"average_cost_per_kwh-Northern Ireland": 2126.09,
|
||||
}
|
||||
|
||||
# This is based on quotes from installers
|
||||
BATTERY_COST = 3500
|
||||
|
||||
# This is based on https://www.checkatrade.com/blog/cost-guides/cost-smart-thermostat/
|
||||
SMART_APPLIANCE_THERMOSTAT_COST = 400
|
||||
PROGRAMMER_COST = 120
|
||||
ROOM_THERMOSTAT_COST = 150
|
||||
TRVS_COST = 35
|
||||
|
||||
# Cost for TTZC
|
||||
# Smart thermostat based on checkatrade https://www.checkatrade.com/blog/cost-guides/cost-smart-thermostat/
|
||||
# Based on the Nest system
|
||||
TTZC_SMART_THERMOSTAT_COST = 205
|
||||
TTZC_SMART_THERMOSTAT_LABOUR_HOURS = 2
|
||||
TTZC_ELECTRICIAN_HOURLY_RATE = 45
|
||||
# Based on cost of a Nest temperature sensor
|
||||
TTZC_ROOM_TEMPERATURE_SENSOR_COST = 50
|
||||
TTZC_ROOM_TEMPERATURE_SENSOR_LABOUR_HOURS = 0.17 # (Assume ~ 10 mins install per sensor)
|
||||
# Based on an average cost of smart radiator valves
|
||||
TTZC_SMART_RADIATOR_VALUES = 50
|
||||
TTZC_SMART_RADIATOR_VALUES_LABOUR_HOURS = 0.37 # (Assume ~ 15-30 mins install per valve)
|
||||
|
||||
# Low carbon combi boiler - lower bound of the quoted £2200 - £3000 range (the median of that range would be £2600)
|
||||
LOW_CARBON_COMBI_BOILER = 2200
|
||||
|
||||
# boiler prices based on
|
||||
# https://www.greenmatch.co.uk/boilers/30kw-boiler
|
||||
# https://www.greenmatch.co.uk/boilers/35kw-boiler
|
||||
# https://www.greenmatch.co.uk/boilers/40kw-boiler
|
||||
# These are exclusive of installation costs
|
||||
COMBI_BOILER_COSTS = {
|
||||
"30kw": 1550,
|
||||
"35kw": 1610,
|
||||
"40kw": 1625
|
||||
}
|
||||
|
||||
CONVENTIONAL_BOILER_COSTS = {
|
||||
"30kw": 1117,
|
||||
"35kw": 1546,
|
||||
"40kw": 1776
|
||||
}
|
||||
|
||||
# Assumes 3 hours to remove each heater (including re-decorating)
|
||||
ROOM_HEATER_REMOVAL_COST = 120
|
||||
ROOM_HEATER_REMOVAL_LABOUR_HOURS = 3
|
||||
|
||||
# This is a cost quoted by Jim for a system flush - the existing system will run more efficiently
|
||||
SYSTEM_FLUSH_COST = 250
|
||||
|
||||
SINGLE_RADIATOR_COST = 150
|
||||
DOUBLE_RADIATOR_COST = 300
|
||||
FLUE_COST = 600
|
||||
PIPEWORK_COST = 750 # Min cost is £500
|
||||
|
||||
|
||||
class Costs:
|
||||
"""
|
||||
|
|
@ -40,8 +109,16 @@ class Costs:
|
|||
# We assume a conservative 10% contingency for all works which is a rate defined by SPONs
|
||||
CONTINGENCY = 0.1
|
||||
|
||||
# For flat roof, we assume it's a high risk project as it's very weather dependent and also is heavily
|
||||
# dependent on the quality of the existing roof
|
||||
FLAT_ROOF_CONTINGENCY = 0.15
|
||||
|
||||
# We use a higher contingency rate for internal wall insulation because of the potential for issues with moving
|
||||
# fittings and trimming doors, as well as scope for damage to the existing wall during preparation.
|
||||
IWI_CONTINGENCY = 0.2
|
||||
|
||||
# Where there is more uncertainty, a higher contingency rate is used
|
||||
HIGH_RISK_CONTINGENCY = 0.15
|
||||
HIGH_RISK_CONTINGENCY = 0.2
|
||||
# When there is less uncertainty, a lower contingency rate is used
|
||||
LOW_RISK_CONTINGENCY = 0.05
|
||||
|
||||
|
|
@ -54,11 +131,21 @@ class Costs:
|
|||
# have a preliminaries of 12-14% so we use 12% as the median for the preliminaries rate.
|
||||
# For External wall insulation (EWI), we use 15% as the preliminaries rate if we think the property might
|
||||
# need scaffolding, otherwise we use 12%. This is to account for any site preparation that might be required
|
||||
EWI_NO_SCAFFOLDING_PRELIMINARIES = 0.12
|
||||
EWI_SCAFFOLDING_PRELIMINARIES = 0.15
|
||||
EWI_NO_SCAFFOLDING_PRELIMINARIES = 0.2
|
||||
EWI_SCAFFOLDING_PRELIMINARIES = 0.25
|
||||
|
||||
VAT_RATE = 0.2
|
||||
PROFIT_MARGIN = 0.15
|
||||
PROFIT_MARGIN = 0.2
|
||||
|
||||
# Based on this greenmatch article, on average, a Sash window is around 50% more expensive than a casement window.
|
||||
# Therefore, for a conservative cost estimate, and allowance for a more premium window type, we inflate the material
|
||||
# cost of the windows to allow for a sash window type
|
||||
# https://www.greenmatch.co.uk/windows/double-glazing/cost
|
||||
SASH_WINDOW_INFLATION_FACTOR = 1.5
|
||||
|
||||
# Typically, secondary glazing can be installed for 25% of the cost of double glazed windows - to be conservative,
|
||||
# we scale the cost by half
|
||||
SECONDARY_GLAZING_SCALING_FACTOR = 0.5
|
||||
|
||||
def __init__(self, property_instance):
|
||||
"""
|
||||
|
|
@ -71,13 +158,16 @@ class Costs:
|
|||
self.property = property_instance
|
||||
self.regional_labour_variations = regional_labour_variations
|
||||
|
||||
self.county = county_map.get(self.property.data["county"], None)
|
||||
if self.county is None:
|
||||
raise ValueError("County not found in county map")
|
||||
self.region = county_to_region_map.get(self.property.data["county"], None)
|
||||
if self.region is None:
|
||||
# Try and grab using the local-authority-label
|
||||
self.region = county_to_region_map.get(self.property.data["local-authority-label"], None)
|
||||
if self.region is None:
|
||||
raise ValueError("Region not found in county map")
|
||||
|
||||
self.labour_adjustment_factor = [
|
||||
x["Adjustment_Factor"] for x in self.regional_labour_variations if
|
||||
x["Region"] == self.county
|
||||
x["Region"] == self.region
|
||||
][0]
|
||||
|
||||
if not self.labour_adjustment_factor:
|
||||
|
|
@ -115,6 +205,9 @@ class Costs:
|
|||
|
||||
labour_hours = material["labour_hours_per_unit"] * wall_area
|
||||
|
||||
# Assume a team of 2
|
||||
labour_days = (labour_hours / 8) / 2
|
||||
|
||||
return {
|
||||
"total": total_cost,
|
||||
"subtotal": subtotal_before_vat,
|
||||
|
|
@ -124,7 +217,8 @@ class Costs:
|
|||
"material": base_material_cost,
|
||||
"profit": profit_cost,
|
||||
"labour_hours": labour_hours,
|
||||
"labour_cost": labour_cost
|
||||
"labour_cost": labour_cost,
|
||||
"labour_days": labour_days
|
||||
}
|
||||
|
||||
def loft_insulation(self, floor_area, material):
|
||||
|
|
@ -136,12 +230,16 @@ class Costs:
|
|||
"""
|
||||
material_cost_per_m2 = material["material_cost"]
|
||||
|
||||
# We inflate material costs due to recent price increases
|
||||
material_cost_per_m2 = material_cost_per_m2 * 1.5
|
||||
|
||||
base_material_cost = material_cost_per_m2 * floor_area
|
||||
labour_cost = material["labour_cost"] * floor_area * self.labour_adjustment_factor
|
||||
|
||||
subtotal_before_profit = base_material_cost + labour_cost
|
||||
|
||||
contingency_cost = subtotal_before_profit * self.CONTINGENCY
|
||||
# We use high risk contingency because of the possibility of access issues and clearing existing insulation
|
||||
contingency_cost = subtotal_before_profit * self.HIGH_RISK_CONTINGENCY
|
||||
preliminaries_cost = subtotal_before_profit * self.PRELIMINARIES
|
||||
profit_cost = subtotal_before_profit * self.PROFIT_MARGIN
|
||||
|
||||
|
|
@ -153,6 +251,9 @@ class Costs:
|
|||
|
||||
labour_hours = material["labour_hours_per_unit"] * floor_area
|
||||
|
||||
# Assume a team of 1 person
|
||||
labour_days = labour_hours / 8
|
||||
|
||||
return {
|
||||
"total": total_cost,
|
||||
"subtotal": subtotal_before_vat,
|
||||
|
|
@ -162,7 +263,8 @@ class Costs:
|
|||
"material": base_material_cost,
|
||||
"profit": profit_cost,
|
||||
"labour_hours": labour_hours,
|
||||
"labour_cost": labour_cost
|
||||
"labour_cost": labour_cost,
|
||||
"labour_days": labour_days
|
||||
}
|
||||
|
||||
def internal_wall_insulation(self, wall_area, material, non_insulation_materials):
|
||||
|
|
@ -224,8 +326,7 @@ class Costs:
|
|||
|
||||
subtotal_before_profit = labour_costs + materials_costs + demolition_plant_costs
|
||||
|
||||
# We use high risk contingency for iwi
|
||||
contingency_cost = subtotal_before_profit * self.HIGH_RISK_CONTINGENCY
|
||||
contingency_cost = subtotal_before_profit * self.IWI_CONTINGENCY
|
||||
preliminaries_cost = subtotal_before_profit * self.PRELIMINARIES
|
||||
profit_cost = subtotal_before_profit * self.PROFIT_MARGIN
|
||||
|
||||
|
|
@ -301,7 +402,9 @@ class Costs:
|
|||
|
||||
subtotal_before_profit = labour_costs + materials_costs
|
||||
|
||||
contingency_cost = subtotal_before_profit * self.CONTINGENCY
|
||||
# Because of the possibility of damage to the existing floor, or difficulties associated with moving fittings,
|
||||
# we use a higher contingency rate
|
||||
contingency_cost = subtotal_before_profit * self.HIGH_RISK_CONTINGENCY
|
||||
preliminaries_cost = subtotal_before_profit * self.PRELIMINARIES
|
||||
profit_cost = subtotal_before_profit * self.PROFIT_MARGIN
|
||||
|
||||
|
|
@ -569,3 +672,566 @@ class Costs:
|
|||
"labour_days": labour_days,
|
||||
"labour_cost": labour_costs
|
||||
}
|
||||
|
||||
def low_energy_lighting(self, number_of_lights, number_current_lel_lights, material):
    """
    Work out the cost breakdown for fitting low energy lighting.

    Material and (regionally adjusted) labour costs are scaled by the number of lights; contingency,
    preliminaries and profit are each taken as a share of that base, and VAT is applied on top.

    :param number_of_lights: Int, number of lights to fit
    :param number_current_lel_lights: Int, number of low energy lights currently installed in the home
    :param material: Dict, material data containing "material_cost", "labour_cost" and "labour_hours_per_unit"
    :return: Dict with the keys total, subtotal, vat, contingency, preliminaries, material, profit, labour_hours,
        labour_days and labour_cost
    """
    # With no low energy lights in the property yet, allow a higher contingency in case there are potential
    # wiring blockers
    contingency = self.HIGH_RISK_CONTINGENCY if number_current_lel_lights == 0 else self.CONTINGENCY

    material_cost = material["material_cost"] * number_of_lights
    labour_cost = material["labour_cost"] * number_of_lights * self.labour_adjustment_factor
    base_cost = material_cost + labour_cost

    contingency_cost = base_cost * contingency
    preliminaries_cost = base_cost * self.PRELIMINARIES
    profit_cost = base_cost * self.PROFIT_MARGIN

    net_cost = base_cost + contingency_cost + preliminaries_cost + profit_cost
    vat_cost = net_cost * self.VAT_RATE

    labour_hours = material["labour_hours_per_unit"] * number_of_lights

    breakdown = {}
    breakdown["total"] = net_cost + vat_cost
    breakdown["subtotal"] = net_cost
    breakdown["vat"] = vat_cost
    breakdown["contingency"] = contingency_cost
    breakdown["preliminaries"] = preliminaries_cost
    breakdown["material"] = material_cost
    breakdown["profit"] = profit_cost
    breakdown["labour_hours"] = labour_hours
    # A single electrician working 8 hour days
    breakdown["labour_days"] = labour_hours / 8
    breakdown["labour_cost"] = labour_cost
    return breakdown
|
||||
|
||||
def flat_roof_insulation(self, floor_area, material, non_insulation_materials):
    """
    Work out the cost breakdown for insulating a warm, flat roof.

    A model of a warm, flat roof construction can be seen in this video:
    https://www.youtube.com/watch?v=WZ6Ng6YI9OA
    Warm, flat roof insulation will normally be 100-125mm in depth.

    The measure is broken down into the following jobs:
    1) Preparation of the roof. Cleaning the existing roof surface, removing any debris and repairing any damage.
        An edge barrier will likely also need to be installed to protect the sides of the roof from water ingress.
    2) Primer application. A layer of primer is applied to the clean roof surface to improve the adhesion of
        subsequent layers and to seal the existing roof surface.
    3) Vapour proof layer installation. A vapour control layer is laid to prevent moisture ingress from inside the
        building, which is essential in warm roof construction.
    4) Insulation layer application. Insulation boards, e.g. rigid PIR (Polyisocyanurate) boards, are placed and
        securely fixed over the roof.
    5) Waterproofing membrane installation. The insulation (and timber layer, if used) is covered with a
        waterproofing membrane such as EPDM, PVC or bituminous felt, with all joints, edges and roof penetrations
        carefully sealed to ensure water tightness.

    The waterproofing entry only carries a total cost, so it is included in the subtotal but not in the returned
    "material" or "labour_cost" figures.

    :param floor_area: Area of the flat roof to be insulated, based on the area of the floor
    :param material: Selected insulation material
    :param non_insulation_materials: Non-insulation materials required for the job
    :return: Dict with the keys total, subtotal, vat, contingency, preliminaries, material, profit, labour_hours,
        labour_days and labour_cost
    :raises ValueError: if there are not exactly two per-m2 preparation entries, one vapour barrier entry and one
        waterproofing entry
    """

    def entries_of_type(kind):
        return [entry for entry in non_insulation_materials if entry["type"] == kind]

    preparation = [
        entry for entry in non_insulation_materials
        if entry["type"] == "flat_roof_preparation" and entry["cost_unit"] == "gbp_per_m2"
    ]
    vapour_barrier = entries_of_type("flat_roof_vapour_barrier")
    waterproofing = entries_of_type("flat_roof_waterproofing")

    if (len(preparation), len(vapour_barrier), len(waterproofing)) != (2, 1, 1):
        raise ValueError("Incorrect number of data entries for non-insulation materials")

    barrier = vapour_barrier[0]
    membrane = waterproofing[0]

    # Material costs per job
    preparation_material = sum(entry["material_cost"] * floor_area for entry in preparation)
    barrier_material = barrier["material_cost"] * floor_area
    insulation_material = material["material_cost"] * floor_area

    # Labour costs per job
    preparation_labour = sum(entry["labour_cost"] * floor_area for entry in preparation)
    barrier_labour = barrier["labour_cost"] * floor_area

    # For waterproofing and upstand, we only have a total cost
    waterproofing_total = membrane["total_cost"] * floor_area

    labour_costs = (preparation_labour + barrier_labour) * self.labour_adjustment_factor
    materials_costs = preparation_material + barrier_material + insulation_material
    base_cost = labour_costs + materials_costs + waterproofing_total

    contingency_cost = base_cost * self.FLAT_ROOF_CONTINGENCY
    preliminaries_cost = base_cost * self.PRELIMINARIES
    profit_cost = base_cost * self.PROFIT_MARGIN

    net_cost = base_cost + contingency_cost + preliminaries_cost + profit_cost
    vat_cost = net_cost * self.VAT_RATE

    labour_hours = (
        sum(entry["labour_hours_per_unit"] * floor_area for entry in preparation)
        + barrier["labour_hours_per_unit"] * floor_area
        + membrane["labour_hours_per_unit"] * floor_area
    )

    breakdown = {}
    breakdown["total"] = net_cost + vat_cost
    breakdown["subtotal"] = net_cost
    breakdown["vat"] = vat_cost
    breakdown["contingency"] = contingency_cost
    breakdown["preliminaries"] = preliminaries_cost
    breakdown["material"] = materials_costs
    breakdown["profit"] = profit_cost
    breakdown["labour_hours"] = labour_hours
    # A small/medium project might be conducted by a team of 2-4. We assume a team of 2, working 8 hour days,
    # since a lot of the roofs will be on the smaller side
    breakdown["labour_days"] = labour_hours / 8 / 2
    breakdown["labour_cost"] = labour_costs
    return breakdown
|
||||
|
||||
def window_glazing(self, number_of_windows, material, is_secondary_glazing=False):
    """
    Estimate the cost and labour involved in glazing a number of windows.

    The job being priced is characterised as:

    1) Initial assessment and measurement of the existing frame and opening
    2) Careful removal of the existing window
    3) Responsible disposal (and recycling where possible) of the old window
    4) Preparation of the opening: repairs, gap sealing, levelling and squaring
    5) Installation of the new frame, where new frames are used
    6) Installation of a new window sill, where required
    7) Fitting of the double glazed glass units
    8) Sealing and weatherproofing around the frame and glass
    9) Finishing touches (trim, paint or stain, clean up)
    10) Inspection and testing of opening, closing, locking and sealing

    The initial assessment is treated as part of the preliminaries.

    :param number_of_windows: number of windows to be glazed
    :param material: mapping that provides "material_cost", "labour_cost" and "labour_hours_per_unit",
        each multiplied by the number of windows
    :param is_secondary_glazing: when True, the secondary glazing scaling factor is applied to the costs and to
        the labour hours; otherwise the sash window inflation factor is applied to the costs only
    :return: dict holding the total, the pre-VAT subtotal, the VAT, the contingency, preliminaries and profit
        uplifts, the material and labour costs, and the labour hours and days
    """

    # Raw per-window figures scaled up to the whole job. Note these two values are returned as-is; the
    # multiplier below is only applied to the subtotal derived from them.
    windows_material_cost = material["material_cost"] * number_of_windows
    windows_labour_cost = material["labour_cost"] * number_of_windows * self.labour_adjustment_factor

    if is_secondary_glazing:
        cost_multiplier = self.SECONDARY_GLAZING_SCALING_FACTOR
    else:
        cost_multiplier = self.SASH_WINDOW_INFLATION_FACTOR

    works_subtotal = (windows_material_cost + windows_labour_cost) * cost_multiplier

    # Each uplift is a proportion of the works subtotal
    contingency = works_subtotal * self.CONTINGENCY
    preliminaries = works_subtotal * self.PRELIMINARIES
    profit = works_subtotal * self.PROFIT_MARGIN

    net_cost = works_subtotal + contingency + preliminaries + profit
    vat = net_cost * self.VAT_RATE
    gross_cost = net_cost + vat

    hours = material["labour_hours_per_unit"] * number_of_windows
    if is_secondary_glazing:
        hours = hours * self.SECONDARY_GLAZING_SCALING_FACTOR

    # Convert to days using 8 hours of labour per day, shared by an assumed team of 2
    days = (hours / 8) / 2

    return {
        "total": gross_cost,
        "subtotal": net_cost,
        "vat": vat,
        "contingency": contingency,
        "preliminaries": preliminaries,
        "material": windows_material_cost,
        "profit": profit,
        "labour_hours": hours,
        "labour_cost": windows_labour_cost,
        "labour_days": days
    }
|
||||
|
||||
def solar_pv(self, wattage: float, has_battery: bool = False):
    """
    Calculates the total cost for solar PV based on data provided by the MCS dashboard, which contains
    costing data for installations of renewable and clean energy measures.

    The data in the dashboard is filtered on domestic building installations and then the data across the
    various regions is manually collected. There is currently no automated way to get the data from the MCS
    dashboard.

    Price can also be benchmarked against this checkatrade article:
    https://www.checkatrade.com/blog/cost-guides/cost-of-solar-panel-installation/

    The regional rate and the battery cost are treated as VAT-inclusive: the pre-VAT subtotal is backed out of
    the total rather than VAT being added on top.

    :param wattage: Peak wattage of the solar PV system (divided by 1000 before the regional rate is applied)
    :param has_battery: Bool, whether the system includes a battery
    :return: dict with the total, the pre-VAT subtotal, the VAT and the labour hours/days estimates
    """

    # Get the cost data relevant to the region.
    # NOTE(review): the lookup key is named "average_cost_per_kwh" but the rate is multiplied by kW here -
    # confirm the unit of the source data.
    regional_cost = MCS_SOLAR_PV_COST_DATA["-".join(["average_cost_per_kwh", self.region])]

    kw = wattage / 1000
    total_cost = kw * regional_cost

    if has_battery:
        # The battery cost is based on a quote received from installers
        total_cost += BATTERY_COST

    subtotal_before_vat = total_cost / (1 + self.VAT_RATE)
    vat = total_cost - subtotal_before_vat

    # Labour is based on estimates from online research: an average team seems to consist of 3 people and most
    # jobs take around 2 days. Assuming an 8 hour day, that is 3 * 8 * 2 = 48 hours of labour. (This was
    # previously hard-coded as 72, which did not match the stated assumptions or the 2 labour days returned.)
    team_size = 3
    hours_per_day = 8
    labour_days = 2
    labour_hours = team_size * hours_per_day * labour_days

    return {
        "total": total_cost,
        "subtotal": subtotal_before_vat,
        "vat": vat,
        "labour_hours": labour_hours,
        "labour_days": labour_days,
    }
|
||||
|
||||
def programmer_and_appliance_thermostat(self, has_programmer):
    """
    Cost the installation of a programmer together with an appliance thermostat.

    The appliance thermostat is always costed. The programmer, and the extra labour that goes with it, is only
    added when the property does not already have one.

    :param has_programmer: truthy if the property already has a programmer
    :return: dict with the total, the pre-VAT subtotal, the VAT and the labour hours/days estimates
    """

    # NOTE: the original author's note puts the appliance thermostat estimate at £400, the upper end of the
    # range; the constant itself is defined elsewhere.
    gross_cost = SMART_APPLIANCE_THERMOSTAT_COST
    hours = 2

    if not has_programmer:
        gross_cost = SMART_APPLIANCE_THERMOSTAT_COST + PROGRAMMER_COST
        hours = 4

    # The prices are treated as VAT-inclusive, so the net figure is backed out of the gross one
    net_cost = gross_cost / (1 + self.VAT_RATE)
    vat_element = gross_cost - net_cost

    return {
        "total": gross_cost,
        "subtotal": net_cost,
        "vat": vat_element,
        "labour_hours": hours,
        "labour_days": 1,
    }
|
||||
|
||||
def electric_room_heaters(self, number_heated_rooms):
    """
    Cost the installation of electric room heaters, priced per heated room. The per-room figure follows the
    estimate in this article:
    https://www.bestelectricradiators.co.uk/blog/cost-to-install-a-new-heating-system-uk/

    The per-room price is treated as VAT-inclusive, so the pre-VAT subtotal is derived from the total.

    :param number_heated_rooms: int, number of rooms to be heated
    :return: dict with the total, the pre-VAT subtotal, the VAT and the labour hours/days estimates
    """

    cost_per_room = 500
    # TODO: Rough estimate to be reviewed
    hours_per_room = 1

    gross_cost = cost_per_room * number_heated_rooms
    net_cost = gross_cost / (1 + self.VAT_RATE)

    hours = hours_per_room * number_heated_rooms

    return {
        "total": gross_cost,
        "subtotal": net_cost,
        "vat": gross_cost - net_cost,
        "labour_hours": hours,
        # Whole days, based on 8 hours of labour per day
        "labour_days": np.ceil(hours / 8),
    }
|
||||
|
||||
def high_heat_electric_storage_heaters(self, number_heated_rooms):
    """
    Cost the installation of high heat retention electric storage heaters, priced per heated room. The
    per-room figure follows the estimate given by the Energy Saving Trust:
    https://energysavingtrust.org.uk/advice/electric-heating/

    The per-room price is treated as VAT-inclusive, so the pre-VAT subtotal is derived from the total.

    :param number_heated_rooms: int, number of rooms to be heated
    :return: dict with the total, the pre-VAT subtotal, the VAT and the labour hours/days estimates
    """

    cost_per_room = 1500
    # TODO: Rough estimate to be reviewed
    hours_per_room = 3

    gross_cost = cost_per_room * number_heated_rooms
    net_cost = gross_cost / (1 + self.VAT_RATE)

    hours = hours_per_room * number_heated_rooms

    return {
        "total": gross_cost,
        "subtotal": net_cost,
        "vat": gross_cost - net_cost,
        "labour_hours": hours,
        # Whole days, based on 8 hours of labour per day
        "labour_days": np.ceil(hours / 8),
    }
|
||||
|
||||
def celect_type_controls(self):
    """
    Cost the installation of Celect type controls.

    The price is a fixed, VAT-inclusive figure, so the pre-VAT subtotal is derived from the total.

    :return: dict with the total, the pre-VAT subtotal, the VAT and the labour hours/days estimates
    """

    # Rough £50 estimate based on internet research
    gross_cost = 50
    net_cost = gross_cost / (1 + self.VAT_RATE)

    costing = {
        "total": gross_cost,
        "subtotal": net_cost,
        "vat": gross_cost - net_cost,
    }
    # The labour is estimated at 4 hours, fitting within a single day
    costing["labour_hours"] = 4
    costing["labour_days"] = 1
    return costing
|
||||
|
||||
def hot_water_tank_insulation(self):
    """
    Cost the installation of hot water tank insulation.

    The price is a fixed, VAT-inclusive figure, so the pre-VAT subtotal is derived from the total. No labour
    hours or days are attributed to this measure.

    :return: dict with the total, the pre-VAT subtotal, the VAT and the (zero) labour hours/days
    """

    # Rough £50 estimate based on internet research
    gross_cost = 50
    net_cost = gross_cost / (1 + self.VAT_RATE)

    costing = {
        "total": gross_cost,
        "subtotal": net_cost,
        "vat": gross_cost - net_cost,
    }
    costing["labour_hours"] = 0
    costing["labour_days"] = 0
    return costing
|
||||
|
||||
def roomstat_programmer_trvs(
        self, number_heated_rooms, has_programmer, has_trvs, has_room_thermostat
):
    """
    Cost an upgrade to a programmer, room thermostat and TRVs, charging only for the components the property
    does not already have.

    The component prices are treated as VAT-inclusive, so the pre-VAT subtotal is derived from the total.

    :param number_heated_rooms: number of heated rooms; TRV cost and labour scale with this
    :param has_programmer: truthy if a programmer is already present
    :param has_trvs: truthy if TRVs are already present
    :param has_room_thermostat: truthy if a room thermostat is already present
    :return: dict with the total, the pre-VAT subtotal, the VAT and the labour hours/days estimates
    """

    # Each missing component contributes a (cost, labour hours) pair
    line_items = []

    if not has_programmer:
        line_items.append((PROGRAMMER_COST, 1))

    if not has_trvs:
        line_items.append((TRVS_COST * number_heated_rooms, 0.25 * number_heated_rooms))

    if not has_room_thermostat:
        line_items.append((ROOM_THERMOSTAT_COST, 0.5))

    gross_cost = sum(cost for cost, _ in line_items)
    hours = sum(item_hours for _, item_hours in line_items)

    net_cost = gross_cost / (1 + self.VAT_RATE)

    return {
        "total": gross_cost,
        "subtotal": net_cost,
        "vat": gross_cost - net_cost,
        "labour_hours": hours,
        # The work is always booked as a single day, regardless of the hours above
        "labour_days": 1,
    }
|
||||
|
||||
def time_and_temperature_zone_control(self, number_heated_rooms):
    """
    Cost the installation of time and temperature zone controls: one smart thermostat, plus a room
    temperature sensor and smart radiator valves for every heated room.

    Product prices are inclusive of VAT, so VAT is only calculated on the labour. Note that the returned
    "subtotal" is the VAT-inclusive product cost plus the pre-VAT labour cost.

    :param number_heated_rooms: number of heated rooms to be fitted with sensors and valves
    :return: dict with the total, the subtotal, the VAT and the labour hours/days estimates
    """

    # Products (VAT inclusive)
    thermostat_cost = TTZC_SMART_THERMOSTAT_COST
    sensors_cost = TTZC_ROOM_TEMPERATURE_SENSOR_COST * number_heated_rooms
    valves_cost = TTZC_SMART_RADIATOR_VALUES * number_heated_rooms
    products = thermostat_cost + sensors_cost + valves_cost

    # Labour hours follow the same one-off plus per-room structure
    thermostat_hours = TTZC_SMART_THERMOSTAT_LABOUR_HOURS
    sensors_hours = TTZC_ROOM_TEMPERATURE_SENSOR_LABOUR_HOURS * number_heated_rooms
    valves_hours = TTZC_SMART_RADIATOR_VALUES_LABOUR_HOURS * number_heated_rooms
    hours = thermostat_hours + sensors_hours + valves_hours

    # Contingency and preliminaries are added to the labour to account for the complexity of the job
    labour_uplift = 1 + self.CONTINGENCY + self.PRELIMINARIES
    labour = (TTZC_ELECTRICIAN_HOURLY_RATE * hours) * labour_uplift

    labour_vat = labour * self.VAT_RATE
    subtotal = products + labour

    return {
        "total": subtotal + labour_vat,
        "subtotal": subtotal,
        "vat": labour_vat,
        "labour_hours": hours,
        # Whole days, based on 8 hours of labour per day
        "labour_days": np.ceil(hours / 8),
    }
|
||||
|
||||
def heater_removal(self, n_rooms):
    """
    Estimate the cost of removing room heaters, including redecoration of the space behind each heater.

    The per-room removal cost is treated as excluding VAT: VAT is added on top to reach the total.

    :param n_rooms: number of rooms from which a heater is removed
    :return: dict with the total, the pre-VAT subtotal, the VAT and the labour hours/days estimates
    """

    net_cost = ROOM_HEATER_REMOVAL_COST * n_rooms
    hours = ROOM_HEATER_REMOVAL_LABOUR_HOURS * n_rooms

    vat_element = net_cost * self.VAT_RATE

    costing = {
        "total": net_cost + vat_element,
        "subtotal": net_cost,
        "vat": vat_element,
        "labour_hours": hours,
    }
    # Whole days, based on 8 hours of labour per day
    costing["labour_days"] = np.ceil(hours / 8)
    return costing
|
||||
|
||||
@staticmethod
def _estimate_n_radiators(number_habitable_rooms, total_floor_area, property_type, built_form):
    """
    Estimate the number of radiators a property needs.

    Two estimates are produced and the larger one is used:

    1) A room based count: one radiator per habitable room plus an allowance for non-habitable areas, which
       is adjusted for the property type.
    2) A heat demand based count: the floor area multiplied by 80 W per square metre, divided by an assumed
       1000 W output per radiator, and scaled by a built form factor.

    :param number_habitable_rooms: number of habitable rooms in the property
    :param total_floor_area: total floor area, multiplied by the 80 W per square metre allowance
    :param property_type: one of 'Flat', 'House', 'Bungalow' or 'Maisonette'
    :param built_form: built form of the property. 'Mid-Terrace', 'Semi-Detached', 'Detached' and
        'End-Terrace' have specific factors; any other value is given a neutral factor of 1.0
    :return: the estimated number of radiators, rounded to an integer
    :raises ValueError: if the property type is not recognised
    """
    # Base number of radiators: one per habitable room
    base_radiators = number_habitable_rooms

    # Additional radiators for non-habitable essential areas (e.g., kitchens, hallways)
    additional_radiators = 3  # Initial assumption

    # Adjust additional radiators based on property type
    if property_type == 'Flat':
        additional_radiators -= 1  # Flats may need fewer radiators due to less exposure
    elif property_type in ('House', 'Bungalow', 'Maisonette'):
        # Houses and bungalows might need more due to greater exposure, and the multiple floors in a
        # Maisonette may require additional heating points
        additional_radiators += 2
    else:
        # ValueError is a subclass of Exception, so callers catching Exception are unaffected
        raise ValueError("Invalid property type")

    # Adjust total radiator needs based on built form
    form_factor = {
        'Mid-Terrace': 0.95,
        'Semi-Detached': 1.05,
        'Detached': 1.25,
        'End-Terrace': 1.05
    }
    # Built forms outside of the table (for example the "Enclosed" terrace variants, or anomalous values)
    # previously raised a KeyError. We have no specific factor for them, so we apply no adjustment.
    built_form_factor = form_factor.get(built_form, 1.0)

    # Calculate total heating power needed and number of radiators based on standard output
    total_heating_power_required = total_floor_area * 80  # Watts per square meter
    radiator_output = 1000  # Average wattage per radiator
    total_radiators_based_on_power = (total_heating_power_required / radiator_output) * built_form_factor

    # Final estimation taking the higher of calculated needs or base room count
    estimated_radiators = max(total_radiators_based_on_power, base_radiators + additional_radiators)
    return round(estimated_radiators)
|
||||
|
||||
def boiler(self, is_combi, size, exising_room_heaters, system_change, n_heated_rooms, n_rooms):
    """
    Cost the installation of a boiler, optionally including removal of existing room heaters and the extra
    works needed when the heating system is being changed.

    Based on a basic estimate of a median value of £2600 to install a low carbon combi boiler. First time
    central heating costs can also be found here:
    https://www.checkatrade.com/blog/cost-guides/central-heating-installation-cost/

    :param is_combi: truthy to price from the combi boiler table, otherwise the conventional boiler table
    :param size: key used to look up the unit cost in the selected boiler cost table
    :param exising_room_heaters: truthy if existing room heaters need to be removed
    :param system_change: truthy if radiators, flue and pipework also need to be installed
    :param n_heated_rooms: number of heated rooms, used to cost the heater removal
    :param n_rooms: number of habitable rooms, used to estimate the number of radiators
    :return: dict with the total, the pre-VAT subtotal, the VAT and the labour hours/days estimates
    """

    # Average cost of installation is 1 (maybe 2) days at £300 per day
    # https://www.checkatrade.com/blog/cost-guides/new-boiler-cost/
    # To be pessimistic, we assume 2 days of work
    day_rate = 300
    install_days = 2

    # The unit cost is described as the cost without VAT.
    # NOTE(review): VAT below is only calculated on the labour, so no VAT is ever added for the unit itself -
    # confirm whether the cost tables are really VAT-exclusive.
    price_table = COMBI_BOILER_COSTS if is_combi else CONVENTIONAL_BOILER_COSTS
    boiler_price = price_table[size]

    # Labour, uplifted by contingency and preliminaries
    install_labour = day_rate * self.labour_adjustment_factor * install_days
    install_labour = install_labour * (1 + self.CONTINGENCY + self.PRELIMINARIES)

    labour_vat = install_labour * self.VAT_RATE
    net_cost = boiler_price + install_labour

    costing = {
        "total": net_cost + labour_vat,
        "subtotal": net_cost,
        "vat": labour_vat,
        "labour_hours": install_days * 8,
        "labour_days": install_days,
    }

    if exising_room_heaters:
        # Existing room heaters have to be taken out, so the removal costing is added to the running totals
        removal = self.heater_removal(n_rooms=n_heated_rooms)
        for key in ("total", "subtotal", "labour_hours", "labour_days", "vat"):
            costing[key] += removal[key]

    if system_change:
        # A system change needs radiators, a flue, pipework and additional labour
        radiator_count = self._estimate_n_radiators(
            number_habitable_rooms=n_rooms,
            total_floor_area=self.property.floor_area,
            property_type=self.property.data["property-type"],
            built_form=self.property.data["built-form"]
        )

        extra_labour = day_rate * self.labour_adjustment_factor
        radiators_price = DOUBLE_RADIATOR_COST * radiator_count

        # These extras are treated as VAT-inclusive, so the net figure is backed out of the gross one
        extras_gross = radiators_price + FLUE_COST + PIPEWORK_COST + extra_labour
        extras_net = extras_gross / (1 + self.VAT_RATE)

        # One extra labour day is added for the system change
        costing["labour_days"] += 1
        costing["labour_hours"] += 8
        costing["total"] += extras_gross
        costing["subtotal"] += extras_net
        costing["vat"] += extras_gross - extras_net

    return costing
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ class FireplaceRecommendations(Definitions):
|
|||
self.has_ventilaion = None
|
||||
self.recommendation = None
|
||||
|
||||
def recommend(self):
|
||||
def recommend(self, phase=0):
|
||||
"""
|
||||
Based on the number of open fireplcaes found, we recommend sealing each one at a cost of
|
||||
around £500
|
||||
|
|
@ -32,19 +32,23 @@ class FireplaceRecommendations(Definitions):
|
|||
if number_open_fireplaces == 0:
|
||||
return
|
||||
|
||||
estimated_cost = number_open_fireplaces * self.COST_OF_WORK
|
||||
already_installed = "sealing_open_fireplace" in self.property.already_installed
|
||||
estimated_cost = number_open_fireplaces * self.COST_OF_WORK if not already_installed else 0
|
||||
|
||||
# We recommend installing two mechanical ventilation systems
|
||||
self.recommendation = [
|
||||
{
|
||||
"phase": phase,
|
||||
"parts": [],
|
||||
"type": "sealing_open_fireplace",
|
||||
"description": "Seal %s open fireplaces" % str(number_open_fireplaces),
|
||||
"starting_u_value": None,
|
||||
"new_u_value": None,
|
||||
"sap_points": None,
|
||||
"already_installed": already_installed,
|
||||
"total": estimated_cost,
|
||||
# Take a very basic estimate of 6 hours, multipled by the number of open fireplaces to seal
|
||||
"labour_hours": 6 * number_open_fireplaces
|
||||
"labour_hours": 6 * number_open_fireplaces,
|
||||
"labour_days": 6 * number_open_fireplaces / 8, # Assume 8 hour day
|
||||
}
|
||||
]
|
||||
|
|
|
|||
|
|
@ -8,9 +8,8 @@ from datatypes.enums import QuantityUnits
|
|||
from backend.Property import Property
|
||||
from recommendations.recommendation_utils import (
|
||||
r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value,
|
||||
get_recommended_part, get_floor_u_value
|
||||
get_recommended_part, get_floor_u_value, override_costs
|
||||
)
|
||||
from recommendations.rdsap_tables import FLOOR_LEVEL_MAP
|
||||
from recommendations.Costs import Costs
|
||||
|
||||
|
||||
|
|
@ -51,8 +50,9 @@ class FloorRecommendations(Definitions):
|
|||
]
|
||||
]
|
||||
|
||||
# For solid floor, we don't use materials that are too thick
|
||||
self.solid_floor_insulation_materials = [
|
||||
part for part in materials if part["type"] == "solid_floor_insulation"
|
||||
part for part in materials if part["type"] == "solid_floor_insulation" if float(part["depth"]) <= 75
|
||||
]
|
||||
|
||||
self.solid_floor_non_insulation_materials = [
|
||||
|
|
@ -69,15 +69,9 @@ class FloorRecommendations(Definitions):
|
|||
# TODO: To be completed
|
||||
self.exposed_floor_non_insulation_materials = []
|
||||
|
||||
def recommend(self):
|
||||
def recommend(self, phase=0):
|
||||
u_value = self.property.floor["thermal_transmittance"]
|
||||
|
||||
floor_level = (
|
||||
FLOOR_LEVEL_MAP[self.property.data["floor-level"]] if
|
||||
self.property.data["floor-level"] not in self.DATA_ANOMALY_MATCHES else None
|
||||
)
|
||||
property_type = self.property.data["property-type"]
|
||||
|
||||
floor_area = self.property.insulation_floor_area
|
||||
year_built = self.property.year_built
|
||||
|
||||
|
|
@ -89,7 +83,13 @@ class FloorRecommendations(Definitions):
|
|||
return
|
||||
|
||||
# If the property is a flat that isn't at ground level, it's likely impractical to recommend a floor upgrade
|
||||
if (floor_level != 0) and (property_type == "Flat"):
|
||||
if (self.property.floor_level != 0) and (property_type == "Flat") and (
|
||||
self.property.floor["another_property_below"]
|
||||
):
|
||||
return
|
||||
|
||||
# If the property is a new build flat, we won't recommend floor upgrades
|
||||
if len(self.property.full_sap_epc) and (property_type == "Flat"):
|
||||
return
|
||||
|
||||
if u_value:
|
||||
|
|
@ -103,15 +103,17 @@ class FloorRecommendations(Definitions):
|
|||
# The floor is already compliant
|
||||
return
|
||||
|
||||
u_value = get_floor_u_value(
|
||||
floor_type=self.property.floor_type,
|
||||
area=floor_area,
|
||||
perimeter=self.property.perimeter,
|
||||
age_band=self.property.age_band,
|
||||
insulation_thickness=self.property.floor["insulation_thickness"],
|
||||
wall_type=self.property.wall_type
|
||||
)
|
||||
self.estimated_u_value = u_value
|
||||
if u_value is None:
|
||||
u_value = get_floor_u_value(
|
||||
floor_type=self.property.floor_type,
|
||||
area=floor_area,
|
||||
perimeter=self.property.perimeter,
|
||||
age_band=self.property.age_band,
|
||||
insulation_thickness=self.property.floor["insulation_thickness"],
|
||||
wall_type=self.property.wall_type
|
||||
)
|
||||
|
||||
self.estimated_u_value = u_value
|
||||
|
||||
if u_value < self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
|
||||
return
|
||||
|
|
@ -119,6 +121,7 @@ class FloorRecommendations(Definitions):
|
|||
if self.property.floor["is_suspended"]:
|
||||
# Given the U-value, we recommend underfloor insulation
|
||||
self.recommend_floor_insulation(
|
||||
phase=phase,
|
||||
u_value=u_value,
|
||||
insulation_materials=self.suspended_floor_insulation_materials,
|
||||
non_insulation_materials=self.suspended_floor_non_insulation_materials
|
||||
|
|
@ -130,7 +133,8 @@ class FloorRecommendations(Definitions):
|
|||
self.recommend_floor_insulation(
|
||||
u_value=u_value,
|
||||
insulation_materials=self.solid_floor_insulation_materials,
|
||||
non_insulation_materials=self.solid_floor_non_insulation_materials
|
||||
non_insulation_materials=self.solid_floor_non_insulation_materials,
|
||||
phase=phase
|
||||
)
|
||||
return
|
||||
|
||||
|
|
@ -142,9 +146,22 @@ class FloorRecommendations(Definitions):
|
|||
|
||||
@staticmethod
|
||||
def _make_floor_description(material):
|
||||
return f"Install {int(material['depth'])}{material['depth_unit']} {material['description']} insulation"
|
||||
|
||||
def recommend_floor_insulation(self, u_value, insulation_materials, non_insulation_materials):
|
||||
if material["type"] == "suspended_floor_insulation":
|
||||
return (f"Install {int(material['depth'])}{material['depth_unit']} {material['description']} insulation in "
|
||||
f"suspended floor")
|
||||
|
||||
if material["type"] == "solid_floor_insulation":
|
||||
return (f"Install {int(material['depth'])}{material['depth_unit']} {material['description']} insulation on "
|
||||
f"solid floor")
|
||||
|
||||
if material["type"] == "exposed_floor_insulation":
|
||||
return (f"Install {int(material['depth'])}{material['depth_unit']} {material['description']} insulation in "
|
||||
f"exposed floor")
|
||||
|
||||
raise ValueError("Invalid material type - implement me!")
|
||||
|
||||
def recommend_floor_insulation(self, u_value, insulation_materials, non_insulation_materials, phase):
|
||||
"""
|
||||
This method is tasked with estimating the impact of performing suspended floor insulation
|
||||
:return:
|
||||
|
|
@ -175,17 +192,27 @@ class FloorRecommendations(Definitions):
|
|||
material=material.to_dict(),
|
||||
non_insulation_materials=non_insulation_materials
|
||||
)
|
||||
|
||||
already_installed = "suspended_floor_insulation" in self.property.already_installed
|
||||
if already_installed:
|
||||
cost_result = override_costs(cost_result)
|
||||
|
||||
elif material["type"] == "solid_floor_insulation":
|
||||
cost_result = self.costs.solid_floor_insulation(
|
||||
insulation_floor_area=self.property.insulation_floor_area,
|
||||
material=material.to_dict(),
|
||||
non_insulation_materials=non_insulation_materials
|
||||
)
|
||||
|
||||
already_installed = "solid_floor_insulation" in self.property.already_installed
|
||||
if already_installed:
|
||||
cost_result = override_costs(cost_result)
|
||||
else:
|
||||
raise NotImplementedError("Implement me!")
|
||||
|
||||
self.recommendations.append(
|
||||
{
|
||||
"phase": phase,
|
||||
"parts": [
|
||||
get_recommended_part(
|
||||
part=material.to_dict(),
|
||||
|
|
@ -194,11 +221,12 @@ class FloorRecommendations(Definitions):
|
|||
cost_result=cost_result
|
||||
),
|
||||
],
|
||||
"type": "floor_insulation",
|
||||
"type": material["type"],
|
||||
"description": self._make_floor_description(material),
|
||||
"starting_u_value": u_value,
|
||||
"new_u_value": new_u_value,
|
||||
"sap_points": None,
|
||||
"already_installed": already_installed,
|
||||
**cost_result
|
||||
}
|
||||
)
|
||||
|
|
|
|||
248
recommendations/HeatingControlRecommender.py
Normal file
248
recommendations/HeatingControlRecommender.py
Normal file
|
|
@ -0,0 +1,248 @@
|
|||
from recommendations.Costs import Costs
|
||||
from recommendations.recommendation_utils import check_simulation_difference, override_costs
|
||||
from backend.Property import Property
|
||||
from etl.epc_clean.epc_attributes.MainheatControlAttributes import MainheatControlAttributes
|
||||
|
||||
|
||||
class HeatingControlRecommender:
|
||||
|
||||
def __init__(self, property_instance: Property):
    """
    Set up a heating control recommender for a single property.

    :param property_instance: the property that recommendations are produced for; it is also handed to the
        Costs helper used to price them
    """
    # Filled in by recommend(), which resets it at the start of every call
    self.recommendation = []

    self.property = property_instance
    self.costs = Costs(property_instance)
|
||||
|
||||
def recommend(self, heating_description):
    """
    Produce heating control recommendations that suit the main heating system.

    Any previous recommendations are cleared first. This first iteration of the recommender is deliberately
    basic: the heating description picks which control upgrades are considered, and descriptions that are
    not listed below produce no recommendations.

    :param heating_description: description of the main heating system
    :return: None; results are stored on self.recommendation
    """

    # Start from a clean slate
    self.recommendation = []

    if heating_description in ("Room heaters, electric",):
        self.recommend_room_heaters_electric_controls()
    elif heating_description in ("Electric storage heaters", "Electric storage heaters, radiators"):
        self.recommend_high_heat_retention_controls()
    elif heating_description in ("Boiler and radiators, mains gas",):
        # For a gas boiler with radiators we can offer both a roomstat, programmer and TRVs upgrade
        # and time and temperature zone controls
        self.recommend_roomstat_programmer_trvs()
        self.recommend_time_temperature_zone_controls()
|
||||
|
||||
def recommend_room_heaters_electric_controls(self):
    """
    Recommend a heating control upgrade for homes heated by electric room heaters.

    Upgrading the controls is the least invasive improvement, so it is considered before the heating system
    itself. We recommend a programmer and appliance thermostats when either:

    1) the heating control energy efficiency is "Poor", "Very Poor" or "Average", or
    2) the current controls are a programmer and room thermostat

    A room thermostat is commonly placed in a hallway and measures the temperature of the air surrounding it,
    signalling the heating system to turn on or off. An appliance thermostat sits on the heater/appliance
    itself, which allows for much more granular control at the level of each heater and prevents overheating.

    No other recommendations are implemented right now.

    :return: None; a recommendation is appended to self.recommendation when the criteria are met
    """

    rating_can_improve = self.property.data["mainheatc-energy-eff"] in ["Poor", "Very Poor", "Average"]
    if not rating_can_improve:
        current_description = self.property.main_heating_controls["clean_description"]
        if current_description not in ["Programmer and room thermostat"]:
            # Neither criterion is met, so there is nothing to recommend
            return

    # If the property already has a programmer, only the appliance thermostats need to be costed
    has_programmer = self.property.main_heating_controls["switch_system"] == "programmer"

    # Work out what changes between the current controls and the target controls, which tells us how
    # the simulation config should be updated
    target_controls = MainheatControlAttributes("Programmer and appliance thermostats").process()
    simulation_config = check_simulation_difference(
        new_config=target_controls, old_config=self.property.main_heating_controls
    )
    # The ending energy efficiency of the heating controls is set to "Good"
    simulation_config["mainheatc_energy_eff_ending"] = "Good"

    recommendation = {
        "description": "upgrade heating controls to Programmer and Appliance or Smart Thermostats",
    }
    recommendation.update(self.costs.programmer_and_appliance_thermostat(has_programmer=has_programmer))
    recommendation["simulation_config"] = simulation_config

    self.recommendation.append(recommendation)
|
||||
|
||||
def recommend_high_heat_retention_controls(self):
    """
    Recommend upgrading the heating controls to high heat retention storage heater controls (Celect type
    controls).

    This is a specific type of control system designed to work with electric storage heaters, and it is more
    efficient than the standard controls that come with them. The recommendation is always added when this
    method is called. No other recommendations are implemented right now.

    :return: None; a recommendation is appended to self.recommendation
    """

    # Work out what changes between the current controls and the target controls
    target_controls = MainheatControlAttributes("Controls for high heat retention storage heaters").process()
    simulation_config = check_simulation_difference(
        new_config=target_controls, old_config=self.property.main_heating_controls
    )
    # The ending energy efficiency of the heating controls is set to "Good"
    simulation_config["mainheatc_energy_eff_ending"] = "Good"

    recommendation = {
        "description": "upgrade heating controls to High Heat Retention Storage Heater Controls",
    }
    recommendation.update(self.costs.celect_type_controls())
    recommendation["simulation_config"] = simulation_config

    self.recommendation.append(recommendation)
|
||||
|
||||
def recommend_roomstat_programmer_trvs(self):
    """
    Recommend a programmer, room thermostat and TRVs for a home with a mains gas boiler and radiators.

    The upgrade is only recommended when at least one of the following holds for the current controls:
    1) The "no_control" attribute is set
    2) There is no programmer ("switch_system" is empty)
    3) There is no room thermostat ("thermostatic_control" is empty)
    4) There are no TRVs ("trvs" is empty)

    When the upgrade applies, one "heating_control" entry is appended to self.recommendation. If heating
    controls are flagged as already installed on the property, the costs are passed through override_costs
    and the description says that no further action is needed.
    :return:
    """

    current_controls = self.property.main_heating_controls

    # Work out which parts of the control system are already in place
    has_programmer = current_controls["switch_system"] is not None
    has_room_thermostat = current_controls["thermostatic_control"] is not None
    has_trvs = current_controls["trvs"] is not None
    flagged_no_control = current_controls["no_control"] is not None

    fully_equipped = has_programmer and has_room_thermostat and has_trvs and not flagged_no_control
    if fully_equipped:
        # Nothing to upgrade
        return

    target_controls = MainheatControlAttributes("Programmer, room thermostat and TRVS").process()
    # We use this to determine how we should be updating the config
    simulation_config = check_simulation_difference(
        old_config=current_controls, new_config=target_controls
    )
    # If the current controls are rated below good, the upgrade brings them up to good
    if self.property.data["mainheatc-energy-eff"] in ("Poor", "Very Poor", "Average"):
        simulation_config["mainheatc_energy_eff_ending"] = "Good"

    # Only the missing components are costed
    cost_result = self.costs.roomstat_programmer_trvs(
        number_heated_rooms=int(self.property.data["number-heated-rooms"]),
        has_programmer=has_programmer,
        has_room_thermostat=has_room_thermostat,
        has_trvs=has_trvs
    )

    already_installed = "heating_control" in self.property.already_installed
    if already_installed:
        cost_result = override_costs(cost_result)

    description = (
        "Heating controls have already been upgraded, no further action needed."
        if already_installed
        else "upgrade heating controls to Room thermostat, programmer and TRVs"
    )

    self.recommendation.append(
        {
            "type": "heating_control",
            "parts": [],
            "description": description,
            **cost_result,
            "starting_u_value": None,
            "new_u_value": None,
            "sap_points": None,
            "already_installed": already_installed,
            "simulation_config": simulation_config
        }
    )

    return
|
||||
|
||||
def recommend_time_temperature_zone_controls(self):
    """
    Recommend time and temperature zone controls for a home with a boiler. This is a more advanced and more
    efficient control system than the standard controls that come with a boiler, although it may come with
    a higher cost and more involved usage.

    No recommendation is made when either:
    1) The current heating controls are already time and temperature zone controls
    2) The current heating controls are already rated 'Very Good'

    Otherwise one "heating_control" entry is appended to self.recommendation. If heating controls are flagged
    as already installed on the property, the costs are passed through override_costs and the description
    says that no further action is needed.
    :return:
    """

    current_controls = self.property.main_heating_controls

    if current_controls["thermostatic_control"] == "time and temperature zone control":
        # Already on zone control, no recommendation needed
        return

    current_rating = self.property.data["mainheatc-energy-eff"]
    if current_rating == "Very Good":
        # Controls are already as efficient as this upgrade would make them
        return

    target_controls = MainheatControlAttributes("Time and temperature zone control").process()

    # We use this to determine how we should be updating the config
    simulation_config = check_simulation_difference(
        old_config=current_controls, new_config=target_controls
    )

    # If the current system is below very good, we make it very good
    if current_rating in ("Poor", "Very Poor", "Average", "Good"):
        simulation_config["mainheatc_energy_eff_ending"] = "Very Good"

    cost_result = self.costs.time_and_temperature_zone_control(
        number_heated_rooms=int(self.property.data["number-heated-rooms"])
    )

    already_installed = "heating_control" in self.property.already_installed
    if already_installed:
        cost_result = override_costs(cost_result)
        description = "Heating controls have already been upgraded, no further action needed."
    else:
        description = (
            "Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & "
            "temperature zone control)"
        )

    self.recommendation.append(
        {
            "type": "heating_control",
            "parts": [],
            "description": description,
            **cost_result,
            "starting_u_value": None,
            "new_u_value": None,
            "sap_points": None,
            "already_installed": already_installed,
            "simulation_config": simulation_config
        }
    )
|
||||
435
recommendations/HeatingRecommender.py
Normal file
435
recommendations/HeatingRecommender.py
Normal file
|
|
@ -0,0 +1,435 @@
|
|||
import pandas as pd
|
||||
|
||||
from recommendations.Costs import Costs
|
||||
from recommendations.recommendation_utils import check_simulation_difference, override_costs
|
||||
from backend.Property import Property
|
||||
from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
|
||||
from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes
|
||||
from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes
|
||||
from recommendations.HeatingControlRecommender import HeatingControlRecommender
|
||||
|
||||
|
||||
class HeatingRecommender:
    """
    Produces heating system recommendations for a single property.

    Homes with electric heating (or no heating and no mains gas) are recommended high heat retention storage
    heaters. Homes with a mains gas boiler, or with access to mains gas, are recommended a boiler upgrade.
    Heating controls recommendations come from HeatingControlRecommender and are either combined with the
    heating recommendation or listed as separate recommendations.
    """

    def __init__(self, property_instance: "Property"):
        """
        :param property_instance: The property we are producing recommendations for
        """
        self.property = property_instance
        self.costs = Costs(self.property)

        # Rebuilt from scratch on every call to recommend()
        self.recommendations = []

    def recommend(self, phase=0):
        """
        Rebuild self.recommendations for the property, based on the description of its main heating system
        and on whether it has access to mains gas.
        :param phase: The phase given to the recommendations that are produced
        :return:
        """

        # TODO: We could have a system flush recommendation for an existing boiler, where there is no need to replace
        #  the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this
        #  in the Costs class, stored as SYSTEM_FLUSH_COST

        self.recommendations = []
        # This first iteration of the recommender will provide very basic recommendation
        # We recommend heating controls based on the main heating system
        heating_description = self.property.main_heating["clean_description"]

        has_electric_heating_description = heating_description in [
            "Room heaters, electric", "Electric storage heaters", "Electric storage heaters, radiators"
        ]

        no_heating_no_mains = (
            heating_description in ["No system present, electric heaters assumed"] and
            not self.property.data["mains-gas-flag"]
        )

        if has_electric_heating_description or no_heating_no_mains:
            # Recommend high heat retention storage heaters
            self.recommend_hhr_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)

        # if the property has mains heating with boiler and radiators, we recommend optimal heating controls
        has_boiler = heating_description in ["Boiler and radiators, mains gas"]

        # We also check that the property doesn't have a heating system, but it has access to the mains gas
        no_heating_has_mains = (
            heating_description in ["No system present, electric heaters assumed"] and
            self.property.data["mains-gas-flag"]
        )

        has_gas_heaters = (
            heating_description in ["Room heaters, mains gas"] and
            self.property.data["mains-gas-flag"]
        )

        # We also check if the property has electric heating, but it has access to the mains gas
        electric_heating_has_mains = has_electric_heating_description and self.property.data["mains-gas-flag"]

        portable_heaters_has_mains = (
            heating_description in ["Portable electric heaters assumed for most rooms"] and
            self.property.data["mains-gas-flag"]
        )

        if (
            has_boiler or
            no_heating_has_mains or
            electric_heating_has_mains or
            has_gas_heaters or
            portable_heaters_has_mains
        ):
            # This indicates that the home previously did not have a boiler in place and so would require
            # an overhaul to the system - right now, this is all reasons, apart from if there is an existing boiler
            system_change = not has_boiler
            exising_room_heaters = heating_description in [
                "Room heaters, electric", "Room heaters, mains gas"
            ]

            self.recommend_boiler_upgrades(
                phase=phase, system_change=system_change, exising_room_heaters=exising_room_heaters
            )

        return

    @staticmethod
    def check_simulation_difference(old_config, new_config):
        """
        Given two dictionaries that describe a configuration, pick out the keys whose value differs between the
        two. This is used to determine how we should be updating the configuration in the simulation.
        :param old_config: The current configuration. It must contain every key of new_config
        :param new_config: The configuration after the upgrade
        :return: A dictionary mapping "<key>_ending" to the new value, for every key whose value has changed
        """

        return {
            f"{key}_ending": new_value
            for key, new_value in new_config.items()
            if old_config[key] != new_value
        }

    def combine_heating_and_controls(
            self, controls_recommendations, heating_simulation_config, costs, description, phase, heating_controls_only,
            system_change
    ):
        """
        Given a recommendation for heating controls, and a recommendation for the heating system, we combine the two
        into a single recommendation
        :param controls_recommendations: The heating controls recommendations. Only the first one is combined
                                         with the heating system
        :param heating_simulation_config: The simulation configuration for the heating system. If this is empty
                                          and there is no system change, no heating recommendation is produced
        :param costs: The costs of the heating system
        :param description: The description of the recommendation
        :param phase: The phase of the recommendation
        :param heating_controls_only: If True, we will also add a recommendation for heating controls only
        :param system_change: Indicates if we are recommending a different type of heating system, compared to the
                              current system. If we have a system change and we have a heat control recommendation,
                              we only recommend both heating and controls together
        :return: The list of recommendations
        """

        # We produce recommendations with & without heating controls
        # We will also produce a recommendation for heating controls only
        heating_controls_switch = [True, False] if controls_recommendations else [False]
        if not heating_simulation_config:
            heating_controls_switch = []

        if system_change and controls_recommendations:
            heating_controls_switch = [True]

        # The flag is the same for every recommendation produced here. This previously checked for
        # "cavity_wall_insulation", which meant a heating recommendation was zeroed out based on the walls
        already_installed = "heating" in self.property.already_installed

        output = []
        for controls_switch in heating_controls_switch:
            total_costs = costs.copy()
            recommendation_simulation_config = heating_simulation_config.copy()
            recommendation_description = description
            if controls_switch:
                controls_recommendation = controls_recommendations[0]
                # We add the costs of the heating controls, onto each key in the costs dictionary
                for key in total_costs:
                    total_costs[key] += controls_recommendation[key]

                recommendation_simulation_config = {
                    **recommendation_simulation_config,
                    **controls_recommendation["simulation_config"]
                }
                controls_description = controls_recommendation["description"]
                # Make the first letter of the description lowercase
                controls_description = controls_description[0].lower() + controls_description[1:]

                recommendation_description = f"{description} and {controls_description}"

            if already_installed:
                total_costs = override_costs(total_costs)
                recommendation_description = "Heating system has already been upgraded, no further action needed."

            output.append(
                {
                    "phase": phase,
                    "parts": [
                        # TODO
                    ],
                    "type": "heating",
                    "description": recommendation_description,
                    "starting_u_value": None,
                    "new_u_value": None,
                    "sap_points": None,
                    "already_installed": already_installed,
                    **total_costs,
                    "simulation_config": recommendation_simulation_config
                }
            )

        if heating_controls_only and controls_recommendations:
            # Also add on a recommendation for heating controls only
            heating_control_recommendation = controls_recommendations[0].copy()
            # Capitalize the first letter of the description
            heating_control_recommendation["description"] = (
                heating_control_recommendation["description"][0].upper() +
                heating_control_recommendation["description"][1:]
            )

            output.append(
                {
                    "phase": phase,
                    "parts": [
                        # TODO
                    ],
                    "type": "heating",
                    "starting_u_value": None,
                    "new_u_value": None,
                    "sap_points": None,
                    # Keys from the controls recommendation take precedence over the defaults above
                    **heating_control_recommendation
                }
            )

        return output

    def recommend_hhr_storage_heaters(self, phase, system_change, heating_controls_only):
        """
        We will recommend upgrading to a high heat retention storage system, if the current system is not already
        high heat retention storage

        :param phase: The phase of the recommendation
        :param system_change: Indicates if we are recommending a different type of heating system, compared to the
                              current system
        :param heating_controls_only: Indicates if we should include a recommendation for just heating controls
        :return:
        """

        controls_recommender = HeatingControlRecommender(self.property)
        # The heating controls we're recommending for are based on the recommended heating system
        high_heat_retention_contols_desc = "Controls for high heat retention storage heaters"
        # We only recommend Celect-type controls if the current heating system is not Celect-type controls
        if self.property.main_heating_controls["clean_description"] != high_heat_retention_contols_desc:
            controls_recommender.recommend(heating_description="Electric storage heaters, radiators")

        # Conditions for not needing this recommendation
        already_installed_hh_retention = (
            "Electric storage heaters" in self.property.main_heating["clean_description"] and
            self.property.main_heating_controls["clean_description"].lower() == high_heat_retention_contols_desc.lower()
        )

        # Conditions for not recommending electric storage heaters
        if already_installed_hh_retention:
            # No recommendation needed
            return

        # Set up artefacts, suitable for the simulation and regardless of controls
        heating_ending_config = MainHeatAttributes("Electric storage heaters, radiators").process()
        heating_simulation_config = check_simulation_difference(
            new_config=heating_ending_config, old_config=self.property.main_heating
        )
        # This upgrade will only take the heating system to average energy efficiency
        heating_simulation_config["mainheat_energy_eff_ending"] = "Average"

        # If the property is off-gas and has no heating system in place, the number of heated rooms will actually
        # be 0, so we use the number of rooms as the figure (less one, when there is more than one room)
        number_heated_rooms = self.property.data["number-heated-rooms"]
        if not number_heated_rooms > 0:
            number_of_rooms = self.property.number_of_rooms
            number_heated_rooms = number_of_rooms - 1 if number_of_rooms > 1 else number_of_rooms

        # Upgrade to electric storage heaters
        costs = self.costs.high_heat_electric_storage_heaters(
            number_heated_rooms=number_heated_rooms
        )
        description = "Install high heat retention electric storage heaters"

        recommendations = self.combine_heating_and_controls(
            controls_recommendations=controls_recommender.recommendation,
            heating_simulation_config=heating_simulation_config,
            costs=costs,
            description=description,
            phase=phase,
            heating_controls_only=heating_controls_only,
            system_change=system_change
        )

        self.recommendations.extend(recommendations)

    @staticmethod
    def estimate_boiler_size(property_type, built_form, floor_area, floor_height, num_heated_rooms):
        """
        Estimate the boiler size, in kW, for a property. The estimate starts from a base size for the property
        type, adds adjustments for the built form, the volume and the number of heated rooms, and is then
        clamped and snapped to the closest available boiler size.

        :param property_type: The type of property, e.g. 'Flat' or 'House'. A type that is not in the base
                              size table is sized using the largest base size
        :param built_form: The built form, e.g. 'Mid-Terrace' or 'Detached'. A form that is not in the
                           adjustment table gets the largest adjustment
        :param floor_area: The floor area of the property
        :param floor_height: The floor height of the property
        :param num_heated_rooms: The number of heated rooms
        :return: The estimated boiler size in kW, which will be one of 30, 35 or 40
        """

        # Step 1: Base size estimation based on property type (as a starting point)
        base_size = {
            'Flat': 25,
            'House': 30,
            'Maisonette': 28,
            'Bungalow': 27
        }

        # Step 2: Calculate the volume of the property
        volume = floor_area * floor_height

        # Step 3: Adjust base size for built form (to account for heat retention)
        form_adjustment = {
            'Mid-Terrace': 0,
            'End-Terrace': 2,
            'Semi-Detached': 4,
            'Detached': 6
        }

        # Step 4: Further adjust for the total volume and number of heated rooms
        volume_adjustment = (volume / 100)  # Simplified adjustment factor for volume
        rooms_adjustment = (num_heated_rooms - 5) * 0.5  # Assuming base case of 5 rooms

        # Values outside the two tables used to raise a KeyError and abort the whole recommendation.
        # We fall back to the largest listed figure instead, so an unrecognised property is never under-sized
        # NOTE(review): confirm which property types and built forms reach this method, and whether any of them
        # deserve their own entry in the tables above
        estimated_size = (
            base_size.get(property_type, max(base_size.values())) +
            form_adjustment.get(built_form, max(form_adjustment.values())) +
            volume_adjustment +
            rooms_adjustment
        )

        # Step 5: Keep the estimate within the 30kW to 40kW range and align it with the available boiler sizes
        available_sizes = [30, 35, 40, 45, 50]
        estimated_size = min(max(estimated_size, 30), 40)

        # Find the closest available size. Because of the clamp above this is 30, 35 or 40, and an estimate that
        # sits exactly between two sizes takes the smaller one
        closest_size = min(available_sizes, key=lambda size: abs(size - estimated_size))

        return closest_size

    def recommend_boiler_upgrades(self, phase, system_change, exising_room_heaters):
        """
        This boiler recommendation will only recommend a like-for-like upgrade, since changing the system
        is generally more expensive.

        A boiler is only recommended when the current main heating efficiency is 'Very Poor', 'Poor' or
        'Average'. What is added to self.recommendations then depends on system_change:
        - With a system change, the boiler is combined with each heating controls recommendation. If there are
          no heating controls recommendations, the boiler is recommended on its own. If there is no boiler
          recommendation, nothing is added
        - Without a system change, the boiler recommendation (if any) is added on its own, followed by the
          heating controls recommendations, which move to the next phase when a heating recommendation exists

        :param phase: The phase of the recommendation
        :param system_change: Indicates if the property would be undergoing a heating system change. This could be true
                              if the home didn't have a heating system in place, or if the home had electric heating
                              previously
        :param exising_room_heaters: Indicates if the property had room heaters previously - if so, a boiler
                                     recommendation will need to be accompanied by removal of the room heaters
        :return:
        """

        recommendation_phase = phase

        # We now recommend boiler upgrades, if applicable
        simulation_config = {}
        boiler_costs = {}
        boiler_recommendation = {}
        if self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"]:
            boiler_size = self.estimate_boiler_size(
                property_type=self.property.data["property-type"],
                built_form=self.property.data["built-form"],
                floor_area=self.property.floor_area,
                floor_height=self.property.floor_height,
                num_heated_rooms=self.property.data["number-heated-rooms"],
            )

            # We recommend a combi boiler under the following conditions
            # 1) If there are 4 or fewer rooms (we don't use heated rooms because none of the rooms could be
            # heated if there is no existing heating system).
            # 2) There is 1 or fewer bathrooms
            # Otherwise, we recommend a gas condensing boiler, which will serve a larger property, that has multiple
            # bathrooms
            is_combi = (
                (self.property.number_of_rooms <= 4) and
                (self.property.n_bathrooms in [None, 0, 1])
            )
            if is_combi:
                description = "Upgrade to a new combi boiler"
            else:
                description = "Upgrade to a new gas condensing boiler"

            simulation_config = {"mainheat_energy_eff_ending": "Good"}
            if system_change:
                # Installation of a boiler improves the hot water system so we need to reflect this in
                # the outcome of the recommendation
                heating_ending_config = MainHeatAttributes("Boiler and radiators, mains gas").process()
                hotwater_ending_config = HotWaterAttributes("From main system").process()
                fuel_ending_config = MainFuelAttributes("mains gas (not community)").process()

                heating_simulation_config = check_simulation_difference(
                    new_config=heating_ending_config, old_config=self.property.main_heating
                )
                hotwater_simulation_config = check_simulation_difference(
                    new_config=hotwater_ending_config, old_config=self.property.hotwater
                )
                fuel_simulation_config = check_simulation_difference(
                    new_config=fuel_ending_config, old_config=self.property.main_fuel
                )

                simulation_config = {
                    **simulation_config,
                    **heating_simulation_config,
                    **hotwater_simulation_config,
                    **fuel_simulation_config,
                    "hot_water_energy_eff_ending": "Good"
                }

            boiler_costs = self.costs.boiler(
                is_combi=is_combi,
                size=f"{boiler_size}kw",
                exising_room_heaters=exising_room_heaters,
                system_change=system_change,
                n_heated_rooms=self.property.data["number-heated-rooms"],
                n_rooms=self.property.number_of_rooms
            )

            already_installed = "heating" in self.property.already_installed
            if already_installed:
                boiler_costs = override_costs(boiler_costs)
                description = "Heating system has already been upgraded, no further action needed."

            boiler_recommendation = {
                "phase": recommendation_phase,
                "parts": [
                    # TODO
                ],
                "type": "heating",
                "description": description,
                "starting_u_value": None,
                "new_u_value": None,
                "sap_points": None,
                "already_installed": already_installed,
                "simulation_config": simulation_config,
                **boiler_costs
            }

        # We recommend the heating controls
        # We may have 2 recommendations from the heating controls
        controls_recommender = HeatingControlRecommender(self.property)
        controls_recommender.recommend(heating_description="Boiler and radiators, mains gas")
        controls_recommendations = controls_recommender.recommendation

        if system_change:
            if not boiler_recommendation:
                # There is no boiler to install, so boiler controls would have nothing to control. This case
                # used to raise a KeyError when looking up the description of the empty boiler recommendation
                return

            if not controls_recommendations:
                # Nothing to combine with, so the boiler is recommended on its own
                self.recommendations.append(boiler_recommendation)
                return

            # We combine the heating and controls recommendations, in the case of a system change, giving one
            # combined recommendation per heating controls option
            for controls_recommendation in controls_recommendations:
                self.recommendations.extend(
                    self.combine_heating_and_controls(
                        controls_recommendations=[controls_recommendation],
                        heating_simulation_config=simulation_config,
                        costs=boiler_costs,
                        description=boiler_recommendation["description"],
                        phase=recommendation_phase,
                        heating_controls_only=False,
                        system_change=True
                    )
                )
            return

        # Like-for-like upgrade: the boiler stands as its own recommendation. It used to be built and then
        # dropped, so it never reached the output and the phase increment below could never happen
        if boiler_recommendation:
            self.recommendations.append(boiler_recommendation)

        if not controls_recommendations:
            return

        # We increment the recommendation phase, since the heating controls are separate from the boiler upgrade
        # but we'll only do so if we have a heating recommendation
        has_heating_recommendation = any(
            recommendation["type"] == "heating" for recommendation in self.recommendations
        )
        if has_heating_recommendation:
            recommendation_phase += 1

        # The heating controls recommendation is distinct from the boiler upgrade recommendation
        # We insert phase into the recommendations for heating controls
        for recommendation in controls_recommendations:
            recommendation["phase"] = recommendation_phase

        self.recommendations.extend(controls_recommendations)

        return
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue