diff --git a/.gitignore b/.gitignore
index 75f9cd1c..63884ad7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -265,4 +265,7 @@ model_data/simulation_system/predictions/
.idea/misc.iml
adhoc
-adhoc/*
\ No newline at end of file
+adhoc/*
+
+etl-router-venv/
+refactor_datasets/
\ No newline at end of file
diff --git a/backend/DbClient.py b/backend/DbClient.py
new file mode 100644
index 00000000..2ee01349
--- /dev/null
+++ b/backend/DbClient.py
@@ -0,0 +1,7 @@
+class DbClient:
+
+ def __init__(self):
+ """
+ Placeholder for the class that handles interaction with the database; no state is set up yet.
+ """
+ pass
diff --git a/backend/OrdnanceSurvey.py b/backend/OrdnanceSurvey.py
new file mode 100644
index 00000000..837e76bd
--- /dev/null
+++ b/backend/OrdnanceSurvey.py
@@ -0,0 +1,105 @@
+from functools import lru_cache
+import urllib.parse
+import requests
+from utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+class OrdnanceSuveyClient:
+
+ def __init__(self, address, postcode, api_key):
+ """
+ This class is tasked with interaction with the ordnance survey API.
+ :param address: The address for the property to search for
+ :param postcode: The postcode for the property to search for
+ :param api_key: The Ordnance Survey API key, sent as the "key" query parameter by get_places_api
+ """
+ self.address = address
+ self.postcode = postcode
+ self.full_address = ", ".join([self.address, self.postcode])
+ self.api_key = api_key
+ # Raw list of matches from the places api; stays None until get_places_api stores a response
+ self.results = None
+ # Best match and the values derived from it, all filled in by get_places_api
+ self.most_relevant_result = None
+ self.property_type = None
+ self.built_form = None
+ # This will be postcode and address, as returned by the ordnance survey
+ self.address_os = None
+ self.postcode_os = None
+
+    def set_places_address(self):
+        """
+        Store the address and postcode of the best places api match on ``address_os`` / ``postcode_os``.
+        :raises ValueError: if there is no best match yet
+        """
+        record = self.most_relevant_result
+        if record is None:
+            raise ValueError("No results found - run get_places_api first")
+
+        self.address_os = record["ADDRESS"]
+        self.postcode_os = record["POSTCODE"]
+        # The postcode is stored separately, so it is cut out of the address; the comma that
+        # preceded it is then dropped from the end
+        without_postcode = self.address_os.replace(self.postcode_os, "").strip()
+        # Addresses are kept in title case
+        self.address_os = without_postcode.rstrip(",").strip().title()
+        # Postcodes are kept in upper case
+        self.postcode_os = self.postcode_os.upper()
+
+    def get_places_api(self, timeout=30):
+        """
+        Query the OS Places ``find`` endpoint for ``self.full_address`` and store the best match on the instance.
+        The status is memoised per instance (``lru_cache`` on a method would pin instances in a global cache).
+        :param timeout: seconds to wait for the API before ``requests`` raises, instead of hanging forever
+        :return: dict of the form {"status": <HTTP status code>}; a non-200 or empty response is only logged
+        :raises ValueError: if no API key was provided
+        """
+        if not self.api_key:
+            raise ValueError("Ordnance Survey API key not specified")
+        if getattr(self, "_places_status", None) is not None:
+            return {"status": self._places_status}
+
+        encoded_address_query = urllib.parse.quote(self.full_address)
+        url = (f"https://api.os.uk/search/places/v1/find?query={encoded_address_query}&key="
+               f"{self.api_key}")
+        response = requests.get(url, timeout=timeout)
+        if response.status_code == 200:
+            # "results" may be absent or empty when nothing matches, so never index it blindly
+            self.results = response.json().get("results") or []
+        if self.results:
+            self.most_relevant_result = self.results[0]["DPA"]
+            self.parse_classification_code(self.most_relevant_result["CLASSIFICATION_CODE"])
+            self.set_places_address()
+        else:
+            logger.info("Could not find any results for the provided address and postcode")
+        self._places_status = response.status_code
+        return {"status": response.status_code}
+
+    def parse_classification_code(self, classification_code: str):
+        """
+        Convert the classification code returned by the OS places api into the property type and built form
+        used by the EPC database, storing them on ``self.property_type`` and ``self.built_form``.
+
+        The various classifications can be found here:
+        https://osdatahub.os.uk/docs/places/technicalSpecification
+
+        Under LPI Output, CLASSIFICATION_CODE is described, and a link is provided to the full table of classifications
+        For these purposes, we do not need the full classification as this includes non-residential properties. We only
+        parse the ones of interest to us. Unmapped codes (and missing keys) resolve to an empty string.
+        :param classification_code: OS classification code, e.g. "RD02"
+        :return: None - the result is stored on the instance
+        """
+        value_map = {
+            # In the OS api, "RD" is a "Dwelling" however this is not valid property type in the EPC database
+            'RD': {},
+            # Spellings must match the EPC built-form values exactly ("Detached", not "Detatched")
+            'RD02': {'property_type': 'House', 'built_form': 'Detached'},
+            'RD03': {'property_type': 'House', 'built_form': 'Semi-Detached'},
+            'RD04': {'property_type': 'House', 'built_form': 'Mid-Terrace'},
+            'RD06': {'property_type': 'Flat'},
+        }
+        mapped = value_map.get(classification_code, {})
+        self.property_type = mapped.get("property_type", "")
+        self.built_form = mapped.get("built_form", "")
diff --git a/backend/Property.py b/backend/Property.py
index e193ffbb..c784f6f2 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -9,18 +9,17 @@ from etl.epc.DataProcessor import EPCDataProcessor
from etl.epc.Dataset import TrainingDataset
from etl.epc.settings import LATEST_FIELD, MANDATORY_FIXED_FEATURES, POTENTIAL_COLUMNS, EFFICIENCY_FEATURES, BUILT_FORM_REMAP
from etl.epc_clean.epc_attributes.all_cleaners import all_cleaner_map
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from utils.logger import setup_logger
from utils.s3 import read_dataframe_from_s3_parquet
-from epc_api.client import EpcClient
from BaseUtility import Definitions
from recommendations.rdsap_tables import england_wales_age_band_lookup, FLOOR_LEVEL_MAP
from recommendations.recommendation_utils import (
- estimate_perimeter, get_wall_type, estimate_external_wall_area, esimtate_pitched_roof_area
+ estimate_perimeter, get_wall_type, estimate_external_wall_area, esimtate_pitched_roof_area, estimate_windows
)
ENVIRONMENT = os.environ.get('ENVIRONMENT', 'dev')
-EPC_AUTH_TOKEN = os.environ.get('EPC_AUTH_TOKEN')
DATA_BUCKET = os.environ.get('DATA_BUCKET', 'retrofit-data-dev' if ENVIRONMENT == 'dev' else None)
logger = setup_logger()
@@ -51,13 +50,14 @@ class Property(Definitions):
spatial = None
- def __init__(self, id, postcode, address1, epc_record, data=None):
+ def __init__(self, id, postcode, address, epc_record, data=None):
self.epc_record = epc_record
self.id = id
+
+ self.address = address
self.postcode = postcode
- self.address1 = address1
self.data = {k.replace("_", "-"): v for k,v in epc_record.get("prepared_epc").items()}
self.old_data = epc_record.get("old_data")
self.property_dimensions = None
@@ -112,6 +112,9 @@ class Property(Definitions):
self.insulation_floor_area = None
self.number_lighting_outlets = epc_record.prepared_epc.get("fixed_lighting_outlets_count")
self.floor_level = None
+ self.number_of_windows = None
+ self.solar_pv_roof_area = None
+ self.solar_pv_percentage = None
self.current_adjusted_energy = None
self.expected_adjusted_energy = None
@@ -177,81 +180,51 @@ class Property(Definitions):
recommendation_record["walls_insulation_thickness_ending"] = "above average"
recommendation_record["walls_energy_eff_ending"] = "Good"
else:
- if recommendation_record["walls_thermal_transmittance_ending"] is None:
- raise ValueError("We should not have a None value for the u value")
+ wind_turbine_count = int(wind_turbine_count)
- if recommendation_record["walls_insulation_thickness_ending"] is None:
- recommendation_record["walls_insulation_thickness_ending"] = "none"
+ self.wind_turbine = {
+ "wind_turbine": wind_turbine_count,
+ }
- # Update description to indicate it's insulate
- if recommendation["type"] in ["solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation"]:
- if len(recommendation["parts"]) > 1:
- raise NotImplementedError("Have more than 1 floor insulation part - handle this case")
+    def set_count_variables(self):
- recommendation_record["floor_thermal_transmittance_ending"] = recommendation["new_u_value"]
- # We don't really see above average for this in the training data
- recommendation_record["floor_insulation_thickness_ending"] = "average"
- recommendation_record["floor_energy_eff_ending"] = "Good"
- else:
- if recommendation_record["floor_thermal_transmittance_ending"] is None:
- raise ValueError("We should not have a None value for the u value")
+        """
+        For EPC fields that are just counts, we'll set them here, each one read from its own EPC field.
+        These are fields that are integers but may contain additional values such as "" so we can't do a direct
+        conversion straight to an integer. Missing storeys / rooms become None, other missing counts become 0.
+        :return:
+        """
- if recommendation_record["floor_insulation_thickness_ending"] is None:
- recommendation_record["floor_insulation_thickness_ending"] = "none"
+        fields = {
+            "number_of_open_fireplaces": "number-open-fireplaces",
+            "number_of_extensions": "extension-count",
+            "number_of_storeys": "flat-storey-count",
+            "number_of_rooms": "number-habitable-rooms",
+        }
- if recommendation["type"] in ["loft_insulation", "room_roof_insulation", "flat_roof_insulation"]:
- recommendation_record["roof_thermal_transmittance_ending"] = recommendation["new_u_value"]
+        null_attributes = ["number_of_storeys", "number_of_rooms"]
- parts = recommendation["parts"]
- if len(parts) != 1:
- raise ValueError("More than one part for roof insulation - investiage me")
+        for attribute, epc_field in fields.items():
+            # Read the field mapped to this attribute (this used to read "extension-count" for every attribute)
+            value = self.data[epc_field]
+            # None, "" and the known anomaly markers all mean that the count is missing
+            if value in (None, "") or value in self.DATA_ANOMALY_MATCHES:
+                value = None if attribute in null_attributes else 0
+            else:
+                value = int(value)
+
- # This is based on the values we have in the training data
- valid_numeric_values = [
- 12, 25, 50, 75, 100, 150, 200, 250, 270, 300, 350, 400
- ]
+            setattr(self, attribute, value)
- proposed_depth = int(parts[0]["depth"])
- if proposed_depth not in valid_numeric_values:
- # Take the nearest value for scoring
- proposed_depth = min(valid_numeric_values, key=lambda x: abs(x - proposed_depth))
-
- recommendation_record["roof_insulation_thickness_ending"] = str(proposed_depth)
- recommendation_record["roof_energy_eff_ending"] = "Very Good"
- else:
- # Fill missing roof u-values - this fill is not based on recommended upgrades
- if recommendation_record["roof_thermal_transmittance_ending"] is None:
- raise ValueError("We should not have a None value for the u value")
-
- if recommendation_record["roof_insulation_thickness_ending"] is None:
- recommendation_record["roof_insulation_thickness_ending"] = "none"
-
- if recommendation["type"] == "mechanical_ventilation":
- recommendation_record["mechanical_ventilation_ending"] = 'mechanical, extract only'
-
- if recommendation["type"] == "sealing_open_fireplace":
- recommendation_record["number_open_fireplaces_ending"] = 0
-
- if recommendation["type"] == "low_energy_lighting":
- recommendation_record["low_energy_lighting_ending"] = 100
- recommendation_record["lighting_energy_eff_starting"] = "Very Good"
-
- if recommendation["type"] not in [
- "mechanical_ventilation", "sealing_open_fireplace", "low_energy_lighting",
- "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
- "loft_insulation", "room_roof_insulation", "flat_roof_insulation",
- "solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation"
- ]:
- raise NotImplementedError("Implement me")
-
- return recommendation_record
-
-
- def get_components(self, cleaned):
+ def get_components(self, cleaned, photo_supply_lookup, floor_area_decile_thresholds):
"""
Given the cleaning that has been performed, we'll use this to identify the property
components, from roof to walls to windows, heating and hot water
:param cleaned: This is the dictionary of components found in cleaner.cleaned
+ :param photo_supply_lookup: This is the lookup table for the photo supply, used to estimate the percentage
+ of the roof that is suitable for solar panels
+ :param floor_area_decile_thresholds: This is the decile thresholds for the floor area, used in estimating the
+ solar pv roof area
:return:
"""
@@ -301,6 +274,10 @@ class Property(Definitions):
self.set_wall_type()
self.set_floor_type()
self.set_floor_level()
+ self.set_windows_count()
+ self.set_solar_panel_area(
+ photo_supply_lookup=photo_supply_lookup, floor_area_decile_thresholds=floor_area_decile_thresholds
+ )
def set_spatial(self, spatial: pd.DataFrame):
"""
@@ -368,7 +345,7 @@ class Property(Definitions):
"""
Utility function for usage in the lambda, for preparing the _rating fields
"""
- return rating_lookup[field].value if field not in cls.DATA_ANOMALY_MATCHES else None
+ return rating_lookup[field].value if (field not in cls.DATA_ANOMALY_MATCHES) and (field is not None) else None
def get_property_details_epc(self, portfolio_id: int, rating_lookup):
@@ -409,6 +386,7 @@ class Property(Definitions):
"primary_energy_consumption": self.energy["primary_energy_consumption"],
"co2_emissions": self.energy["co2_emissions"],
"adjusted_energy_consumption": self.current_adjusted_energy,
+ "estimated": self.data.get("estimated", False)
}
return property_details_epc
@@ -664,7 +642,7 @@ class Property(Definitions):
:return:
"""
- if self.data["fixed-lighting-outlets-count"] == "":
+ if self.data["fixed-lighting-outlets-count"] in [None, ""]:
# We check old EPCs and the full SAP EPC
@@ -693,3 +671,52 @@ class Property(Definitions):
"""
self.current_adjusted_energy = current_adjusted_energy
self.expected_adjusted_energy = expected_adjusted_energy
+
+    def set_windows_count(self):
+        """
+        Store the estimate_windows result for this property on ``self.number_of_windows``.
+        """
+        epc = self.data
+        window_inputs = {
+            "property_type": epc["property-type"],
+            "built_form": epc["built-form"],
+            "construction_age_band": self.construction_age_band,
+            "floor_area": self.floor_area,
+            "number_habitable_rooms": self.number_of_rooms,
+            "extension_count": float(epc["extension-count"]),
+        }
+        self.number_of_windows = estimate_windows(**window_inputs)
+
+    def set_solar_panel_area(self, photo_supply_lookup, floor_area_decile_thresholds):
+        """
+        Estimate the roof area usable for solar PV and store it with the share of the roof it represents.
+        :param photo_supply_lookup: lookup table passed to SolarPhotoSupply.filter_photo_supply_lookup
+        :param floor_area_decile_thresholds: floor area decile thresholds passed to the same lookup
+        :raises ValueError: if the area this roof type depends on has not been set yet
+        :return:
+        """
+        # A flat roof is sized from the insulation floor area, any other roof from the pitched roof area. Only the
+        # area that will actually be used has to exist (the old check let a single None through to a TypeError).
+        is_flat = self.roof["is_flat"]
+        roof_area = self.insulation_floor_area if is_flat else self.pitched_roof_area
+        if roof_area is None:
+            raise ValueError(
+                "Need to set insulation floor area and pitched roof area before setting solar pv roof area"
+            )
+
+        photo_supply_matched = SolarPhotoSupply.filter_photo_supply_lookup(
+            photo_supply_lookup=photo_supply_lookup,
+            floor_area_decile_thresholds=floor_area_decile_thresholds,
+            tenure=self.data["tenure"],
+            built_form=self.data["built-form"],
+            property_type=self.data["property-type"],
+            construction_age_band=self.construction_age_band,
+            is_flat=is_flat,
+            is_pitched=self.roof["is_pitched"],
+            is_roof_room=self.roof["is_roof_room"],
+            floor_area=self.floor_area
+        )
+        # photo_supply_median is treated as a percentage: average the matched rows, then convert to a fraction
+        percentage_of_roof = photo_supply_matched["photo_supply_median"].mean() / 100
+        self.solar_pv_roof_area = roof_area * percentage_of_roof
+        self.solar_pv_percentage = percentage_of_roof
diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index 16c2a8c8..d69d8d86 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -1,12 +1,114 @@
import os
import time
+import re
+
+import usaddress
+import pandas as pd
+import numpy as np
from epc_api.client import EpcClient
+from backend.OrdnanceSurvey import OrdnanceSuveyClient
+from BaseUtility import Definitions
from utils.logger import setup_logger
from typing import List
from fuzzywuzzy import process
logger = setup_logger()
+# Target dtype for each EPC field, keyed by the hyphenated EPC column name. "Int64" is the pandas nullable
+# integer dtype, so count-like fields can still hold missing values.
+# NOTE(review): presumably applied with DataFrame.astype - the call site is not visible here, confirm.
+# Fields left without a cast (their entries were previously commented out): 'address', 'uprn-source',
+# 'inspection-date', 'lodgement-datetime', 'uprn', 'lodgement-date'.
+# 'hot-water-cost-current' is a float like every other *-cost-* field (it was previously typed as 'str').
+vartypes = {
+    'low-energy-fixed-light-count': "Int64",
+    'floor-height': 'float',
+    'heating-cost-potential': 'float',
+    'unheated-corridor-length': 'float',
+    'hot-water-cost-potential': 'float',
+    'construction-age-band': 'str',
+    'potential-energy-rating': 'str',
+    'mainheat-energy-eff': 'str',
+    'windows-env-eff': 'str',
+    'lighting-energy-eff': 'str',
+    'environment-impact-potential': "Int64",
+    'glazed-type': 'str',
+    'heating-cost-current': 'float',
+    'address3': 'str',
+    'mainheatcont-description': 'str',
+    'sheating-energy-eff': 'str',
+    'property-type': 'str',
+    'local-authority-label': 'str',
+    'fixed-lighting-outlets-count': "Int64",
+    'energy-tariff': 'str',
+    'mechanical-ventilation': 'str',
+    'hot-water-cost-current': 'float',
+    'county': 'str',
+    'postcode': 'str',
+    'solar-water-heating-flag': 'str',
+    'constituency': 'str',
+    'co2-emissions-potential': 'float',
+    'number-heated-rooms': 'float',
+    'floor-description': 'str',
+    'energy-consumption-potential': 'float',
+    'local-authority': 'str',
+    'built-form': 'str',
+    'number-open-fireplaces': "Int64",
+    'windows-description': 'str',
+    'glazed-area': 'str',
+    'mains-gas-flag': 'str',
+    'co2-emiss-curr-per-floor-area': 'float',
+    'address1': 'str',
+    'heat-loss-corridor': 'str',
+    'flat-storey-count': "Int64",
+    'constituency-label': 'str',
+    'roof-energy-eff': 'str',
+    'total-floor-area': 'float',
+    'building-reference-number': 'str',
+    'environment-impact-current': 'float',
+    'co2-emissions-current': 'float',
+    'roof-description': 'str',
+    'floor-energy-eff': 'str',
+    'number-habitable-rooms': 'float',
+    'address2': 'str',
+    'hot-water-env-eff': 'str',
+    'posttown': 'str',
+    'mainheatc-energy-eff': 'str',
+    'main-fuel': 'str',
+    'lighting-env-eff': 'str',
+    'windows-energy-eff': 'str',
+    'floor-env-eff': 'str',
+    'sheating-env-eff': 'str',
+    'lighting-description': 'str',
+    'roof-env-eff': 'str',
+    'walls-energy-eff': 'str',
+    'photo-supply': 'float',
+    'lighting-cost-potential': 'float',
+    'mainheat-env-eff': 'str',
+    'multi-glaze-proportion': 'float',
+    'main-heating-controls': 'str',
+    'flat-top-storey': 'str',
+    'current-energy-rating': 'str',
+    'secondheat-description': 'str',
+    'walls-env-eff': 'str',
+    'transaction-type': 'str',
+    'current-energy-efficiency': 'float',
+    'energy-consumption-current': 'float',
+    'mainheat-description': 'str',
+    'lighting-cost-current': 'float',
+    'extension-count': "Int64",
+    'mainheatc-env-eff': 'str',
+    'lmk-key': 'str',
+    'wind-turbine-count': "Int64",
+    'tenure': 'str',
+    'floor-level': 'str',
+    'potential-energy-efficiency': "Int64",
+    'hot-water-energy-eff': 'str',
+    'low-energy-lighting': 'float',
+    'walls-description': 'str',
+    'hotwater-description': 'str'
+}
+
class SearchEpc:
"""
@@ -38,53 +140,127 @@ class SearchEpc:
self,
address1: str,
postcode: str,
- address2: str = None,
- address3: str = None,
- address4: str = None,
- max_retries: int = None
+ auth_token: str,
+ os_api_key: str,
+ full_address: str | None = None,
+ max_retries: int = None,
+ uprn: [int, None] = None,
+ size=None,
+ property_type=None,
):
"""
Address lines 1 and postcode are mandatory fields. The other address lines are optional
but can be used to find the epc for the home, if address1 and postcode are insufficient
:param address1: string, propery's address line 1
:param postcode: string, propery's postcode
- :param address2: string, optional, propery's address line 2
- :param address3: string, optional, propery's address line 3
- :param address4: string, optional, propery's address line 4
+ :param full_address: string, optional parameter, the full address of the property
+ :param max_retries: int, optional, number of retries to make when searching the api
+ :param uprn: int, optional, the uprn of the property
+ :param size: int, optional, the number of results to return. If not provided, defaults to 25 which is the api's
+ default
+ :param property_type: str, optional, the property type of the property, if known beforehand
"""
self.address1 = address1
self.postcode = postcode
- self.address2 = address2
- self.address3 = address3
- self.address4 = address4
+ self.full_address = full_address
+ self.uprn = uprn
+ self.house_number = self.get_house_number(self.address1)
+ self.numeric_house_number = self.extract_numeric_housenumber_part(self.house_number)
self.max_retries = max_retries if max_retries is not None else self.MAX_RETRIES
- self.client = EpcClient(auth_token=os.getenv("EPC_AUTH_TOKEN"))
+ self.client = EpcClient(auth_token=auth_token)
+ self.ordnance_survey_client = OrdnanceSuveyClient(
+ address=self.address1, postcode=self.postcode, api_key=os_api_key
+ )
self.data = None
+ self.newest_epc = None
+ self.older_epcs = None
+ self.full_sap_epc = None
- def search(self):
+ # These are the address and postcode values, which we store in the database
+ self.address_clean = None
+ self.postcode_clean = None
+
+ self.size = size if size is not None else 25
+
+ self.property_type = property_type
+
+    @classmethod
+    def get_house_number(cls, address: str) -> str | None:
+        """
+        Extract the house (or flat) number from an address string.
+
+        The usaddress library is tried first. If it reports no "AddressNumber" token, a regular expression
+        looks for 'Flat' or 'Apartment' followed by a number, or for a number at the start of the address.
+        :param address: the address to parse
+        :return: the house number as a string, or None if neither approach finds one
+        """
+
+        parsed_tokens = usaddress.parse(address)
+        number_tokens = [token for token, label in parsed_tokens if label == "AddressNumber"]
+        if number_tokens:
+            # Keep the first number found, with any commas attached to the token stripped out
+            return number_tokens[0].replace(",", "")
+
+        # Because usaddress isn't optimal for parsing addresses with some prefixes such as 'Flat',
+        # we also add a custom approach
+
+        # Pattern to look for 'Flat' or 'Apartment' followed by a number, or just a number at the beginning
+        pattern = r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)'
+        fallback = re.search(pattern, address)
+        if fallback is None:
+            return None
+
+        # Only one alternative of the pattern matches at a time, so exactly one group holds the number;
+        # the other group is None
+        after_prefix, at_start = fallback.groups()
+        return after_prefix if after_prefix is not None else at_start
+
+    @staticmethod
+    def extract_numeric_housenumber_part(house_number: str | None) -> int | None:
+        """
+        Return the first run of digits found in a house number as an integer.
+        :param house_number: house number such as "12A"; may be None
+        :return: the first run of digits as an int, or None if the input is None or contains no digits
+        """
+        if house_number is None:
+            return None
+
+        # Regular expression to find the first occurrence of one or more digits
+        digits = re.search(r'\d+', house_number)
+        return int(digits.group()) if digits else None
+
+ def get_epc(self, params=None, size=None):
# Get the EPC data with retries
+ size = size if size is not None else self.size
+ if params is None:
+ if self.uprn:
+ params = {"uprn": self.uprn}
+ else:
+ params = {"address": self.address1, "postcode": self.postcode}
for retry in range(self.max_retries):
try:
- response = self.client.domestic.search(
- params={"address": self.address1, "postcode": self.postcode}
- )
+
+ if "uprn" in params:
+ # We use the direct call method inside, since we need to implement uprn as a valid
+ # parameter for the search function
+ url = os.path.join(self.client.domestic.host, "search")
+ response = self.client.domestic.call(method="get", url=url, params=params)
+ else:
+ response = self.client.domestic.search(params=params, size=size)
if response:
self.data = response
return self.SUCCESS
if retry > 0:
- print("Failed previous attempt but retry successful")
+ logger.info("Failed previous attempt but retry successful")
# If we got nothing, final try
if not response:
- # TODO: Make a call to OS uprn service and get the address' uprn, just in case there is an
- # issue with how we are searching the api
-
return {
"status": 204,
"message": "no data",
@@ -127,7 +303,6 @@ class SearchEpc:
if len(uprns) == 1:
return rows
- logger.error("Multiple UPRNS found - we should use an alternate method of searching - TODO")
if property_type is not None:
# We can do a filter on the property type
rows_filtered = [r for r in rows if r["property-type"] == property_type]
@@ -147,7 +322,24 @@ class SearchEpc:
return rows
- def retrieve(self, property_type=None, address=None):
+    @staticmethod
+    def format_address(newest_epc):
+        """
+        Build the address and postcode, formatted for storage in the database, from an EPC record.
+        :param newest_epc: EPC record holding "address" and "postcode" keys
+        :return: tuple of (title-cased address with the postcode removed, upper-cased postcode)
+        """
+        raw_postcode = newest_epc["postcode"]
+        raw_address = newest_epc["address"]
+
+        # The postcode is returned separately, so cut it out of the address and then drop
+        # the trailing comma that this leaves behind
+        without_postcode = raw_address.replace(raw_postcode, "").strip()
+        formatted_address = without_postcode.rstrip(",").strip().title()
+
+        return formatted_address, raw_postcode.upper()
+
+ def extract_epc_data(self, address=None):
"""
Given a successful search, this method will format the data and return it
@@ -163,7 +355,7 @@ class SearchEpc:
# Firstly, we should only have 1 urpn so if we have multiple, we'll need to filter down the
# property further
- rows = self.filter_rows(rows, property_type=property_type, address=None)
+ rows = self.filter_rows(rows, property_type=self.property_type, address=None)
rows = self.filter_rows(rows, property_type=None, address=address)
# We now check for a full sap epc:
@@ -173,7 +365,26 @@ class SearchEpc:
# Finally, we identify the newest epc and the rest, and then return
newest_epc, older_epcs = self.filter_newest_epc(list_of_epcs=rows)
- return newest_epc, older_epcs, full_sap_epc
+ # Retrieve postcode and address
+ address_epc, postcode_epc = self.format_address(newest_epc=newest_epc)
+
+ # Get the uprn from the newest record for this home
+ uprns = {r["uprn"] for r in rows if r["uprn"]}
+ # We can sometimes have no uprn for a property
+ if (len(uprns) == 0) and len(rows) > 0:
+ logger.warning("Found data but missing uprn")
+ elif len(uprns) != 1:
+ # There is a possibility that we have multiple UPRNs for a single property, which is an error
+ addresses = {r["address"] for r in rows}
+ if len(addresses) == 1:
+ # Take the uprn from the most recent
+ uprns = {newest_epc["uprn"]}
+ else:
+ raise ValueError("Multiple UPRNs found - investigate me")
+
+ uprn = uprns.pop() if uprns else None
+
+ return newest_epc, older_epcs, full_sap_epc, address_epc, postcode_epc, uprn
@staticmethod
def filter_newest_epc(list_of_epcs: List):
@@ -186,8 +397,334 @@ class SearchEpc:
return {}, []
if len(newest_response) != 1:
- raise Exception("More than one result found for this address - investigate me")
+ # It is possible (but rare, and likely an error on EPC lodgement) that we have multiple EPCs that
+ # were lodged at the exact same time. In this case, we will take the first one
+ newest_response = [newest_response[0]]
older_epcs = [epc for epc in list_of_epcs if epc["lmk-key"] != newest_response[0]["lmk-key"]]
return newest_response[0], older_epcs
+
+ @staticmethod
+ def _get_epc_mode(col: str, epc_data: pd.DataFrame):
+ """
+ Simple method to extract the mode value from the EPC data
+ :param col: name of the column to take the mode of
+ :param epc_data: pandas dataframe of epc data
+ """
+
+ mode_value = epc_data[[col]].mode(dropna=True)
+ if len(mode_value) != 1:
+ raise NotImplementedError("TODO: Handle multiple modes")
+ mode_value = mode_value.iloc[0][col]
+
+ return mode_value
+
+ def fetch_nearby_epcs(
+ self, initial_postcode: str,
+ lmks_to_drop: list[str] | None = None,
+ built_form: str = "",
+ property_type: str = ""
+ ):
+ """
+ Fetches and processes EPC data for a given initial postcode, applying successive trimming
+ to the postcode and filtering the data until a non-empty result set is found.
+
+ The function queries the EPC API with the provided postcode, and if no data is found or
+ if the data doesn't meet certain criteria, it progressively shortens the postcode by
+ removing the last character and retries the query. This process continues until a valid
+ set of EPC data is obtained or the postcode is exhausted.
+
+ Additional filtering is applied to the obtained EPC data based on 'lmk-key', 'built-form',
+ and 'property-type'. The data is also processed to extract and numerically interpret house
+ numbers, calculate house number distances, and apply weights based on these distances.
+
+ :param initial_postcode: The initial full postcode for the EPC data query.
+ :param lmks_to_drop: List of 'lmk-key' values to be excluded from the EPC data.
+ :param built_form: The 'built-form' value to be used for filtering the EPC data.
+ :param property_type: The 'property-type' value to be used for filtering the EPC data.
+ :return: pandas DataFrame of the filtered nearby EPC rows, including a 'weight' column
+ """
+
+ property_type_api_map = {
+ "Bungalow": "bungalow",
+ "Flat": "flat",
+ "House": "house",
+ "Maisonette": "maisonette",
+ "Park home": "park home",
+ }
+
+ postcode = initial_postcode
+ while postcode:
+ # Fetch data from EPC API
+ params = {"postcode": postcode}
+ if property_type:
+ params["property-type"] = property_type_api_map[property_type]
+
+ # We request homes of the relevant type with size=100, so as not to pull in too many irrelevant homes
+ epc_response = self.get_epc(params=params, size=100)
+
+ if epc_response["status"] == 200:
+ epc_data = pd.DataFrame(self.data["rows"])
+
+ if lmks_to_drop is not None:
+ epc_data = epc_data[~epc_data["lmk-key"].isin(lmks_to_drop)]
+
+ if not epc_data.empty:
+ # Further processing of the EPC data
+ epc_data['lodgement-datetime'] = pd.to_datetime(epc_data['lodgement-datetime'], format='mixed')
+ epc_data = epc_data.sort_values("lodgement-datetime", ascending=False).groupby("uprn").head(1)
+ epc_data["house_number"] = epc_data["address"].apply(lambda add1: self.get_house_number(add1))
+ epc_data["numeric_house_number"] = epc_data["house_number"].apply(
+ lambda house_num: self.extract_numeric_housenumber_part(house_num)
+ )
+
+ if self.numeric_house_number is None:
+ # If we don't have a house number, we treat all weights as equal
+ epc_data["weight"] = 1
+ else:
+ epc_data["house_number_distance"] = abs(
+ epc_data["numeric_house_number"] - self.numeric_house_number
+ )
+ # # We add 1, just in case we have a 0 weight (e.g. comparing house number 7a to 7b, or 9A to 9)
+ # epc_data["weight"] = 1 / (epc_data["house_number_distance"] + 1)
+ # # If we have a home without a house number, fill that weight with average
+ # epc_data["weight"] = epc_data["weight"].fillna(epc_data["weight"].mean())
+ # # Finally, we might not have any house numbers whatsoever so everything could be
+ # # missing, so we fill with 1
+ # epc_data["weight"] = epc_data["weight"].fillna(1)
+ # TODO: Testing
+ # If the postcode is different from the initial postcode, it doesn't make sense to have
+ # any weightings
+ if all(pd.isnull(epc_data["house_number_distance"])) or (postcode != initial_postcode):
+ epc_data["weight"] = 1
+ else:
+ epc_data["weight"] = 1 / np.sqrt(epc_data["house_number_distance"] + 1)
+ epc_data["weight"] = epc_data["weight"].fillna(epc_data["weight"].mean())
+
+ estimation_property_type = self._estimate_str(
+ key="property-type", estimation_data=epc_data
+ ) if property_type == "" else property_type
+
+ epc_built_form = self._estimate_str(
+ key="built-form",
+ estimation_data=epc_data[epc_data["property-type"] == estimation_property_type]
+ )
+
+ if built_form == "Semi-Detached" and epc_built_form in ["End-Terraced", "Mid-Terraced"]:
+ estimation_built_form = "End-Terraced"
+ elif (built_form == "") or (pd.isnull(built_form)):
+ estimation_built_form = epc_built_form
+ else:
+ estimation_built_form = built_form
+
+ # We handle some edge cases experienced with maisonettes - if built form is detached, just filter
+ # on maisonette
+ # We also add some additional logic for Park homes, because they are far less common than other
+ # property types
+
+ is_maisonette_with_bad_built_form = (estimation_property_type == "Maisonette") & (
+ estimation_built_form in ["Detached", "Semi-Detached"]
+ )
+
+ is_park_home_without_built_form = (estimation_property_type == "Park home") & (
+ sum(epc_data["built-form"] == estimation_built_form) == 0
+ )
+
+ has_missing_built_form = not estimation_built_form
+
+ if is_maisonette_with_bad_built_form or is_park_home_without_built_form or has_missing_built_form:
+ epc_data = epc_data[epc_data["property-type"] == estimation_property_type]
+ else:
+ epc_data = epc_data[
+ (epc_data["built-form"] == estimation_built_form) & (
+ epc_data["property-type"] == estimation_property_type)
+ ]
+
+ if not epc_data.empty:
+ return epc_data # Return the filtered data if it's not empty
+
+ # Shorten the postcode by one character for the next iteration
+ postcode = postcode[:-1].rstrip()
+
+ # If loop finishes without a valid response, raise an exception
+ raise Exception("Unable to find postcode data after trimming - investigate me")
+
+ def estimate_epc(self, property_type, built_form, lmks_to_drop=None):
+ """
+ For a property that does not have an EPC, we retrieve the EPC data for the closest properties
+ and estimate the EPC for the property in question.
+
+ Note - do we have postcodes with just a single address? We would need to use a different approach
+ to find the closest homes
+ :param property_type: This is the property type of the property we are estimating, that can be retrieved from
+ the ordnance survey api
+ :param built_form: This is the built form of the property we are estimating, that can be retrieved from
+ the ordnance survey api
+ :param lmks_to_drop: This is a list of LMK keys that should be dropped from the estimation process. This
+ is used as an override for testing, to drop EPCs for the property we are testing
+ :return: dict of estimated EPC fields for the property, flagged with "estimated" set to True
+ """
+
+ # From the ordnance survey data, we want to determine the property type and then use only similar property
+ # types for the estimation process
+ epc_data = self.fetch_nearby_epcs(
+ initial_postcode=self.postcode,
+ lmks_to_drop=lmks_to_drop,
+ built_form=built_form,
+ property_type=property_type
+ )
+
+ # For each attribute, we need to determine the datatype and use an appropriate method
+ # to estimate.
+ estimated_epc = {}
+ for key, vartype in vartypes.items():
+ epc_data[key] = np.where(pd.isnull(epc_data[key]), None, epc_data[key])
+ epc_data[key] = np.where(epc_data[key] == "", None, epc_data[key])
+ estimation_data = epc_data[[key, "weight", "lodgement-datetime"]].copy()
+ estimation_data = estimation_data[~pd.isnull(estimation_data[key])]
+ estimation_data = estimation_data[~estimation_data[key].isin(Definitions.DATA_ANOMALY_MATCHES)]
+ if vartype == "Int64":
+ # We have some edge cases where we get the error "invalid literal for int() with base 10: '1.0'"
+ # so this handles this
+ estimation_data[key] = estimation_data[key].astype(float).astype(vartype)
+ else:
+ estimation_data[key] = estimation_data[key].astype(vartype)
+
+ if estimation_data.shape[0] == 0:
+ estimated_epc[key] = None
+ continue
+
+ if vartype == "Int64":
+ estimated_value = self._estimate_int(estimation_data, key)
+ elif vartype == "float":
+ estimated_value = self._estimate_float(estimation_data, key)
+ elif vartype == "str":
+ estimated_value = self._estimate_str(estimation_data, key)
+ else:
+ raise NotImplementedError("estimation method not implemented for type")
+
+ estimated_epc[key] = estimated_value
+
+ # Insert an estimated lodgement datetime, with a weighted average
+ estimated_epc["lodgement-datetime"] = self.calculate_weighted_lodgement_datetime(epc_data=epc_data)
+ # Extract lodgement date
+ estimated_epc["lodgement-date"] = estimated_epc["lodgement-datetime"].strftime("%Y-%m-%d")
+
+ estimated_epc["postcode"] = self.postcode
+ estimated_epc["uprn"] = self.uprn
+ estimated_epc["address"] = self.full_address
+ # Indicate that this epc was estimated
+ estimated_epc["estimated"] = True
+
+ return estimated_epc
+
+ @staticmethod
+ def calculate_weighted_lodgement_datetime(epc_data):
+ numeric_dates = pd.to_datetime(epc_data['lodgement-datetime']).view('int64')
+
+ # Calculate the weighted sum of dates
+ weighted_sum = (numeric_dates * epc_data['weight']).sum()
+
+ # Calculate the sum of weights
+ total_weights = epc_data['weight'].sum()
+
+ # Calculate the weighted mean in numeric format
+ weighted_mean_numeric = weighted_sum / total_weights
+
+ # Convert the numeric weighted mean back to datetime
+ weighted_mean_datetime = pd.to_datetime(weighted_mean_numeric)
+
+ return weighted_mean_datetime
+
+ @staticmethod
+ def _estimate_int(estimation_data, key):
+ return round(np.average(a=estimation_data[key], weights=estimation_data["weight"]))
+
+ @staticmethod
+ def _estimate_float(estimation_data, key):
+ return round(np.average(a=estimation_data[key], weights=estimation_data["weight"]), 2)
+
+ @staticmethod
+ def _estimate_str(estimation_data, key):
+ agg = estimation_data.groupby(key)["weight"].sum().reset_index()
+ agg = agg[agg["weight"] == agg["weight"].max()]
+ if agg.shape[0] != 1:
+ # If we have multiple modes, we take the more recent data on average
+ recent_grouped = estimation_data[
+ estimation_data[key].isin(agg[key].values)
+ ].groupby(key)["lodgement-datetime"].mean()
+
+ newest_group = recent_grouped.idxmax()
+ return newest_group
+
+ return agg[key].values[0]
+
+ def find_property(self, skip_os=False):
+ """
+ This method will attempt to identify a property. It will, at first, use the EPC api to try and
+ find the EPC for the property and the associated UPRN. If this fails, it will use the Ordnance Survey API to
+ find the UPRN of the address.
+
+ Because no result may have been provided by the EPC api because of formatting issues with the address,
+ if the ordnance survey api is used and the uprn retrieved, the EPC api is queried again with the UPRN, just
+ as a final check to see if there is any EPC data.
+
+ If there is no EPC data, the epc data will be estimated based on the surrounding properties
+ """
+
+ # Step 1: use the epc api to find the property and uprn
+ response = self.get_epc()
+
+ if response["status"] == 200:
+ (
+ self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn
+ ) = self.extract_epc_data(address=self.full_address)
+ return
+
+ # Step 2: If we don't have an EPC, we use the ordnance survey api to find the uprn
+ if skip_os:
+ if self.ordnance_survey_client.property_type is not None:
+ # We can try and estimate
+ estimated_epc = self.estimate_epc(
+ property_type=self.ordnance_survey_client.property_type,
+ built_form=self.ordnance_survey_client.built_form
+ )
+ self.newest_epc = estimated_epc
+ self.older_epcs = []
+ self.full_sap_epc = {}
+
+ # Finally, set a standardised address 1 and postcode
+ self.address_clean = self.ordnance_survey_client.address_os
+ self.postcode_clean = self.ordnance_survey_client.postcode_os
+ return
+
+ os_response = self.ordnance_survey_client.get_places_api()
+
+ if os_response["status"] != 200:
+ # Investigate this if it happens
+ raise Exception("Unable to find property - investigate me")
+
+ # Step 3: Now that we have a uprn, do another check against the epc api, this time searching with the uprn
+ self.uprn = self.ordnance_survey_client.most_relevant_result["UPRN"]
+ response = self.get_epc()
+ if response["status"] == 200:
+ (
+ self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn
+ ) = self.extract_epc_data()
+ return
+
+ # Step 4: If we still don't have an EPC, we estimate the EPC data
+ self.full_address = self.ordnance_survey_client.most_relevant_result["ADDRESS"]
+ estimated_epc = self.estimate_epc(
+ property_type=self.ordnance_survey_client.property_type,
+ built_form=self.ordnance_survey_client.built_form
+ )
+ self.newest_epc = estimated_epc
+ self.older_epcs = []
+ self.full_sap_epc = {}
+
+ # Finally, set a standardised address 1 and postcode
+ self.address_clean = self.ordnance_survey_client.address_os
+ self.postcode_clean = self.ordnance_survey_client.postcode_os
+ return
diff --git a/backend/app/config.py b/backend/app/config.py
index 22621972..764bddf5 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -13,6 +13,7 @@ class Settings(BaseSettings):
HEAT_PREDICTIONS_BUCKET: str
PLAN_TRIGGER_BUCKET: str
EPC_AUTH_TOKEN: str
+ ORDNANCE_SURVEY_API_KEY: str
DB_HOST: str
DB_PASSWORD: str
DB_USERNAME: str
diff --git a/backend/app/db/functions/property_functions.py b/backend/app/db/functions/property_functions.py
index 93dc0c49..88b4e87d 100644
--- a/backend/app/db/functions/property_functions.py
+++ b/backend/app/db/functions/property_functions.py
@@ -11,7 +11,7 @@ from backend.app.db.models.portfolio import (
from sqlalchemy.orm.exc import NoResultFound
-def create_property(session: Session, portfolio_id: int, address: str, postcode: str) -> (int, bool):
+def create_property(session: Session, portfolio_id: int, address: str, postcode: str, uprn: str) -> (int, bool):
"""
This function will create a record for the property in the database if it does not exist.
If it does exist, it will just update the updated_at field.
@@ -25,7 +25,7 @@ def create_property(session: Session, portfolio_id: int, address: str, postcode:
try:
# Attempt to fetch the existing property
existing_property = session.query(PropertyModel).filter_by(
- address=address, postcode=postcode, portfolio_id=portfolio_id
+ uprn=uprn, portfolio_id=portfolio_id
).one()
# Update the 'updated_at' field
@@ -43,6 +43,7 @@ def create_property(session: Session, portfolio_id: int, address: str, postcode:
address=address,
postcode=postcode,
portfolio_id=portfolio_id,
+ uprn=uprn,
creation_status=PropertyCreationStatus.LOADING,
status=PortfolioStatus.ASSESSMENT.value,
has_pre_condition_report=False,
diff --git a/backend/app/db/models/materials.py b/backend/app/db/models/materials.py
index 2ac7ddf4..97085d7a 100644
--- a/backend/app/db/models/materials.py
+++ b/backend/app/db/models/materials.py
@@ -19,7 +19,6 @@ class MaterialType(enum.Enum):
flat_roof_insulation = "flat_roof_insulation"
room_roof_insulation = "room_roof_insulation"
windows_glazing = "windows_glazing"
-
iwi_wall_demolition = "iwi_wall_demolition"
iwi_vapour_barrier = "iwi_vapour_barrier"
diff --git a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py
index 6f865381..f7c0370b 100644
--- a/backend/app/db/models/portfolio.py
+++ b/backend/app/db/models/portfolio.py
@@ -153,6 +153,7 @@ class PropertyDetailsEpcModel(Base):
primary_energy_consumption = Column(Float)
co2_emissions = Column(Float)
adjusted_energy_consumption = Column(Float)
+ estimated = Column(Boolean, default=False)
class PropertyDetailsSpatial(Base):
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 1704a42f..d35ea98b 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -4,6 +4,7 @@ import numpy as np
import pandas as pd
from epc_api.client import EpcClient
from etl.epc.Record import EPCRecord
+from backend.SearchEpc import SearchEpc
from fastapi import APIRouter, Depends
from sqlalchemy.exc import IntegrityError, OperationalError
from sqlalchemy.orm import sessionmaker
@@ -30,6 +31,8 @@ from backend.ml_models.api import ModelApi
from backend.Property import Property
from etl.epc.DataProcessor import EPCDataProcessor
from etl.epc.settings import COLUMNS_TO_MERGE_ON
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+
from recommendations.optimiser.CostOptimiser import CostOptimiser
from recommendations.optimiser.GainOptimiser import GainOptimiser
from recommendations.optimiser.optimiser_functions import prepare_input_measures
@@ -43,54 +46,6 @@ logger = setup_logger()
BATCH_SIZE = 5
-class DummyDownloader:
-
- def __init__(self, postcode, address1, id, epc_client):
- self.id = id
- self.postcode = postcode
- self.address1 = address1
-
- self.data = None
- self.old_data = None
-
- self.epc_client = epc_client
-
- def search_address_epc(self):
- """
- This method searches for an address in the EPC database and returns the first result
- :return: property data
- """
- if self.data:
- return
-
- # This will fail if a property does not have an EPC - this has been documented as a case to handle
- response = self.epc_client.domestic.search(params={"address": self.address1, "postcode": self.postcode})
-
- # Check if we have a full sap EPC
- self.full_sap_epc = [r for r in response["rows"] if r["transaction-type"] == "new dwelling"]
- self.full_sap_epc = self.full_sap_epc[0] if self.full_sap_epc else self.full_sap_epc
-
- if len(response["rows"]) > 1:
- newest_response = [
- r for r in response["rows"] if
- r["lodgement-datetime"] == max([x["lodgement-datetime"] for x in response["rows"]])
- ]
- if len(newest_response) > 1:
- raise Exception("More than one result found for this address - investigate me")
-
- # We'll keep old EPCs in case it contains information, not present on the newest one
- self.old_data = [epc for epc in response["rows"] if epc["lmk-key"] != newest_response[0]["lmk-key"]]
-
- response["rows"] = newest_response
-
- self.data = response["rows"][0]
- # For the moment, if we don't have a UPRN, we don't do anything about it, however we'll handle this in
- # the future by using the Ordnance Survey places API
- if not self.data["uprn"]:
- logger.warning("We do not have a UPRN for this property")
- else:
- self.uprn = int(self.data["uprn"])
-
router = APIRouter(
prefix="/plan",
tags=["plan"],
@@ -103,37 +58,34 @@ router = APIRouter(
@router.post("/trigger")
async def trigger_plan(body: PlanTriggerRequest):
logger.info("Connecting to db")
- # session = sessionmaker(bind=db_engine)()
+ session = sessionmaker(bind=db_engine)()
created_at = datetime.now().isoformat()
try:
session.begin()
logger.info("Getting the inputs")
- Body = {'portfolio_id': '56', 'housing_type': 'Social', 'goal': 'Increase EPC', 'goal_value': 'A', 'trigger_file_path': '8/56/windows_portfolio_inputs.csv'}
- body = PlanTriggerRequest(**Body)
epc_client = EpcClient(auth_token=get_settings().EPC_AUTH_TOKEN)
plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
- uprn_filenames = read_dataframe_from_s3_parquet(
- bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
- )
- cleaning_data = read_parquet_from_s3(
- bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
- )
input_properties = []
for config in plan_input:
# We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
- # TODO: implment validation. We should also standardise postcode and address in some fashion as
- # a postcode of abcdef would be considered different to ABCDEF
+
+ epc_searcher = SearchEpc(
+ address1=config["address"],
+ postcode=config["postcode"],
+ auth_token=get_settings().EPC_AUTH_TOKEN,
+ os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY
+ )
+ epc_searcher.find_property()
# Create a record in db
property_id, is_new = create_property(
- session, portfolio_id=body.portfolio_id, address=config['address'], postcode=config['postcode']
+ session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
)
# if a new record was not created, we don't produduce recommendations
if not is_new:
continue
- # TODO: Need to add heat demand target
create_property_targets(
session,
@@ -143,29 +95,21 @@ async def trigger_plan(body: PlanTriggerRequest):
heat_demand_target=None
)
- epc_downloader = DummyDownloader(id=0, epc_client=epc_client, postcode=config['postcode'], address1=config['address'])
- epc_downloader.search_address_epc()
-
epc_records ={
- 'original_epc': epc_downloader.data.copy(),
- 'full_sap_epc': epc_downloader.full_sap_epc.copy() if epc_downloader.full_sap_epc else [],
- 'old_data': epc_downloader.old_data.copy() if epc_downloader.old_data else []
+ 'original_epc': epc_searcher.newest_epc,
+ 'full_sap_epc': epc_searcher.full_sap_epc,
+ 'old_data': epc_searcher.old_data,
}
prepared_epc = EPCRecord(epc_records=epc_records, run_mode="newdata", cleaning_data=cleaning_data) # This uses all the epc records to clean the data
-
- p = Property(
+
+ input_properties.append(
+ Property(
id=property_id,
address1=config['address'],
postcode=config['postcode'],
epc_record=prepared_epc,
)
-
- logger.info("Getting spatial data")
-
- p.get_spatial_data(uprn_filenames)
- input_properties.append(
- p
)
@@ -180,10 +124,19 @@ async def trigger_plan(body: PlanTriggerRequest):
materials = get_materials(session)
cleaned = get_cleaned()
- logger.info("Getting components and epc recommendations")
+ uprn_filenames = read_dataframe_from_s3_parquet(
+ bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
+ )
+ cleaning_data = read_dataframe_from_s3_parquet(
+ bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
+ )
+ photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET)
- # TODO: Move this to a class. We probably want a Recommender class which takes the injects the optimisers
- # in as a dependency and then the optimisers can take the input measures in as part of the setup() method
+ logger.info("Getting spatial data")
+ for p in input_properties:
+ p.get_spatial_data(uprn_filenames)
+
+ logger.info("Getting components and epc recommendations")
recommendations = {}
recommendations_scoring_data = []
@@ -192,7 +145,7 @@ async def trigger_plan(body: PlanTriggerRequest):
for p in input_properties:
# Property recommendations
- p.get_components(cleaned)
+ p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
recommender = Recommendations(property_instance=p, materials=materials)
property_recommendations = recommender.recommend()
diff --git a/backend/app/plan/utils.py b/backend/app/plan/utils.py
index 7aba99c9..7672c316 100644
--- a/backend/app/plan/utils.py
+++ b/backend/app/plan/utils.py
@@ -175,11 +175,34 @@ def create_recommendation_scoring_data(
scoring_dict["LOW_ENERGY_LIGHTING_ENDING"] = 100
scoring_dict["LIGHTING_ENERGY_EFF_STARTING"] = "Very Good"
+ if recommendation["type"] == "windows_glazing":
+ scoring_dict["MULTI_GLAZE_PROPORTION_ENDING"] = 100
+ scoring_dict["WINDOWS_ENERGY_EFF_ENDING"] = "Average"
+
+ is_secondary_glazing = recommendation["is_secondary_glazing"]
+
+ if scoring_dict["glazing_type_ENDING"] == "multiple":
+ pass
+ elif scoring_dict["glazing_type_ENDING"] == "single":
+ scoring_dict["glazing_type_ENDING"] = "secondary" if is_secondary_glazing else "double"
+ elif scoring_dict["glazing_type_ENDING"] == "double":
+ scoring_dict["glazing_type_ENDING"] = "multiple" if is_secondary_glazing else "double"
+ elif scoring_dict["glazing_type_ENDING"] == "secondary":
+ scoring_dict["glazing_type_ENDING"] = "secondary" if is_secondary_glazing else "multiple"
+ elif scoring_dict["glazing_type_ENDING"] in ["triple", "high performance"]:
+ scoring_dict["glazing_type_ENDING"] = "multiple"
+ else:
+ raise ValueError("Invalid glazing type - implement me")
+
+ if recommendation["type"] == "solar_pv":
+ scoring_dict["PHOTO_SUPPLY_ENDING"] = recommendation["photo_supply"]
+
if recommendation["type"] not in [
"mechanical_ventilation", "sealing_open_fireplace", "low_energy_lighting",
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
"loft_insulation", "room_roof_insulation", "flat_roof_insulation",
- "solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation"
+ "solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation",
+ "windows_glazing", "solar_pv"
]:
raise NotImplementedError("Implement me")
diff --git a/backend/app/utils.py b/backend/app/utils.py
index d912a94a..9a03ab21 100644
--- a/backend/app/utils.py
+++ b/backend/app/utils.py
@@ -121,19 +121,6 @@ def epc_to_sap_lower_bound(epc: str):
raise ValueError("EPC rating should be between A and G")
-def read_parquet_from_s3(bucket_name, file_key):
- client = boto3.client('s3')
-
- # Get the object
- s3_object = client.get_object(Bucket=bucket_name, Key=file_key)
-
- # Read the CSV body into a DataFrame
- csv_body = s3_object["Body"].read()
- df = pd.read_parquet(BytesIO(csv_body))
-
- return df
-
-
def save_dataframe_to_s3_parquet(df, bucket_name, file_key):
"""
Save a pandas DataFrame to S3 as a Parquet file.
diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py
index f5a7e2bb..018b4678 100644
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@@ -19,7 +19,9 @@ class PropertyValuation:
100070505235: 344000, # Based on Zoopla's estimation of 131 School road, which is also semi-detached
100070513306: 182000, # Based on Zoopla's estimation of 61 Simmons Drive
100071306896: 77000, # Based on Flat 2 of 44 Wedgewood Road on Zoopla
- 100021192109: 650000 # Based on Zoopla
+ 100021192109: 650000, # Based on Zoopla
+ 766249482: 358000, # Based on Zoopla estimate for 19 Spring Lane, 3 bedroom semi-detached
+ 100120703802: 277000, # Based on Zoopla
}
# We base our valuation uplifts on a number of sources
@@ -93,7 +95,13 @@ class PropertyValuation:
value = cls.UPRN_VALUE_LOOKUP.get(property_instance.uprn)
if not value:
- raise ValueError("Have not implemented valuation for this property")
+ return {
+ "current_value": None,
+ "lower_bound_increased_value": None,
+ "upper_bound_increased_value": None,
+ "average_increased_value": None,
+ "average_increase": None
+ }
current_epc = property_instance.data["current-energy-rating"]
# We get the spectrum of ratings between the current and target EPC
@@ -119,4 +127,5 @@ class PropertyValuation:
"lower_bound_increased_value": value * (1 + min_increase),
"upper_bound_increased_value": value * (1 + max_increase),
"average_increased_value": value * (1 + avg_increase),
+ "average_increase": value * (1 + avg_increase) - value
}
diff --git a/backend/ml_models/api.py b/backend/ml_models/api.py
index e6947906..bc09f26c 100644
--- a/backend/ml_models/api.py
+++ b/backend/ml_models/api.py
@@ -2,8 +2,7 @@ import pandas as pd
import requests
from requests.exceptions import RequestException
from utils.logger import setup_logger
-from utils.s3 import save_dataframe_to_s3_parquet
-from backend.app.utils import read_parquet_from_s3
+from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
logger = setup_logger()
@@ -125,7 +124,7 @@ class ModelApi:
# Retrieve the predictions
predictions_df = pd.DataFrame(
- read_parquet_from_s3(
+ read_dataframe_from_s3_parquet(
bucket_name=predictions_bucket,
file_key=response["storage_filepath"].split(predictions_bucket + "/")[1]
)
diff --git a/backend/requirements/base.txt b/backend/requirements/base.txt
index 7a925030..3173f7f8 100644
--- a/backend/requirements/base.txt
+++ b/backend/requirements/base.txt
@@ -35,4 +35,5 @@ mip==1.15.0
boto3==1.28.3
pandas==1.5.3
pyarrow==12.0.1
-textblob
\ No newline at end of file
+textblob
+usaddress==0.5.10
\ No newline at end of file
diff --git a/backend/tests/test_property.py b/backend/tests/test_property.py
index 871c9291..09594a40 100644
--- a/backend/tests/test_property.py
+++ b/backend/tests/test_property.py
@@ -9,6 +9,7 @@ from etl.epc_clean.EpcClean import EpcClean
mock_epc_response = {
"rows": [
{
+ "tenure": "rental (social)",
"lmk-key": 1,
"uprn": 1,
"number-habitable-rooms": 5,
@@ -17,7 +18,7 @@ mock_epc_response = {
"inspection-date": "2023-06-01",
'lodgement-datetime': '2023-06-01 20:29:01',
"some-other-key": "some-value",
- "roof-description": "Roof Description",
+ "roof-description": "pitched, no insulation",
"walls-description": "Walls Description",
"windows-description": "Windows Description",
"mainheat-description": "Main Heating Description",
@@ -37,7 +38,8 @@ mock_epc_response = {
"floor-height": 2.5,
"total-floor-area": 100,
"construction-age-band": "England and Wales: 1967-1975",
- "floor-description": "Floor Description"
+ "floor-description": "Floor Description",
+ "floor-level": "Ground"
},
{
"lmk-key": 2,
@@ -68,7 +70,8 @@ mock_epc_response = {
"floor-height": 2.5,
"total-floor-area": 100,
"construction-age-band": "England and Wales: 1967-1975",
- "floor-description": "Floor Description"
+ "floor-description": "Floor Description",
+ "floor-level": "Ground"
}
]
}
@@ -100,7 +103,8 @@ mock_epc_response_dupe = {
"floor-height": 2.5,
"total-floor-area": 100,
"construction-age-band": "England and Wales: 1967-1975",
- "floor-description": "Floor Description"
+ "floor-description": "Floor Description",
+ "floor-level": "Ground"
},
{
"lmk-key": 2,
@@ -128,7 +132,8 @@ mock_epc_response_dupe = {
"floor-height": 2.5,
"total-floor-area": 100,
"construction-age-band": "England and Wales: 1967-1975",
- "floor-description": "Floor Description"
+ "floor-description": "Floor Description",
+ "floor-level": "Ground"
},
{
"lmk-key": 3,
@@ -156,36 +161,62 @@ mock_epc_response_dupe = {
"floor-height": 2.5,
"total-floor-area": 100,
"construction-age-band": "England and Wales: 1967-1975",
- "floor-description": "Floor Description"
+ "floor-description": "Floor Description",
+ "floor-level": "Ground"
}
]
}
class TestProperty:
+
@pytest.fixture(autouse=True)
- def property_instance(self, mock_epc_client, mock_cleaner):
- property_instance = Property(1, "AB12CD", "Test Address", epc_client=mock_epc_client)
+ def mock_photo_supply_lookup(self):
+ return pd.DataFrame(
+ [
+ dict(
+ tenure="rental (social)",
+ built_form="Detached",
+ property_type="House",
+ construction_age_band="England and Wales: 1967-1975",
+ is_flat=False,
+ is_pitched=True,
+ is_roof_room=False,
+ floor_area_decile=2,
+ photo_supply_median=40
+ )
+ ]
+ )
+
+ @pytest.fixture(autouse=True)
+ def mock_floor_area_decile_thresholds(self):
+ return pd.DataFrame(
+ {"floor_area_decile_thresholds": [0, 10, 30, 50]}
+ )
+
+ @pytest.fixture(autouse=True)
+ def property_instance(self, mock_cleaner):
+ property_instance = Property(id=1, postcode="AB12CD", address="Test Address", data=mock_epc_response["rows"][0])
return property_instance
@pytest.fixture(autouse=True)
- def property_instance_dupe_data(self, mock_epc_client_dupe_data):
- property_instance_dupe_data = Property(2, "AB12CD", "Test Address", epc_client=mock_epc_client_dupe_data)
+ def property_instance_dupe_data(self):
+ property_instance_dupe_data = Property(id=2, postcode="AB12CD", address="Test Address")
return property_instance_dupe_data
- @pytest.fixture
- def mock_epc_client(self):
- mock_epc_client = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
- mock_epc_client.domestic.search.return_value = mock_epc_response.copy()
- mock_epc_client.auth_token = "mocked_auth_token"
- return mock_epc_client
-
- @pytest.fixture
- def mock_epc_client_dupe_data(self):
- mock_epc_client_dupe_data = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
- mock_epc_client_dupe_data.domestic.search.return_value = mock_epc_response_dupe.copy()
- mock_epc_client_dupe_data.auth_token = "mocked_auth_token"
- return mock_epc_client_dupe_data
+ # @pytest.fixture
+ # def mock_epc_client(self):
+ # mock_epc_client = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
+ # mock_epc_client.domestic.search.return_value = mock_epc_response.copy()
+ # mock_epc_client.auth_token = "mocked_auth_token"
+ # return mock_epc_client
+ #
+ # @pytest.fixture
+ # def mock_epc_client_dupe_data(self):
+ # mock_epc_client_dupe_data = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
+ # mock_epc_client_dupe_data.domestic.search.return_value = mock_epc_response_dupe.copy()
+ # mock_epc_client_dupe_data.auth_token = "mocked_auth_token"
+ # return mock_epc_client_dupe_data
@pytest.fixture
def mock_cleaner(self):
@@ -224,7 +255,11 @@ class TestProperty:
}
mock_cleaner.cleaned = {
- "roof-description": [{"original_description": "Roof Description"}],
+ "roof-description": [
+ {"original_description": "Roof Description"},
+ {"original_description": "pitched, no insulation", "is_pitched": True, "is_flat": False,
+ "is_roof_room": False}
+ ],
"walls-description": [walls_data],
"windows-description": [{"original_description": "Windows Description"}],
"mainheat-description": [{"original_description": "Main Heating Description"}],
@@ -235,37 +270,32 @@ class TestProperty:
}
return mock_cleaner
- def test_init(self, mock_epc_client):
- inst1 = Property(0, "AB12CD", "Test Address", epc_client=mock_epc_client)
- # Should be mocked auth token
- assert inst1.epc_client.auth_token == "mocked_auth_token"
+ def test_init(self):
+ inst1 = Property(0, postcode="AB12CD", address="Test Address")
- inst2 = Property(3, "AB12CD", "Test Address", epc_client=mock_epc_client)
- assert inst2.epc_client.auth_token
+ assert inst1.data is None
- inst3 = Property(4, "AB12CD", "Test Address", data={"some": "data"}, epc_client=mock_epc_client)
- assert inst3.data == {"some": "data"}
+ inst2 = Property(3, "AB12CD", "Test Address")
+ assert inst2.id == 3
- data = inst3.search_address_epc()
- assert data is None
+ inst3 = Property(4, "AB12CD", "Test Address", data={"some": "data", "uprn": 123})
+ assert inst3.data == {"some": "data", "uprn": 123}
- def test_search_address_epc(self, property_instance):
- # Call the method to test
- property_instance.search_address_epc()
-
- # Verify that the correct data is being returned
- assert property_instance.data == mock_epc_response["rows"][0]
-
- def test_search_address_epc_multiple_results(self, property_instance_dupe_data, mock_epc_client_dupe_data):
- with pytest.raises(Exception, match="More than one result found for this address - investigate me"):
- property_instance_dupe_data.search_address_epc()
-
- def test_get_components(self, property_instance, mock_cleaner, mock_epc_client):
- property_instance.search_address_epc()
- property_instance.get_components(mock_cleaner.cleaned)
+ def test_get_components(
+ self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
+ ):
+ property_instance.get_components(
+ mock_cleaner.cleaned,
+ photo_supply_lookup=mock_photo_supply_lookup,
+ floor_area_decile_thresholds=mock_floor_area_decile_thresholds
+ )
# Verify that the components are set correctly
- assert property_instance.roof == {"original_description": "Roof Description"}
+ assert property_instance.roof == {
+ 'original_description': 'pitched, no insulation', 'is_pitched': True,
+ 'is_flat': False, 'is_roof_room': False
+ }
+
assert property_instance.walls == {
"original_description": "Walls Description",
"is_cavity_wall": True,
@@ -289,24 +319,15 @@ class TestProperty:
# Verify that ValueError is raised when EpcClean doesn't contain cleaned data
with pytest.raises(ValueError, match="Cleaner does not contain cleaned data"):
- property_instance.get_components(mock_cleaner.cleaned)
+ property_instance.get_components(mock_cleaner.cleaned, pd.DataFrame(), pd.DataFrame())
- def test_get_components_no_data(self, property_instance, mock_cleaner):
+ def test_get_components_no_attributes(
+ self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
+ ):
# Modify the mock cleaner to have no attributes for a specific description
mock_cleaner.cleaned = {
"roof-description": []
}
-
- # Verify that ValueError is raised when no attributes are found
- with pytest.raises(ValueError, match="Property does not contain data"):
- property_instance.get_components(mock_cleaner.cleaned)
-
- def test_get_components_no_attributes(self, property_instance, mock_cleaner):
- # Modify the mock cleaner to have no attributes for a specific description
- mock_cleaner.cleaned = {
- "roof-description": []
- }
- property_instance.search_address_epc()
property_instance.data["roof-description"] = "Pitched, no insulation"
property_instance.walls = {
"original_description": "Walls Description",
@@ -327,14 +348,17 @@ class TestProperty:
}
# Assert backup cleaning has been applied
- property_instance.get_components(mock_cleaner.cleaned)
+ property_instance.get_components(
+ mock_cleaner.cleaned, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
+ )
assert property_instance.roof["clean_description"] == "Pitched, no insulation"
assert property_instance.roof["is_pitched"]
- def test_get_components_multiple_attributes(self, property_instance, mock_cleaner):
+ def test_get_components_multiple_attributes(
+ self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
+ ):
# This shouldn't happen - it would mean a cleaning error
- property_instance.search_address_epc()
property_instance.data["roof-description"] = "Roof Description"
cleaned = {
"roof-description": [
@@ -345,10 +369,10 @@ class TestProperty:
# Verify that ValueError is raised when multiple attributes are found
with pytest.raises(ValueError, match="Either No attributes or multiple found for roof-description"):
- property_instance.get_components(cleaned)
+ property_instance.get_components(cleaned, mock_photo_supply_lookup, mock_floor_area_decile_thresholds)
- def test_set_spatial(self, mock_epc_client):
- prop = Property(1, "AB12CD", "Test Address", mock_epc_client)
+ def test_set_spatial(self):
+ prop = Property(1, postcode="AB12CD", address="Test Address")
spatial1 = pd.DataFrame([{
'X_COORDINATE': 411143.0, 'Y_COORDINATE': 281701.0, 'LATITUDE': 52.4331896, 'LONGITUDE': -1.8375238,
@@ -362,7 +386,7 @@ class TestProperty:
assert prop.is_heritage
assert prop.restricted_measures
- prop2 = Property(1, "AB12CD", "Test Address", mock_epc_client)
+ prop2 = Property(1, "AB12CD", "Test Address")
spatial2 = pd.DataFrame([{
'X_COORDINATE': 411143.0, 'Y_COORDINATE': 281701.0, 'LATITUDE': 52.4331896, 'LONGITUDE': -1.8375238,
@@ -376,10 +400,10 @@ class TestProperty:
assert not prop2.is_heritage
assert not prop2.restricted_measures
- def test_set_floor_level(self, mock_epc_client):
+ def test_set_floor_level(self):
# In this case, we have a flat which looks looks it's on the first floor, but it's actually on the ground
# floor, so we should set floor_level to 0
- prop = Property(1, "AB12CD", "Test Address", mock_epc_client)
+ prop = Property(1, postcode="AB12CD", address="Test Address")
prop.data = {'floor-level': '01', 'property-type': 'Flat'}
prop.floor = {
'original_description': 'Solid, no insulation (assumed)', 'clean_description': 'Solid, no insulation',
@@ -395,7 +419,7 @@ class TestProperty:
# This property is labelled as being on the ground floor but actually has another property below
# so we set floor level to 1
- prop2 = Property(1, "AB12CD", "Test Address", mock_epc_client)
+ prop2 = Property(1, postcode="AB12CD", address="Test Address")
prop2.data = {'floor-level': 'Ground', 'property-type': 'Flat'}
prop2.floor = {
'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation',
@@ -410,7 +434,7 @@ class TestProperty:
assert prop2.floor_level == 1
# this property is correctly labelled as being on the 2nd floor
- prop3 = Property(1, "AB12CD", "Test Address", mock_epc_client)
+ prop3 = Property(1, postcode="AB12CD", address="Test Address")
prop3.data = {'floor-level': '02', 'property-type': 'Flat'}
prop3.floor = {
'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation',
@@ -425,7 +449,7 @@ class TestProperty:
assert prop3.floor_level == 2
# Example of a house
- prop4 = Property(1, "AB12CD", "Test Address", mock_epc_client)
+ prop4 = Property(1, postcode="AB12CD", address="Test Address")
prop4.data = {'floor-level': '', 'property-type': 'House'}
prop4.floor = {
'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation',
diff --git a/backend/tests/test_sap_model_prep.py b/backend/tests/test_sap_model_prep.py
index f20e4993..89c436ce 100644
--- a/backend/tests/test_sap_model_prep.py
+++ b/backend/tests/test_sap_model_prep.py
@@ -2,13 +2,11 @@ from backend.Property import Property
from etl.epc.DataProcessor import DataProcessor
from backend.app.plan.utils import create_recommendation_scoring_data, get_cleaned
from etl.epc.settings import COLUMNS_TO_MERGE_ON
-from epc_api.client import EpcClient
import pandas as pd
import pytest
import msgpack
from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3
-from tqdm import tqdm
# Handy code for selecting testing data
@@ -122,7 +120,21 @@ class TestSapModelPrep:
cleaned = msgpack.unpackb(cleaned, raw=False)
return cleaned
- def test_fill_cavity_wall(self, cleaned, cleaning_data):
+    @pytest.fixture
+    def photo_supply_lookup(self):
+        """Read the photo supply lookup parquet from the retrofit-data-dev bucket."""
+        return read_dataframe_from_s3_parquet(
+            bucket_name="retrofit-data-dev", file_key="solar_pv_supply/photo_supply_lookup.parquet",
+        )
+
+    @pytest.fixture
+    def floor_area_decile_thresholds(self):
+        """Read the floor area decile thresholds parquet from the retrofit-data-dev bucket."""
+        return read_dataframe_from_s3_parquet(
+            bucket_name="retrofit-data-dev", file_key="solar_pv_supply/floor_area_decile_thresholds.parquet",
+        )
+
+ def test_fill_cavity_wall(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds):
"""
We ensure that the process that prepares the data in the engine code results in the same data as
the model is trained on
@@ -288,11 +300,10 @@ class TestSapModelPrep:
home = Property(
id=0,
postcode=starting_epc["postcode"],
- address1=starting_epc["address1"],
- epc_client=EpcClient(auth_token="notoken"),
+ address=starting_epc["address1"],
data=starting_epc
)
- home.get_components(cleaned)
+ home.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
data_processor = DataProcessor(None, newdata=True)
data_processor.insert_data(pd.DataFrame([home.get_model_data()]))
@@ -356,7 +367,7 @@ class TestSapModelPrep:
assert test_record[c].values[0] == row[c]
- def test_internal_wall_insulation(self, cleaned, cleaning_data):
+ def test_internal_wall_insulation(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds):
starting_epc2 = {
'low-energy-fixed-light-count': '2', 'address': 'FLAT 12, WAREHOUSE W, 3 WESTERN GATEWAY',
@@ -508,11 +519,10 @@ class TestSapModelPrep:
home2 = Property(
id=0,
postcode=starting_epc2["postcode"],
- address1=starting_epc2["address1"],
- epc_client=EpcClient(auth_token="notoken"),
+ address=starting_epc2["address1"],
data=starting_epc2
)
- home2.get_components(cleaned)
+ home2.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
home2.set_number_lighting_outlets(None)
data_processor2 = DataProcessor(None, newdata=True)
@@ -578,7 +588,7 @@ class TestSapModelPrep:
assert test_record2[c].values[0] == row2[c]
- def test_ventilation(self, cleaned, cleaning_data):
+ def test_ventilation(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds):
starting_epc3 = {
'low-energy-fixed-light-count': '', 'address': '45 Shepperson Road', 'uprn-source': 'Energy Assessor',
@@ -728,11 +738,10 @@ class TestSapModelPrep:
home3 = Property(
id=0,
postcode=starting_epc3["postcode"],
- address1=starting_epc3["address1"],
- epc_client=EpcClient(auth_token="notoken"),
+ address=starting_epc3["address1"],
data=starting_epc3
)
- home3.get_components(cleaned)
+ home3.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
home3.set_number_lighting_outlets(None)
data_processor3 = DataProcessor(None, newdata=True)
@@ -782,7 +791,7 @@ class TestSapModelPrep:
assert test_record3[c].values[0] == row3[c]
- def test_fireplaces(self, cleaned, cleaning_data):
+ def test_fireplaces(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds):
starting_epc4 = {
'low-energy-fixed-light-count': '', 'address': '9 Glebe Road, Asfordby Hill',
@@ -937,11 +946,10 @@ class TestSapModelPrep:
home4 = Property(
id=0,
postcode=starting_epc4["postcode"],
- address1=starting_epc4["address1"],
- epc_client=EpcClient(auth_token="notoken"),
+ address=starting_epc4["address1"],
data=starting_epc4
)
- home4.get_components(cleaned)
+ home4.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
home4.set_number_lighting_outlets(None)
data_processor4 = DataProcessor(None, newdata=True)
diff --git a/etl/costs/app.py b/etl/costs/app.py
index 4d53ce28..30eff735 100644
--- a/etl/costs/app.py
+++ b/etl/costs/app.py
@@ -75,6 +75,7 @@ def app():
ewi_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="external_wall_insulation", header=0)
lel_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="low_energy_lighting", header=0)
flat_roof_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="flat_roof_insulation", header=0)
+ window_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="window_glazing", header=0)
# Form a single table to be uploaded
costs = pd.concat(
diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py
index f25d06bd..13966655 100644
--- a/etl/eligibility/Eligibility.py
+++ b/etl/eligibility/Eligibility.py
@@ -33,6 +33,7 @@ class Eligibility:
# If the loft has less than 100mm of insulation, we classify the home has needing loft insulation
LOFT_INSULATION_THRESHOLD = 100
+ HIGH_LOFT_INSULATION_THRESHOLD = 269
# Because EPCS have different values for tenure, we need to remap them to a common set of values
tenure_remap = {
@@ -104,6 +105,8 @@ class Eligibility:
self.LOFT_INSULATION_THRESHOLD if loft_thickness_threshold is None else loft_thickness_threshold
)
+ high_loft_thickness_threshold = self.HIGH_LOFT_INSULATION_THRESHOLD
+
# We firstly check if the roof is a loft
is_loft = self.roof["is_pitched"] and (not self.roof["is_roof_room"])
@@ -122,7 +125,22 @@ class Eligibility:
is_flat=self.roof["is_flat"]
)
- if insulation_thickness > loft_thickness_threshold:
+        if insulation_thickness <= loft_thickness_threshold:
+            self.loft = {
+                "suitability": True,
+                "thickness": insulation_thickness,
+                "reason": None
+            }
+        # elif, not a second if: an independent check would also fire and overwrite the result set above
+        elif insulation_thickness <= high_loft_thickness_threshold:
+            self.loft = {
+                "suitability": True,
+                "thickness": insulation_thickness,
+                "reason": "high loft thickness but below regulation"
+            }
+            return
+
+        if insulation_thickness > high_loft_thickness_threshold:
# Insulation is already thick enough
self.loft = {
"suitability": False,
@@ -131,12 +149,6 @@ class Eligibility:
}
return
- self.loft = {
- "suitability": True,
- "thickness": insulation_thickness,
- "reason": None
- }
-
def cavity_insulation(self):
"""
@@ -152,9 +164,25 @@ class Eligibility:
is_partial_filled = (
self.walls["is_as_built"] and self.walls["insulation_thickness"] not in ["below average"]
)
+ # We look for potentially under performing cavities - anything that is assumed, as built and insulated
+ is_underperforming = (
+ self.walls["is_as_built"] and self.walls["insulation_thickness"] in ["average"] and self.walls["is_assumed"]
+ )
is_unfilled_cavity = is_cavity and is_empty
is_partial_filled_cavity = is_cavity and is_partial_filled
+ is_underperforming_cavity = is_cavity and is_underperforming
+
+ # Check if it has internal or external wall insulation
+ has_internal_wall_insulation = self.walls["internal_insulation"]
+ has_external_wall_insulation = self.walls["external_insulation"]
+
+ if has_internal_wall_insulation or has_external_wall_insulation:
+ self.cavity = {
+ "suitability": False,
+ "type": "internal or external wall insulation"
+ }
+ return
if is_unfilled_cavity:
self.cavity = {
@@ -170,6 +198,13 @@ class Eligibility:
}
return
+ if is_underperforming_cavity:
+ self.cavity = {
+ "suitability": True,
+ "type": "underperforming"
+ }
+ return
+
self.cavity = {
"suitability": False,
"type": "full"
@@ -223,6 +258,14 @@ class Eligibility:
}
def suspended_floor_insulation(self):
+
+        # Guard clause: a floor flagged "no_data" is marked unsuitable and the remaining checks are skipped
+        if self.floor.get("no_data"):
+            self.suspended_floor = {
+                "suitability": False,
+            }
+            return
+
is_suspended = self.floor["is_suspended"]
is_insulated = self.floor["insulation_thickness"] in ["average", "above average"]
@@ -232,6 +275,14 @@ class Eligibility:
return
def solid_floor_insulation(self):
+
+        # Guard clause: a floor flagged "no_data" is marked unsuitable and the remaining checks are skipped
+        if self.floor.get("no_data"):
+            self.solid_floor = {
+                "suitability": False,
+            }
+            return
+
is_solid = self.floor["is_solid"]
is_insulated = self.floor["insulation_thickness"] in ["average", "above average"]
@@ -305,7 +356,8 @@ class Eligibility:
"""
current_sap = int(self.epc["current-energy-efficiency"])
- if current_sap > 54:
+
+ if current_sap >= 69:
self.eco4_warmfront = {
"eligible": False,
"message": "sap too high"
@@ -319,9 +371,22 @@ class Eligibility:
is_eligible = self.cavity["suitability"] & self.loft["suitability"]
if post_retrofit_sap is None:
+
+ if current_sap >= 55:
+ message = "Possibly eligible but property currently EPC D"
+ else:
+ message = "subject to post retrofit sap" if is_eligible else "not eligible"
+
+ # Update the message to flag properties that failed just because of a full cavity.
+ # We need to double check that the wall is a cavity, that the loft is suitable and that the
+ # sap is within reason
+ # We can then estimate the age of the cavity fill
+ if not is_eligible and (current_sap < 69) and self.loft["suitability"] and self.walls["is_cavity_wall"]:
+ message = "Failed due to full cavity - check cavity age"
+
self.eco4_warmfront = {
"eligible": is_eligible,
- "message": "subject to post retrofit sap"
+ "message": message
}
return
diff --git a/etl/eligibility/ha_15_32/app.py b/etl/eligibility/ha_15_32/app.py
index 3c7ae901..76aadcc4 100644
--- a/etl/eligibility/ha_15_32/app.py
+++ b/etl/eligibility/ha_15_32/app.py
@@ -11,13 +11,12 @@ import numpy as np
import msgpack
from datetime import datetime, timedelta
from utils.logger import setup_logger
-from utils.s3 import read_from_s3
+from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet
from dotenv import load_dotenv
from backend.SearchEpc import SearchEpc
from backend.Property import Property
from etl.eligibility.Eligibility import Eligibility
from etl.epc.DataProcessor import DataProcessor
-from backend.app.utils import read_parquet_from_s3
from backend.app.plan.utils import create_recommendation_scoring_data
from etl.epc.settings import COLUMNS_TO_MERGE_ON
from backend.ml_models.api import ModelApi
@@ -247,6 +246,8 @@ def merge_ha_15(asset_list, identified_addresses):
identified_addresses = identified_addresses.drop_duplicates("merge_key")
+ # We pull out raw counts for the survey lists
+
# Check asset list for dupes
asset_list_dupes = asset_list["merge_key"].duplicated()
if asset_list_dupes.sum():
@@ -336,7 +337,10 @@ def merge_ha_15(asset_list, identified_addresses):
return merged_data, dropped_identified_merge_keys
-def prepare_model_data_row(property_id, modelling_epc, cleaned, cleaning_data, created_at):
+def prepare_model_data_row(
+ property_id, modelling_epc, cleaned, cleaning_data, created_at,
+ photo_supply_lookup, floor_area_decile_thresholds, old_data=None, full_sap_epc=None,
+):
"""
This function prepares the data for modelling, in the same fashion as the recommendation engine
With up-coming refactoring, this will change
@@ -346,15 +350,24 @@ def prepare_model_data_row(property_id, modelling_epc, cleaned, cleaning_data, c
p = Property(
id=property_id,
postcode=modelling_epc["postcode"],
- address1=modelling_epc["address1"],
- epc_client=None,
- data=modelling_epc
+ address=modelling_epc["address1"],
+ data=modelling_epc,
+ old_data=old_data,
+ full_sap_epc=full_sap_epc
)
- p.get_components(cleaned)
+ p.get_components(cleaned, photo_supply_lookup=photo_supply_lookup,
+ floor_area_decile_thresholds=floor_area_decile_thresholds)
+
+ # THIS IS TEMP AND SHOULDN'T BE HERE
+ data_to_clean = p.get_model_data()
+ if data_to_clean["NUMBER_HEATED_ROOMS"] in ['', None]:
+ data_to_clean["NUMBER_HEATED_ROOMS"] = data_to_clean["NUMBER_HABITABLE_ROOMS"]
+ p.data["number-heated-rooms"] = data_to_clean["NUMBER_HABITABLE_ROOMS"]
+
# This is temp - this should happen after scoring
cleaned_property_data = DataProcessor.apply_averages_cleaning(
- data_to_clean=pd.DataFrame([dict(**p.get_model_data(), LOCAL_AUTHORITY=p.data["local-authority"])]),
+ data_to_clean=pd.DataFrame([dict(**data_to_clean, LOCAL_AUTHORITY=p.data["local-authority"])]),
cleaning_data=cleaning_data,
cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
)
@@ -829,6 +842,18 @@ def analyse_ha_32_results(results, ha32, no_house_numbers):
results_df["warmfront_identified"]
]
+    # Aggregate counts of the ECO4 and GBIS jobs identified
+ n_eco = results_df["eco4_eligible"].sum()
+    # GBIS jobs are the rows that are GBIS eligible but not ECO4 eligible
+ n_gbis = results_df[
+ (results_df["gbis_eligible"] == True) & (results_df["eco4_eligible"] == False)
+ ]["gbis_eligible"].sum()
+
+ pipeline_potential = results_df[
+ (results_df["warmfront_identified"] == True) | (results_df["eco4_eligible"] == True) | (
+ results_df["gbis_eligible"] == True)
+ ]
+
success_rate = warmfront_identified["gbis_eligible"].sum() / warmfront_identified.shape[0]
# For HA32, this is 89%
@@ -886,8 +911,16 @@ def analyse_ha_32_results(results, ha32, no_house_numbers):
new_possibilities = results_df[
(~results_df["warmfront_identified"]) &
- (results_df["gbis_eligible"] | results_df["eco4_eligible"]) &
- (results_df["tenure"] == "Rented (social)")
+ (results_df["gbis_eligible"] | results_df["eco4_eligible"])
+ ].copy()
+
+ new_possibilities_eco = results_df[
+ (~results_df["warmfront_identified"]) &
+ (results_df["eco4_eligible"] == True)
+ ].copy()
+ new_possibilities_gbis = results_df[
+ (~results_df["warmfront_identified"]) &
+ (results_df["eco4_eligible"] == False) & (results_df["gbis_eligible"] == True)
].copy()
future_possibilities_eco = results_df[
@@ -947,6 +980,8 @@ def analyse_ha_15_results(results_df, ha15, no_house_numbers):
results_df["warmfront_identified"]
]
+ warmfront_identified = warmfront_identified
+
n_identified = (warmfront_identified["gbis_eligible"] | warmfront_identified["eco4_eligible"]).sum()
success_rate = n_identified / warmfront_identified.shape[0]
@@ -955,6 +990,11 @@ def analyse_ha_15_results(results_df, ha15, no_house_numbers):
"eligibility_classification"].value_counts()
# For HA15 this is 50.3%
+ pipeline_potential = results_df[
+ (results_df["warmfront_identified"] == True) | (results_df["eco4_eligible"] == True) | (
+ results_df["gbis_eligible"] == True)
+ ]
+
# of the properties we identify, what is the mix of confidenc
missed = results_df[
@@ -973,32 +1013,37 @@ def analyse_ha_15_results(results_df, ha15, no_house_numbers):
missed["sap"] < 69
]
- sap_low_enough["walls"].value_counts()
- z = ha15[ha15["row_id"].isin(sap_too_high["row_id"].values)]
-
- investigate_1 = ha15[ha15["row_id"].isin(sap_too_high["row_id"])][
- ["row_id", "Postcode", "Address Line 1", "Address Line 2", "Address Line 3"]]
-
- investigate_2 = ha15[ha15["row_id"].isin(sap_low_enough["row_id"])][
- ["row_id", "Postcode", "Address Line 1", "Address Line 2", "Address Line 3"]]
-
- missed["message"].value_counts()
+    # Aggregate counts of the ECO4 and GBIS jobs identified
+ n_eco = results_df["eco4_eligible"].sum()
+    # GBIS jobs are the rows that are GBIS eligible but not ECO4 eligible
+ n_gbis = results_df[
+ (results_df["gbis_eligible"] == True) & (results_df["eco4_eligible"] == False)
+ ]["gbis_eligible"].sum()
# We now look for properties that we identified, that were not identified by Warmfront
new_possibilities = results_df[
(~results_df["warmfront_identified"]) &
- ((results_df["gbis_eligible"] == True) | (results_df["eco4_eligible"] == True)) &
- (results_df["tenure"] == "Rented (social)")
+ ((results_df["gbis_eligible"] == True) | (results_df["eco4_eligible"] == True))
+ ].copy()
+
+ new_possibilities_eco = results_df[
+ (~results_df["warmfront_identified"]) &
+ (results_df["eco4_eligible"] == True)
+ ].copy()
+
+ new_possibilities_gbis = results_df[
+ (~results_df["warmfront_identified"]) &
+ (results_df["eco4_eligible"] == False) & (results_df["gbis_eligible"] == True)
].copy()
# These are future possibilityies
- new_possibilities_eco = results_df[
+ future_possibilities_eco = results_df[
(~results_df["warmfront_identified"]) &
(results_df["eco4_eligible_future"] == True) & (~(results_df["gbis_eligible"] | results_df["eco4_eligible"]))
].copy()
- new_possibilities_gbis = results_df[
+ future_possibilities_gbis = results_df[
(~results_df["warmfront_identified"]) &
(results_df["gbis_eligible_future"] == True) & (results_df["eco4_eligible_future"] == False) & (
~(results_df["gbis_eligible"] | results_df["eco4_eligible"]))
@@ -1058,7 +1103,7 @@ def app():
)
cleaned = msgpack.unpackb(cleaned, raw=False)
- cleaning_data = read_parquet_from_s3(
+ cleaning_data = read_dataframe_from_s3_parquet(
bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
)
diff --git a/etl/eligibility/ha_15_32/ha16_app.py b/etl/eligibility/ha_15_32/ha16_app.py
new file mode 100644
index 00000000..0d67e0b4
--- /dev/null
+++ b/etl/eligibility/ha_15_32/ha16_app.py
@@ -0,0 +1,647 @@
+import os
+import msgpack
+import openpyxl
+from pathlib import Path
+from datetime import datetime
+import pandas as pd
+import numpy as np
+from utils.s3 import read_from_s3
+from utils.logger import setup_logger
+from dotenv import load_dotenv
+from utils.s3 import read_dataframe_from_s3_parquet
+from tqdm import tqdm
+from backend.SearchEpc import SearchEpc
+from etl.eligibility.Eligibility import Eligibility
+from etl.eligibility.ha_15_32.app import prepare_model_data_row
+from etl.epc.DataProcessor import DataProcessor
+from etl.epc.settings import COLUMNS_TO_MERGE_ON
+from backend.ml_models.api import ModelApi
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from recommendations.recommendation_utils import calculate_cavity_age
+from recommendation_utils import convert_thickness_to_numeric
+
+import re
+
+ENV_FILE = Path(__file__).parent / ".env"  # this module already lives in etl/eligibility/ha_15_32
+logger = setup_logger()
+# Load the .env file *before* reading the token, otherwise a value defined only in .env is never seen
+load_dotenv(ENV_FILE)
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def load_data():
+    """Load the two HA16 asset workbooks and the ECO4 survey list, and match each survey row to an asset.
+
+    :return: ``(data, survey_list)`` - ``data`` is the asset list left-joined to the matched survey rows,
+        with a boolean ``warmfront_identified`` column; ``survey_list`` is the tidied survey sheet
+    :raises ValueError: when a survey row cannot be narrowed down to exactly one asset row
+    """
+    asset_list_filenames = [
+        "HESTIA - HA 16 ASSET LIST PART 1 OF 2.xlsx",
+        "HESTIA - HA 16 ASSET LIST PART 2 OF 2.xlsx",
+    ]
+
+    # Prepare lists to collect each row's cell values and the fill colour of its first cell
+    rows_data = []
+    rows_colors = []
+    colnames = []
+    for asset_list_filename in asset_list_filenames:
+        workbook = openpyxl.load_workbook(f'etl/eligibility/ha_15_32/{asset_list_filename}')
+        sheet = workbook.active
+        sheet_colnames = [cell.value for cell in sheet[1]]
+        colnames.append(sheet_colnames)
+
+        for row in sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
+            row_data = [cell.value for cell in row]  # This will get you the cell values
+            row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
+            rows_data.append(row_data)
+            rows_colors.append(row_color)
+
+    asset_list = pd.DataFrame(rows_data, columns=colnames[0])
+    # Remove None columns (only the first 12 columns are kept)
+    asset_list = asset_list.iloc[:, 0:12]
+    asset_list['row_color'] = rows_colors
+
+    asset_list["row_colour_name"] = np.where(
+        asset_list["row_color"] == "FFFF0000", "red",
+        np.where(asset_list["row_color"] == "FF92D050", "green", "yellow")  # any other fill (or none) -> "yellow"
+    )
+
+    # Split up the address on commas, which is useful for matching later
+    split_addresses = asset_list['Address'].str.split(',', expand=True)
+    split_addresses.columns = ['temp', 'address2', 'address3', 'address4', 'address5']
+
+    asset_list = pd.concat([asset_list, split_addresses], axis=1)
+    # There are no commas separating the house number from the rest of the first address part
+    split_addresses2 = asset_list['temp'].str.split(' ', expand=True)
+    split_addresses2.columns = ['HouseNo', 'part1', 'part2', "part3", "part4"]
+    # We could re-concatenate but we only care about HouseNo for the moment
+    asset_list = pd.concat([asset_list, split_addresses2[["HouseNo"]]], axis=1)
+
+    # We now read in the survey list
+    survey_workbook = openpyxl.load_workbook(f'etl/eligibility/ha_15_32/HESTIA- HA 16 ECO4 SURVEY LIST.xlsx')
+    survey_sheet = survey_workbook.active
+
+    survey_rows = []
+    survey_colors = []
+
+    for row in survey_sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
+        row_data = [cell.value for cell in row]  # This will get you the cell values
+        row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
+        survey_rows.append(row_data)
+        survey_colors.append(row_color)
+
+    survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]])
+
+    # For the survey list the colours are stored but not relied upon, since there is a column called
+    # "INSTALLED OR CANCELLED" which describes the status of the property
+    survey_list["row_colour"] = survey_colors
+    survey_list["survey_key"] = ["survey_" + str(i) for i in range(0, len(survey_list))]
+    # Tidy up the street/block name a bit
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("/", ", ")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.lower()
+    survey_list["Street / Block Name"] = np.where(
+        survey_list["Street / Block Name"] == "REEDS RD",  # NOTE(review): column was lower-cased just above
+        "Reeds ROAD",
+        survey_list["Street / Block Name"]
+    )
+    # Replace the whole word "rd" with "road"
+    survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(r'\brd\b', 'road', regex=True)
+
+    # Replace " , " with ", "
+    survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(
+        " , ", ', ',
+    )
+    # Fix "{place} ,{place}" with "{place}, {place}"
+    survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(r'\s*,\s*', ', ', regex=True)
+    # Strip whitespace
+    survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.strip()
+
+    # Correct errors
+    survey_list["Post Code"] = np.where(
+        survey_list["Post Code"] == "M38 0SA",
+        "M38 9SA",
+        survey_list["Post Code"]
+    )
+
+    survey_list["Post Code"] = np.where(
+        (survey_list["Street / Block Name"] == "nelson drive") & (survey_list["Post Code"] == "M44 5JE"),
+        "M44 5JF",
+        survey_list["Post Code"]
+    )
+
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("eccels", "eccles")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("chatley, road", "chatley road")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("vaughen", "Vaughan")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("cresent", "crescent")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("plantation road",
+                                                                                        "plantation avenue")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("how clough drive",
+                                                                                        "howclough drive")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("brockhurst lane",
+                                                                                        "brookhurst lane")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("biirch road",
+                                                                                        "birch road")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hadson road",
+                                                                                        "hodson road")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("harbonne avennue",
+                                                                                        "narbonne avenue")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("cumberland road, cadishead",
+                                                                                        "cumberland avenue, cadishead")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("aston field drive",
+                                                                                        "ashton field drive")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("wedgewood road",
+                                                                                        "wedgwood road")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hamilton close",
+                                                                                        "hamilton avenue")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("lichens crescent, fitton hill",
+                                                                                        "lichens crescent")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("south croft, fitton hill",
+                                                                                        "south croft")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(", fitton hill", "")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("firtree dr", "fir tree avenue")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hawthorne road",
+                                                                                        "hawthorn crescent")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("rein lee avenue",
+                                                                                        "reins lee avenue")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("westerhill road",
+                                                                                        "wester hill road")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("st martins road",
+                                                                                        "saint martins road")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("timperley avenue",
+                                                                                        "timperley close")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("eastwood road",
+                                                                                        "eastwood avenue")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("new road", "new street")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("grassmere road",
+                                                                                        "grasmere road")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hulton road",
+                                                                                        "hulton avenue")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("beechfield avenue",
+                                                                                        "beechfield road")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("princess avenue",
+                                                                                        "princes avenue")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("edge ford crecent",
+                                                                                        "edge fold crescent")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("conniston avenue",
+                                                                                        "coniston avenue")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("blackthorne crescent",
+                                                                                        "blackthorn crescent")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("wellstock road",
+                                                                                        "wellstock lane")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("brackley avenue",
+                                                                                        "brackley street")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("brook avenue swinton",
+                                                                                        "brook avenue, swinton")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("green avenue swinton",
+                                                                                        "green avenue, swinton")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("grasmere avenue wardley",
+                                                                                        "grasmere avenue, wardley")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("mardale avenue wardle",
+                                                                                        "mardale avenue, wardle")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("carleach grove",
+                                                                                        "cartleach Grove")
+    survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("arbour grove",
+                                                                                        "arbor Grove")
+
+    # Replacement for clively avenue 66-68
+    survey_list["NO."] = np.where(
+        survey_list["NO."] == "66-68",
+        "66",
+        survey_list["NO."]
+    )
+
+    # We now need to merge the survey list onto the asset list
+    # Could be easier just to do a search on each row, even though it's much slower
+    matched = []
+    for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):
+
+        house_number = row["NO."]
+        if isinstance(house_number, str):
+            house_number = house_number.lower()
+
+        # Narrow down in stages: street name in the address, then house number anywhere in the address,
+        df = asset_list[asset_list["Address"].str.lower().str.contains(row["Street / Block Name"].lower())].copy()
+        # df = df[df["Postcode"].str.lower().str.contains(row["Post Code"].lower())]
+        df = df[df["Address"].str.lower().str.contains(str(house_number))]
+        if df.shape[0] != 1:
+            df = df[df["HouseNo"] == str(house_number)]  # ...then an exact house-number match,
+        if df.shape[0] != 1:
+            df = df[df["Postcode"].str.lower().str.contains(row["Post Code"].lower())]  # ...then the postcode
+        if df.shape[0] != 1:
+            raise ValueError("Investigate")
+
+        matched.append(
+            {
+                "survey_key": row["survey_key"],
+                "matched_address": df["Address"].values[0],
+                "survey_house_no": row["NO."],
+                "survey_street_name": row["Street / Block Name"],
+                "survey_postcode": row["Post Code"],
+                "survey_status": row["INSTALLED OR CANCELLED"]
+            }
+        )
+
+    matched = pd.DataFrame(matched)
+    matched["warmfront_identified"] = True
+
+    # Combine asset list and surveys
+    data = asset_list.merge(
+        matched, how="left", left_on="Address", right_on="matched_address",
+    )
+    data["warmfront_identified"] = data["warmfront_identified"].fillna(False)  # assets with no matched survey
+
+    return data, survey_list
+
+
+def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
+    """Search for an EPC for every asset, run the eligibility checks and score the ECO4-eligible ones.
+
+    :param data: asset list; HouseNo, Postcode, Address, Type and row_id are read from each row
+    :param cleaned: handed to ``Eligibility``, ``calculate_cavity_age`` and ``prepare_model_data_row``
+    :param cleaning_data: handed to ``prepare_model_data_row`` and ``DataProcessor.apply_averages_cleaning``
+    :param created_at: handed to ``prepare_model_data_row`` and used as the ``ModelApi`` timestamp
+    :param photo_supply_lookup, floor_area_decile_thresholds: handed to ``prepare_model_data_row``
+    :return: ``(results_df, scoring_data, nodata)``; ``nodata`` holds the rows for which no EPC was found
+    """
+    scoring_data = []
+    results = []
+    nodata = []
+    property_type_lookup = {
+        'Semi Detached Bungalow': {"property-type": "Bungalow", "built-form": "Semi-Detached"},
+        'Mid Terraced House': {"property-type": "House", "built-form": "Mid-Terrace"},
+        'End Terraced House': {"property-type": "House", "built-form": "End-Terrace"},
+        'Low Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'Semi-Detached House': {"property-type": "House", "built-form": "Semi-Detached"},
+        'Detached Bungalow': {"property-type": "Bungalow", "built-form": "Detached"},
+        'End Terraced Bungalow': {"property-type": "Bungalow", "built-form": "End-Terrace"},
+        'Mid Terraced Bungalow': {"property-type": "Bungalow", "built-form": "Mid-Terrace"},
+        'Medium Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'Detached House': {"property-type": "House", "built-form": "Detached"},
+        'Cottage Flat': {"property-type": "Flat", "built-form": "Semi-Detached"},
+        'Maisonette Medium Rise': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'Maisonette Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'End Terraced Town House': {"property-type": "House", "built-form": "End-Terrace"},
+        'Flat Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+        'Mid Terraced Town House': {"property-type": "House", "built-form": "Mid-Terrace"},
+    }
+
+    for index, property_meta in tqdm(data.iterrows(), total=len(data)):
+        searcher = SearchEpc(
+            address1=property_meta["HouseNo"],
+            postcode=property_meta["Postcode"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key=None,
+            full_address=property_meta["Address"]
+        )
+        searcher.ordnance_survey_client.property_type = property_type_lookup[property_meta["Type"]]["property-type"]
+        searcher.ordnance_survey_client.built_form = property_type_lookup[property_meta["Type"]]["built-form"]
+        searcher.find_property(skip_os=True)
+
+        if searcher.newest_epc is None:
+            nodata.append(property_meta)
+            continue
+
+        if searcher.newest_epc.get("estimated"):
+            # We insert the row ID as our proxy for UPRN
+            proxy_uprn = int(property_meta["row_id"].split("_")[1])
+            searcher.newest_epc["uprn"] = proxy_uprn
+
+        newest_epc = searcher.newest_epc
+        older_epcs = searcher.older_epcs
+        full_sap_epc = searcher.full_sap_epc
+        # We also want to get the penultimate epc
+        penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
+        if not penultimate_epc:
+            penultimate_epc = newest_epc
+
+        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
+        eligibility.check_gbis_warmfront()
+        eligibility.check_eco4_warmfront()
+        # Neither check passed on the newest EPC: repeat both checks against the penultimate EPC
+        if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
+            eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
+            eligibility.check_gbis_warmfront()
+            eligibility.check_eco4_warmfront()
+            # If this is the case, we need to update the older epcs
+            # We don't update just to make data cleaning easier
+            if penultimate_epc.get("estimated") is None:
+                older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
+
+        # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
+        # Loft MUST be suitable
+        cavity_age = None
+        if (
+                eligibility.walls["is_cavity_wall"] and
+                eligibility.walls["is_filled_cavity"] and
+                eligibility.loft["suitability"] and
+                eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
+        ):
+            # We check the age of the cavity and if it's particularly old, we flag it
+            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)
+
+        # Full checks
+        eligibility.check_gbis()
+        eligibility.check_eco4()
+
+        if eligibility.eco4_warmfront["eligible"]:
+            if eligibility.epc["uprn"] == "":
+                eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[1])
+            scoring_dictionary = prepare_model_data_row(
+                property_id=property_meta["row_id"],
+                modelling_epc=eligibility.epc,
+                cleaned=cleaned,
+                cleaning_data=cleaning_data,
+                created_at=created_at,
+                old_data=older_epcs,
+                full_sap_epc=full_sap_epc,
+                photo_supply_lookup=photo_supply_lookup,
+                floor_area_decile_thresholds=floor_area_decile_thresholds
+            )
+            scoring_data.extend(scoring_dictionary)
+
+        results.append(
+            {
+                "row_id": property_meta["row_id"],
+                "uprn": eligibility.epc["uprn"],
+                "Address": property_meta["Address"],
+                "Postcode": property_meta["Postcode"],
+                "property_type": eligibility.epc["property-type"],
+                "gbis_eligible": eligibility.gbis_warmfront,
+                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
+                "eco4_message": eligibility.eco4_warmfront["message"],
+                "sap": float(eligibility.epc["current-energy-efficiency"]),
+                "gbis_eligible_future": eligibility.gbis["eligible"],
+                "gbis_eligible_future_message": eligibility.gbis["message"],
+                "eco4_eligible_future": eligibility.eco4["eligible"],
+                "eco4_eligible_future_message": eligibility.eco4["message"],
+                # Property components
+                "roof": eligibility.roof["clean_description"],
+                "walls": eligibility.walls["clean_description"],
+                "cavity_type": eligibility.cavity["type"],
+                "heating": eligibility.epc["mainheat-description"],
+                "tenure": eligibility.tenure,
+                "date_epc": eligibility.epc["lodgement-date"],
+                "loft_thickness": eligibility.roof["insulation_thickness"],
+                "cavity_age": cavity_age,
+                **eligibility.walls,  # unpacked last: a key that clashes with an entry above would override it
+                **eligibility.roof,
+            }
+        )
+
+    scoring_df = pd.DataFrame(scoring_data)
+    # Perform the same cleaning as in the model - first clean number of room variables though
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
+        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
+    )
+
+    scoring_df = DataProcessor.apply_averages_cleaning(
+        data_to_clean=scoring_df,
+        cleaning_data=cleaning_data,
+        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
+    ).drop(columns=["LOCAL_AUTHORITY"])
+
+    scoring_df = DataProcessor.clean_missings_after_description_process(
+        scoring_df,
+        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
+                "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
+    )
+
+    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
+    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)
+    # NOTE(review): portfolio_id says "ha33" although this module processes HA16 - confirm this is intended
+    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
+    all_predictions = model_api.predict_all(
+        df=scoring_df,
+        bucket="retrofit-data-dev",
+        prediction_buckets={
+            "sap_change_predictions": "retrofit-sap-predictions-dev",
+            "heat_demand_predictions": "retrofit-heat-predictions-dev",
+            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
+        }
+    )
+
+    predictions = all_predictions["sap_change_predictions"].copy()
+    results_df = pd.DataFrame(results)
+
+    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
+        results_df[["row_id", "sap"]], how="left", on="row_id"
+    )
+    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
+    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
+
+    results_df = results_df.merge(
+        predictions[["sap_uplift", "row_id"]],
+        how="left",
+        on="row_id"
+    )
+    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
+
+    eligibility_assessment = []
+    for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
+        # The upgrade requirements are dependent on the current SAP
+        # If the property is an F or G (SAP <= 38) the confidence bands sit around 55, otherwise around 69
+        if row["sap"] <= 38:
+            if row["post_install_sap"] >= 57:
+                eligibility_classification = "highest confidence"
+            elif row["post_install_sap"] >= 55:
+                eligibility_classification = "high confidence"
+            elif row["post_install_sap"] >= 53:
+                eligibility_classification = "medium confidence"
+            else:
+                eligibility_classification = "unlikely"
+        else:
+            if row["post_install_sap"] >= 71:
+                eligibility_classification = "highest confidence"
+            elif row["post_install_sap"] >= 69:
+                eligibility_classification = "high confidence"
+            elif row["post_install_sap"] >= 67:
+                eligibility_classification = "medium confidence"
+            else:
+                eligibility_classification = "unlikely"
+
+        eligibility_assessment.append(
+            {
+                "row_id": row["row_id"],
+                "eligibility_classification": eligibility_classification
+            }
+        )
+
+    eligibility_assessment = pd.DataFrame(eligibility_assessment)
+    results_df = results_df.merge(
+        eligibility_assessment, how="left", on="row_id"
+    )
+    return results_df, scoring_data, nodata
+
+
+def analyse_results(results_df, data, survey_list):
+    """Exploratory comparison of our eligibility results with the properties Warmfront surveyed.
+
+    :param results_df: per-property results, joined onto ``data`` by ``row_id``
+    :param data: asset list; row_id, survey_key, warmfront_identified and row_colour_name are read
+    :param survey_list: survey sheet; its first column is joined in as ``funding_scheme``
+    :return: None - every slice and rate computed below is only bound to a local name
+    """
+    analysis_data = data[["row_id", "survey_key", "warmfront_identified", "row_colour_name"]].merge(
+        results_df, how="left", on="row_id"
+    ).merge(
+        survey_list[["survey_key", survey_list.columns[0]]].rename(columns={survey_list.columns[0]: "funding_scheme"}),
+        how="left", on="survey_key"
+    )
+
+    analysis_data["roof_insulation_thickness"] = np.where(
+        pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"]
+    )
+    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
+        lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True)
+    )
+
+    warmfront_sold_eco4 = analysis_data[
+        (analysis_data["warmfront_identified"] == True) & (
+            analysis_data["funding_scheme"].isin(["ECO4 A/W", "AFFORDABLE WARMTH"]))
+    ]  # 1407 (presumably the row count when this was written - TODO confirm)
+
+    warmfront_sold_gbis = analysis_data[
+        (analysis_data["warmfront_identified"] == True) & (
+            analysis_data["funding_scheme"].isin(["ECO4 GBIS (ECO+)"]))
+    ]
+
+    ideal_eco4_warmfront_not_sold = analysis_data[
+        (analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & (
+                analysis_data["roof_insulation_thickness_numeric"] <= 100)
+    ]
+
+    secondary_eco4_warmfront_not_sold = analysis_data[
+        (analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & (
+                analysis_data["roof_insulation_thickness_numeric"] > 100)
+    ]
+
+    # Underperforming cavities: failed on a full cavity, cavity_age above 10 * 365 (presumably days) and thin loft
+    underperforming_cavities = analysis_data[
+        (analysis_data["eco4_message"] == "Failed due to full cavity - check cavity age") & (
+                analysis_data["cavity_age"] > 10 * 365
+        ) & (analysis_data["roof_insulation_thickness_numeric"] <= 100)
+    ]
+
+    identified_gbis_not_sold = analysis_data[
+        (analysis_data["gbis_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & (
+                analysis_data["eco4_eligible"] == False
+        )
+    ]
+
+    eco_eligible = analysis_data[analysis_data["eco4_eligible"] == True]
+    eco_ineligible = analysis_data[analysis_data["eco4_eligible"] == False]
+    eco_ineligible["eco4_message"].value_counts()  # result is not stored
+
+    # SAP too high:
+    sap_too_high = eco_ineligible[eco_ineligible["eco4_message"] == "sap too high"].copy()
+    further_possibilities = sap_too_high[
+        sap_too_high["walls"].isin(
+            [
+                "Cavity wall, as built, insulated",
+                "Cavity wall, as built, no insulation",
+                "Cavity wall, as built, partial insulation",
+                "Cavity wall, no insulation",
+                "Cavity wall, partial insulation"
+            ]
+        )
+    ]
+    # NOTE(review): despite its name this applies the same "sap too high" filter as sap_too_high - confirm intent
+    filled_cavities = eco_ineligible[
+        eco_ineligible["eco4_message"] == "sap too high"
+    ]
+
+    warmfront_identified = analysis_data[analysis_data["warmfront_identified"]]
+    warmfront_identified["walls"].value_counts()  # result is not stored
+
+    all_identified_gbis = analysis_data[
+        (analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin(
+            ["ECO4 GBIS (ECO+)"])) |
+        (analysis_data["gbis_eligible"] & analysis_data["eco4_eligible"].isin([False, None]))
+    ]
+
+    empty_cavity_desriptions = [
+        "Cavity wall, as built, no insulation", "Cavity wall, as built, partial insulation",
+        "Cavity wall, no insulation", "Cavity wall, partial insulation"
+    ]
+    empty_cavities = analysis_data[analysis_data["walls"].isin(empty_cavity_desriptions)]
+    remaining_empty = empty_cavities[~empty_cavities["warmfront_identified"]]
+
+    warmfront_identified = analysis_data[analysis_data["warmfront_identified"]]  # same slice as computed above
+    # Of the ECO jobs, what proportion do we get right
+    warmfront_identified_eco = warmfront_identified[
+        warmfront_identified["funding_scheme"].isin(["ECO4 A/W", "AFFORDABLE WARMTH"])
+    ]
+    eco_success_rate = warmfront_identified_eco["eco4_eligible"].sum() / warmfront_identified_eco.shape[0]
+
+    warmfront_identified_gbis = warmfront_identified[
+        warmfront_identified["funding_scheme"].isin(["ECO4 GBIS (ECO+)"])
+    ]
+    gbis_success_rate = warmfront_identified_gbis["gbis_eligible"].sum() / warmfront_identified_gbis.shape[0]
+
+    # Additional identified: eligible by our checks but not flagged as warmfront_identified
+    additional_identified_eco = analysis_data[
+        (analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False)
+    ]
+    additional_identified_eco["eligibility_classification"].value_counts()  # result is not stored
+    additional_identified_gbis = analysis_data[
+        (analysis_data["gbis_eligible"] == True) & (analysis_data["eco4_eligible"] == False) & (
+                analysis_data["warmfront_identified"] == False
+        )
+    ].shape[0]
+    # Future: the same counts using the *_future eligibility flags
+    additional_identified_eco_future = analysis_data[
+        (analysis_data["eco4_eligible_future"] == True) & (analysis_data["warmfront_identified"] == False)
+    ].shape[0]
+    additional_identified_gbis_future = analysis_data[
+        (analysis_data["gbis_eligible_future"] == True) & (analysis_data["eco4_eligible_future"] == False) & (
+                analysis_data["warmfront_identified"] == False
+        )
+    ].shape[0]
+
+
+def app():
+    """Run the HA16 eligibility pipeline end to end.
+
+    Loads and matches the asset/survey lists, pulls the shared cleaning artefacts from S3 and scores every
+    asset through ``get_epc_data``. Nothing is returned or written; the commented pickle snippet at the end
+    of the body shows how the results were stored and read back.
+    """
+    bucket = "retrofit-data-dev"
+
+    assets, surveys = load_data()
+    # Each asset gets a "ha16_<n>" key; get_epc_data splits it on "_" to derive a proxy UPRN
+    assets["row_id"] = [f"ha16_{position}" for position in range(len(assets))]
+
+    # The cleaned EPC lookup is unpacked with msgpack after download
+    packed_lookup = read_from_s3(s3_file_name="cleaned_epc_data/cleaned.bson", bucket_name=bucket)
+    cleaned_lookup = msgpack.unpackb(packed_lookup, raw=False)
+
+    averages_for_cleaning = read_dataframe_from_s3_parquet(
+        bucket_name=bucket,
+        file_key="sap_change_model/cleaning_dataset.parquet",
+    )
+
+    run_started_at = datetime.now().isoformat()
+
+    supply_lookup, decile_thresholds = SolarPhotoSupply.load(bucket=bucket)
+
+    results_df, scoring_data, nodata = get_epc_data(
+        assets,
+        cleaned_lookup,
+        averages_for_cleaning,
+        run_started_at,
+        supply_lookup,
+        decile_thresholds,
+    )
+
+    # Snapshot recipe (the old file was "ha16.pickle"):
+    #   import pickle
+    #   with open("ha16_10_jan.pickle", "wb") as f:  # store
+    #       pickle.dump({"scoring_data": scoring_data, "results": results_df, "nodata": nodata}, f)
+    #   with open("ha16_10_jan.pickle", "rb") as f:  # read back
+    #       saved = pickle.load(f)
+    #   scoring_data, results_df, nodata = saved["scoring_data"], saved["results"], saved["nodata"]
diff --git a/etl/eligibility/ha_15_32/ha24_app.py b/etl/eligibility/ha_15_32/ha24_app.py
new file mode 100644
index 00000000..dc4df018
--- /dev/null
+++ b/etl/eligibility/ha_15_32/ha24_app.py
@@ -0,0 +1,524 @@
+import os
+import msgpack
+import openpyxl
+from pathlib import Path
+from datetime import datetime
+import pandas as pd
+import numpy as np
+from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet
+from utils.logger import setup_logger
+from dotenv import load_dotenv
+from tqdm import tqdm
+from backend.SearchEpc import SearchEpc
+from etl.eligibility.Eligibility import Eligibility
+from etl.eligibility.ha_15_32.app import prepare_model_data_row
+from etl.epc.DataProcessor import DataProcessor
+from etl.epc.settings import COLUMNS_TO_MERGE_ON
+from backend.ml_models.api import ModelApi
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from recommendations.recommendation_utils import calculate_cavity_age
+from recommendation_utils import convert_thickness_to_numeric
+
+# Location of the .env file holding the credentials for this script.
+# NOTE(review): this module already lives in etl/eligibility/ha_15_32, so the path below resolves to
+# etl/eligibility/ha_15_32/etl/eligibility/ha_15_32/.env - confirm whether Path(__file__).parent / ".env"
+# was intended. load_dotenv does not fail when the file is missing, so a wrong path goes unnoticed.
+ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
+
+logger = setup_logger()
+
+# The .env file must be loaded BEFORE the token is read, otherwise a token that is only defined in the .env
+# file is picked up as None. Variables already present in the environment are left untouched by load_dotenv.
+load_dotenv(ENV_FILE)
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def load_data():
+    """
+    Build the HA 24 dataset: the asset list joined to the ECO4 survey list.
+
+    Both workbooks are read with openpyxl so that the fill colour of the first cell of every row can be captured
+    alongside the cell values. Each survey row is then matched to exactly one asset address; a ValueError is
+    raised as soon as a survey row cannot be matched uniquely.
+
+    :return: Tuple of (the asset list left-joined to the matched survey rows, with a "warmfront_identified" flag
+        that is False for unmatched assets; the cleaned survey list)
+    """
+
+    def read_rows(worksheet):
+        # Cell values and first-cell fill colour of every row below the header row
+        values, colours = [], []
+        for cells in worksheet.iter_rows(min_row=2, values_only=False):
+            values.append([cell.value for cell in cells])
+            fill_index = cells[0].fill.start_color.index
+            # '00000000' is treated as "no fill"
+            colours.append(None if fill_index == '00000000' else fill_index)
+        return values, colours
+
+    asset_workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 24 ASSET LIST.xlsx')
+    asset_sheet = asset_workbook.active
+    asset_header = [cell.value for cell in asset_sheet[1]]
+    asset_rows, asset_colours = read_rows(asset_sheet)
+
+    # Keep the first ten columns only, which drops the None-named columns
+    asset_list = pd.DataFrame(asset_rows, columns=asset_header).iloc[:, 0:10]
+    asset_list['row_color'] = asset_colours
+
+    # Translate the fill colour into a name and then into its meaning; anything not red or green is yellow
+    is_red = asset_list["row_color"] == "FFFF0000"
+    is_green = asset_list["row_color"] == "FF92D050"
+    asset_list["row_colour_name"] = np.where(is_red, "red", np.where(is_green, "green", "yellow"))
+
+    named_red = asset_list["row_colour_name"] == "red"
+    named_green = asset_list["row_colour_name"] == "green"
+    asset_list["row_colour_code"] = np.where(
+        named_red, "does not meet criteria",
+        np.where(named_green, "identified potential eco", "maybe in the future")
+    )
+
+    # The third column is labelled "Address" in the sheet but actually holds the postcode. Two columns carry
+    # the "Address" label, so only the third one is renamed, by position
+    asset_list.columns.values[2] = "Postcode"
+
+    # Comma-separated parts of the address, useful for matching later
+    address_parts = asset_list['Address'].str.split(',', expand=True)
+    address_parts.columns = ['temp', 'address2', 'address3', 'address4', 'address5', 'address6']
+    asset_list = pd.concat([asset_list, address_parts], axis=1)
+
+    # No comma separates the house number from the first address line, so split on spaces; only the house
+    # number is kept for the moment
+    first_part_words = asset_list['temp'].str.split(' ', expand=True)
+    first_part_words.columns = ['HouseNo', 'part1', 'part2', "part3", "part4"]
+    asset_list = pd.concat([asset_list, first_part_words[["HouseNo"]]], axis=1)
+
+    # Read in the surveys
+    survey_workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 24 ECO4 SURVEY LIST.xlsx')
+    survey_sheet = survey_workbook.active
+    survey_rows, survey_colours = read_rows(survey_sheet)
+
+    survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]])
+    survey_list["row_colour"] = survey_colours
+    survey_list["survey_key"] = [f"survey_{position}" for position in range(len(survey_list))]
+
+    # Tidy up the street/block name a bit
+    street = "Street / Block Name"
+    survey_list[street] = survey_list[street].str.replace("/", ", ").str.lower().str.strip()
+
+    # Corrections applied in this exact order: later entries rely on the output of earlier ones
+    street_corrections = [
+        ("council house, nidds lane", "nidds lane"),
+        ("wirral avenue", "wirrall avenue"),
+        ("st ives road", "st. ives crescent"),
+        ("sundringham road", "sandringham road"),
+        ("milton avenue", "milton road"),
+        ("st ives crescent", "st. ives crescent"),
+        ("council house, waterbelly lane", "waterbelly lane"),
+        # Generally remove "council house, " from the street name
+        ("council house, ", ""),
+        ("st. leodegars close", "st leodegars close"),
+    ]
+    for wrong, right in street_corrections:
+        survey_list[street] = survey_list[street].str.replace(wrong, right)
+
+    # Drop the rows without a street name, then rebuild the survey keys for the remaining rows
+    survey_list = survey_list[~pd.isnull(survey_list[street])]
+    survey_list["survey_key"] = [f"survey_{position}" for position in range(len(survey_list))]
+
+    matched = []
+    for _, survey_row in tqdm(survey_list.iterrows(), total=len(survey_list)):
+        house_number = survey_row["NO."]
+        if isinstance(house_number, str):
+            house_number = house_number.lower()
+
+        # Narrow the candidates step by step: street name, house number anywhere in the address, exact house
+        # number, and finally postcode. Each extra filter is only applied while the match is not yet unique
+        street_mask = asset_list["Address"].str.lower().str.contains(survey_row[street].lower())
+        candidates = asset_list[street_mask].copy()
+        number_mask = candidates["Address"].str.lower().str.contains(str(house_number))
+        candidates = candidates[number_mask]
+        if len(candidates) != 1:
+            candidates = candidates[candidates["HouseNo"] == str(house_number)]
+        if len(candidates) != 1:
+            postcode_mask = candidates["Postcode"].str.lower().str.contains(survey_row["Post Code"].lower())
+            candidates = candidates[postcode_mask]
+        if len(candidates) != 1:
+            print(survey_row[street])
+            print(house_number)
+            print(survey_row["Post Code"].lower())
+            raise ValueError("Investigate")
+
+        matched.append(
+            {
+                "survey_key": survey_row["survey_key"],
+                "matched_address": candidates["Address"].values[0],
+                "survey_house_no": survey_row["NO."],
+                "survey_street_name": survey_row[street],
+                "survey_postcode": survey_row["Post Code"],
+                "survey_status": survey_row["INSTALLED OR CANCELLED"]
+            }
+        )
+
+    matched = pd.DataFrame(matched)
+    matched["warmfront_identified"] = True
+
+    # Combine the asset list and the surveys; assets without a survey get warmfront_identified = False
+    data = asset_list.merge(
+        matched, how="left", left_on="Address", right_on="matched_address",
+    )
+    data["warmfront_identified"] = data["warmfront_identified"].fillna(False)
+
+    return data, survey_list
+
+
+def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
+ """
+ Look up the EPC records for every property in the asset data, run the eligibility checks, score the model
+ input rows and classify how confident we are that each ECO4-eligible property reaches its target SAP.
+
+ :param data: Asset data, iterated row by row. The columns read are "HouseNo", "Postcode", "Address",
+ "Property Type" and "row_id"
+ :param cleaned: Cleaned EPC lookup, passed to Eligibility, calculate_cavity_age and prepare_model_data_row
+ :param cleaning_data: Cleaning dataset, passed to prepare_model_data_row and the DataProcessor cleaning steps
+ :param created_at: Timestamp of the run, passed to prepare_model_data_row and ModelApi
+ :param photo_supply_lookup: Forwarded to prepare_model_data_row
+ :param floor_area_decile_thresholds: Forwarded to prepare_model_data_row
+ :return: Tuple of (results dataframe including "sap_uplift", "post_install_sap" and
+ "eligibility_classification"; the raw list of model input rows, i.e. before the cleaning applied to the
+ scoring dataframe; the list of asset rows for which no EPC was found)
+ """
+ scoring_data = []
+ results = []
+ nodata = []
+
+ # Maps the asset list's "Property Type" codes onto the property type set on the search client
+ property_type_lookup = {
+ "01 HOUSE": "House",
+ "02 FLAT": "Flat",
+ "03 BUNGALOW": "Bungalow",
+ "05 BEDSIT": "Flat",
+ "04 MAISONETTE": "Maisonette",
+ "01 HOUSE MID": "House",
+ "10 PBUNGALOW": "Bungalow",
+ "14 SFLAT": "Flat",
+ "12 SBEDSIT": "Flat",
+ "11 PFLAT": "Flat",
+ "13 SBUNGALOW": "Bungalow",
+ " 01 HOUSE MID": "House",
+ "09 PBEDSIT": "Flat"
+ }
+
+ for _, property_meta in tqdm(data.iterrows(), total=len(data)):
+
+ searcher = SearchEpc(
+ address1=property_meta["HouseNo"],
+ postcode=property_meta["Postcode"],
+ auth_token=EPC_AUTH_TOKEN,
+ os_api_key=None,
+ full_address=property_meta["Address"]
+ )
+ # No Ordnance Survey key is supplied and the search is run with skip_os=True, so the property type is
+ # set on the client by hand from the asset list
+ searcher.ordnance_survey_client.property_type = property_type_lookup[property_meta["Property Type"]]
+ searcher.find_property(skip_os=True)
+
+ # Properties without any EPC are recorded and skipped
+ if searcher.newest_epc is None:
+ nodata.append(property_meta)
+ continue
+
+ newest_epc = searcher.newest_epc
+ older_epcs = searcher.older_epcs
+ full_sap_epc = searcher.full_sap_epc
+ # We also want to get the penultimate epc
+ penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
+ if not penultimate_epc:
+ penultimate_epc = newest_epc
+
+ eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
+ eligibility.check_gbis_warmfront()
+ eligibility.check_eco4_warmfront()
+
+ # If the newest EPC is eligible for neither scheme, repeat the checks against the penultimate EPC
+ if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
+ eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
+ eligibility.check_gbis_warmfront()
+ eligibility.check_eco4_warmfront()
+ # If this is the case, we need to update the older epcs
+ # older_epcs = [
+ # x for x in older_epcs if x["lmk-key"] not in [newest_epc["lmk-key"], penultimate_epc["lmk-key"]]
+ # ]
+ # If this is the case, we need to update the older epcs
+ # We don't update just to make data cleaning easier
+ if penultimate_epc.get("estimated") is None:
+ older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
+
+ # Loft MUST be suitable
+ cavity_age = None
+ if (
+ eligibility.walls["is_cavity_wall"] and
+ eligibility.walls["is_filled_cavity"] and
+ eligibility.loft["suitability"] and
+ eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
+ ):
+ # We check the age of the cavity and if it's particularly old, we flag it
+ cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)
+
+ # Full checks; their outcome is reported in the "*_future" result columns below
+ eligibility.check_gbis()
+ eligibility.check_eco4()
+
+ if eligibility.eco4_warmfront["eligible"]:
+ # When the EPC has no UPRN, the numeric part of the row id is used in its place
+ if eligibility.epc["uprn"] in ["", None]:
+ eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[1])
+
+ scoring_dictionary = prepare_model_data_row(
+ property_id=property_meta["row_id"],
+ modelling_epc=eligibility.epc,
+ cleaned=cleaned,
+ cleaning_data=cleaning_data,
+ created_at=created_at,
+ old_data=older_epcs,
+ full_sap_epc=full_sap_epc,
+ photo_supply_lookup=photo_supply_lookup,
+ floor_area_decile_thresholds=floor_area_decile_thresholds
+ )
+ scoring_data.extend(scoring_dictionary)
+
+ results.append(
+ {
+ "row_id": property_meta["row_id"],
+ "uprn": eligibility.epc["uprn"],
+ "Address": property_meta["Address"],
+ "Postcode": property_meta["Postcode"],
+ "property_type": eligibility.epc["property-type"],
+ "gbis_eligible": eligibility.gbis_warmfront,
+ "eco4_eligible": eligibility.eco4_warmfront["eligible"],
+ "eco4_message": eligibility.eco4_warmfront["message"],
+ "sap": float(eligibility.epc["current-energy-efficiency"]),
+ "gbis_eligible_future": eligibility.gbis["eligible"],
+ "gbis_eligible_future_message": eligibility.gbis["message"],
+ "eco4_eligible_future": eligibility.eco4["eligible"],
+ "eco4_eligible_future_message": eligibility.eco4["message"],
+ # Property components
+ "roof": eligibility.roof["clean_description"],
+ "walls": eligibility.walls["clean_description"],
+ "cavity_type": eligibility.cavity["type"],
+ "heating": eligibility.epc["mainheat-description"],
+ "tenure": eligibility.tenure,
+ "date_epc": eligibility.epc["lodgement-date"],
+ "cavity_age": cavity_age,
+ # The full walls and roof dictionaries are spread in last, so any key they share with the
+ # entries above (or with each other) takes the later value
+ **eligibility.walls,
+ **eligibility.roof,
+ }
+ )
+
+ scoring_df = pd.DataFrame(scoring_data)
+
+ # Perform the same cleaning as in the model - first clean number of room variables though
+ scoring_df = DataProcessor.apply_averages_cleaning(
+ data_to_clean=scoring_df,
+ cleaning_data=cleaning_data,
+ cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
+ colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
+ )
+
+ # LOCAL_AUTHORITY is only needed as a merge key for the cleaning, so it is dropped afterwards
+ scoring_df = DataProcessor.apply_averages_cleaning(
+ data_to_clean=scoring_df,
+ cleaning_data=cleaning_data,
+ cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
+ ).drop(columns=["LOCAL_AUTHORITY"])
+
+ scoring_df = DataProcessor.clean_missings_after_description_process(
+ scoring_df,
+ ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
+ "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
+ )
+
+ scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
+ scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)
+
+ model_api = ModelApi(portfolio_id="ha24-eligibility", timestamp=created_at)
+ all_predictions = model_api.predict_all(
+ df=scoring_df,
+ bucket="retrofit-data-dev",
+ prediction_buckets={
+ "sap_change_predictions": "retrofit-sap-predictions-dev",
+ "heat_demand_predictions": "retrofit-heat-predictions-dev",
+ "carbon_change_predictions": "retrofit-carbon-predictions-dev"
+ }
+ )
+
+ # Only the SAP change predictions are used from here on
+ predictions = all_predictions["sap_change_predictions"].copy()
+
+ results_df = pd.DataFrame(results)
+
+ # The uplift of each prediction row is its predicted value minus the current SAP; the uplifts are then
+ # summed per property
+ predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
+ results_df[["row_id", "sap"]], how="left", on="row_id"
+ )
+ predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
+ predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
+
+ results_df = results_df.merge(
+ predictions[["sap_uplift", "row_id"]],
+ how="left",
+ on="row_id"
+ )
+ results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
+
+ eligibility_assessment = []
+ for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
+ # The upgrade requirements are dependent on the current SAP
+
+ # Properties with a current SAP of 38 or below (presumably EPC bands F and G - confirm) are judged
+ # against lower post-install thresholds (57/55/53) than the rest (71/69/67)
+ if row["sap"] <= 38:
+ if row["post_install_sap"] >= 57:
+ eligibility_classification = "highest confidence"
+ elif row["post_install_sap"] >= 55:
+ eligibility_classification = "high confidence"
+ elif row["post_install_sap"] >= 53:
+ eligibility_classification = "medium confidence"
+ else:
+ eligibility_classification = "unlikely"
+ else:
+
+ if row["post_install_sap"] >= 71:
+ eligibility_classification = "highest confidence"
+ elif row["post_install_sap"] >= 69:
+ eligibility_classification = "high confidence"
+ elif row["post_install_sap"] >= 67:
+ eligibility_classification = "medium confidence"
+ else:
+ eligibility_classification = "unlikely"
+
+ eligibility_assessment.append(
+ {
+ "row_id": row["row_id"],
+ "eligibility_classification": eligibility_classification
+ }
+ )
+
+ eligibility_assessment = pd.DataFrame(eligibility_assessment)
+
+ # Left merge: properties that are not ECO4 eligible get no classification
+ results_df = results_df.merge(
+ eligibility_assessment, how="left", on="row_id"
+ )
+ # Note that the raw scoring_data list is returned, not the cleaned scoring_df
+ return results_df, scoring_data, nodata
+
+
+def analyse_results(results_df, data, survey_list):
+    """
+    Exploratory comparison of the eligibility results against the properties Warmfront identified.
+
+    The asset data, the results and the survey funding scheme (the first column of the survey list) are joined
+    into one frame, from which a series of subsets, counts and a success rate are derived. All of them are local
+    variables meant for interactive inspection: the function returns nothing.
+
+    :param results_df: Per-property results, joined on "row_id"
+    :param data: Asset data providing "row_id", "survey_key" and "warmfront_identified"
+    :param survey_list: Survey list providing "survey_key"; its first column is used as the funding scheme
+    """
+    analysis_data = data[["row_id", "survey_key", "warmfront_identified"]].merge(
+        results_df, how="left", on="row_id"
+    )
+    scheme_column = survey_list.columns[0]
+    funding_schemes = survey_list[["survey_key", scheme_column]].rename(columns={scheme_column: "funding_scheme"})
+    analysis_data = analysis_data.merge(funding_schemes, how="left", on="survey_key")
+
+    # NEW
+
+    # Normalise missing thicknesses to None before converting them to a number
+    thickness = analysis_data["roof_insulation_thickness"]
+    analysis_data["roof_insulation_thickness"] = np.where(pd.isnull(thickness), None, thickness)
+    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
+        lambda value: convert_thickness_to_numeric(value, is_flat=False, is_pitched=True)
+    )
+
+    eco4_schemes = ["ECO4 A/W", "AFFORDABLE WARMTH"]
+    gbis_schemes = ["ECO4 GBIS (ECO+)"]
+
+    found_by_warmfront = analysis_data["warmfront_identified"] == True
+    warmfront_sold_eco4 = analysis_data[found_by_warmfront & analysis_data["funding_scheme"].isin(eco4_schemes)]
+    warmfront_sold_gbis = analysis_data[found_by_warmfront & analysis_data["funding_scheme"].isin(gbis_schemes)]
+    # 1407
+
+    missed_by_warmfront = analysis_data["warmfront_identified"] == False
+    thin_roof_insulation = analysis_data["roof_insulation_thickness_numeric"] <= 100
+    additional_eco4_warmfront_not_sold = analysis_data[
+        (analysis_data["eco4_eligible"] == True) & missed_by_warmfront & thin_roof_insulation
+    ]
+
+    # GBIS candidates exclude anything already counted as an additional ECO4 property
+    already_eco4 = analysis_data["row_id"].isin(additional_eco4_warmfront_not_sold["row_id"].values)
+    additional_gbis_warmfront_not_sold = analysis_data[
+        (analysis_data["gbis_eligible"] == True) & missed_by_warmfront & ~already_eco4
+    ]
+
+    additional_gbis_warmfront_not_sold["walls"].value_counts()
+    analysis_data["walls"].value_counts()
+
+    # END NEW
+
+    sold_as_eco = analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin(["ECO4 A/W"])
+    all_identified_eco = analysis_data[sold_as_eco | (analysis_data["eco4_eligible"])]
+
+    sold_as_gbis = analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin(gbis_schemes)
+    gbis_only = analysis_data["gbis_eligible"] & analysis_data["eco4_eligible"].isin([False, None])
+    all_identified_gbis = analysis_data[sold_as_gbis | gbis_only]
+
+    warmfront_identified = analysis_data[analysis_data["warmfront_identified"]]
+
+    # Of the ECO jobs, what proportion do we get right
+    warmfront_identified_eco = warmfront_identified[warmfront_identified["funding_scheme"].isin(eco4_schemes)]
+    eco_success_rate = warmfront_identified_eco["eco4_eligible"].sum() / warmfront_identified_eco.shape[0]
+
+    warmfront_identified_gbis = warmfront_identified[warmfront_identified["funding_scheme"].isin(gbis_schemes)]
+
+    # No gbis for this
+    # gbis_success_rate = warmfront_identified_gbis["gbis_eligible"].sum() / warmfront_identified_gbis.shape[0]
+
+    # Additional identified
+    additional_identified_eco = analysis_data[
+        (analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False)
+    ]
+
+    additional_identified_eco["eligibility_classification"].value_counts()
+
+    gbis_not_eco4_not_found = (
+        (analysis_data["gbis_eligible"] == True)
+        & (analysis_data["eco4_eligible"] == False)
+        & (analysis_data["warmfront_identified"] == False)
+    )
+    additional_identified_gbis = len(analysis_data[gbis_not_eco4_not_found])
+
+    # Future
+    eco_future_not_found = (
+        (analysis_data["eco4_eligible_future"] == True) & (analysis_data["warmfront_identified"] == False)
+    )
+    additional_identified_eco_future = len(analysis_data[eco_future_not_found])
+
+    gbis_future_not_found = (
+        (analysis_data["gbis_eligible_future"] == True)
+        & (analysis_data["eco4_eligible_future"] == False)
+        & (analysis_data["warmfront_identified"] == False)
+    )
+    additional_identified_gbis_future = len(analysis_data[gbis_future_not_found])
+
+
+def app():
+    """
+    Run the HA24 eligibility pipeline end to end.
+
+    Loads the asset and survey lists, tags every asset row with a "ha24_<n>" row id, fetches the cleaned EPC
+    lookup, the cleaning dataset and the solar lookups from the "retrofit-data-dev" bucket and hands everything
+    to get_epc_data. Nothing is returned or persisted: the commented-out snippets at the bottom show how the
+    outputs were pickled to, and read back from, disk.
+    """
+    asset_data, survey_list = load_data()
+    asset_data["row_id"] = [f"ha24_{position}" for position in range(len(asset_data))]
+
+    # The cleaned lookup is fetched as raw bytes and decoded with msgpack
+    packed_cleaned = read_from_s3(
+        bucket_name="retrofit-data-dev",
+        s3_file_name="cleaned_epc_data/cleaned.bson",
+    )
+    cleaned = msgpack.unpackb(packed_cleaned, raw=False)
+
+    cleaning_data = read_dataframe_from_s3_parquet(
+        file_key="sap_change_model/cleaning_dataset.parquet",
+        bucket_name="retrofit-data-dev",
+    )
+
+    # Single timestamp for the whole run, forwarded to get_epc_data
+    created_at = datetime.now().isoformat()
+
+    solar_lookups = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+    photo_supply_lookup, floor_area_decile_thresholds = solar_lookups
+
+    results_df, scoring_data, nodata = get_epc_data(
+        asset_data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds
+    )
+
+    # Pickle the results just in case
+    # import pickle
+    # with open("ha24_10_jan.pickle", "wb") as f:
+    #     pickle.dump(
+    #         {
+    #             "scoring_data": scoring_data,
+    #             "results": results_df,
+    #             "nodata": nodata
+    #         }, f
+    #     )
+
+    # Read the pickle back in
+    # import pickle
+    # with open("ha24_10_jan.pickle", "rb") as f:
+    #     saved = pickle.load(f)
+    # scoring_data = saved["scoring_data"]
+    # results_df = saved["results"]
+    # nodata = saved["nodata"]
diff --git a/etl/eligibility/ha_15_32/ha25_app.py b/etl/eligibility/ha_15_32/ha25_app.py
new file mode 100644
index 00000000..7dd36726
--- /dev/null
+++ b/etl/eligibility/ha_15_32/ha25_app.py
@@ -0,0 +1,883 @@
+import os
+import msgpack
+import openpyxl
+from pathlib import Path
+from datetime import datetime
+import pandas as pd
+import numpy as np
+from utils.s3 import read_from_s3
+from utils.logger import setup_logger
+from dotenv import load_dotenv
+from utils.s3 import read_dataframe_from_s3_parquet
+from tqdm import tqdm
+from backend.SearchEpc import SearchEpc
+from etl.eligibility.Eligibility import Eligibility
+from etl.eligibility.ha_15_32.app import prepare_model_data_row
+from etl.epc.DataProcessor import DataProcessor
+from etl.epc.settings import COLUMNS_TO_MERGE_ON
+from backend.ml_models.api import ModelApi
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from recommendations.recommendation_utils import calculate_cavity_age
+from recommendation_utils import convert_thickness_to_numeric
+
+import re
+
+# Location of the .env file holding the credentials for this script.
+# NOTE(review): this module already lives in etl/eligibility/ha_15_32, so the path below resolves to
+# etl/eligibility/ha_15_32/etl/eligibility/ha_15_32/.env - confirm whether Path(__file__).parent / ".env"
+# was intended. load_dotenv does not fail when the file is missing, so a wrong path goes unnoticed.
+ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
+
+logger = setup_logger()
+
+# The .env file must be loaded BEFORE the token is read, otherwise a token that is only defined in the .env
+# file is picked up as None. Variables already present in the environment are left untouched by load_dotenv.
+load_dotenv(ENV_FILE)
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def load_data():
+ """
+ Load the HA 25 asset list and the ECO4 prospects survey list, and match each survey row to an asset.
+
+ Three workbooks are read: the asset list (whose header row is the final row of the sheet), the historical
+ ECO3 survey list ("CAVITY" sheet) and the ECO4 prospects list ("LiveWest" sheet). The ECO3 list is built and
+ colour-coded here but is not part of the return value. Survey rows that cannot be matched to exactly one
+ asset are collected in a no-match list instead of raising.
+
+ :return: Tuple of (the asset list left-joined to the matched survey rows, with a "warmfront_identified" flag
+ that is False for unmatched assets; the ECO4 prospects survey list; the survey rows whose survey_key was not
+ matched to an asset)
+ """
+ workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 25 ASSET LIST.xlsx', data_only=True)
+ sheet = workbook.active
+
+ rows_data = []
+ rows_colors = []
+ for row in sheet.iter_rows(min_row=1, values_only=True): # use values_only=True to get values
+
+ row_data = list(row) # No need for comprehension, values_only=True returns a tuple of values
+ rows_data.append(row_data)
+
+ # Headers are on the final row. Pop them off and store them and then remove them from rows_data
+ headers = rows_data.pop()
+ # The postcode header is None, so we replace it with "postcode"
+ headers[-1] = "postcode"
+
+ # Handle colours separately: a second pass with values_only=False is needed to reach the cell fill
+ for row in sheet.iter_rows(min_row=1, values_only=False):
+ # Assume first cell color is indicative of entire row
+ row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
+ rows_colors.append(row_color)
+
+ # Remove the final row of colours, which is the header
+ rows_colors.pop()
+
+ asset_list = pd.DataFrame(rows_data, columns=headers)
+ asset_list['row_color'] = rows_colors
+
+ # Anything that is neither red nor green is labelled yellow
+ asset_list["row_colour_name"] = np.where(
+ asset_list["row_color"] == "FFFF0000", "red",
+ np.where(asset_list["row_color"] == "FF00B050", "green", "yellow")
+ )
+
+ asset_list["row_colour_code"] = np.where(
+ asset_list["row_colour_name"] == "red", "does not meet criteria",
+ np.where(asset_list["row_colour_name"] == "green", "identified potential eco", "maybe in the future")
+ )
+
+ # Lower-cased copy of the address with "flat" removed, so that the first space-separated word can be used
+ # as the house number
+ asset_list["address"] = asset_list["T1_Address"].copy().str.lower()
+ asset_list["address"] = asset_list["address"].str.replace("flat", "")
+ asset_list["address"] = asset_list["address"].str.strip()
+
+ # The column assignment below requires the split to produce exactly 14 parts
+ split_addresses = asset_list['address'].str.split(' ', expand=True)
+ split_addresses.columns = ['HouseNo', 'address2', 'address3', 'address4', 'address5', 'address6', 'address7',
+ 'address8',
+ 'address9', 'address10', 'address11', 'address12', 'address13', 'address14', ]
+ split_addresses["HouseNo"] = split_addresses["HouseNo"].str.replace(";", "")
+
+ # We could re-concatenate but we only care about HouseNo for the moment
+ asset_list = pd.concat([asset_list, split_addresses[["HouseNo"]]], axis=1)
+ asset_list["postcode"] = asset_list["postcode"].str.strip()
+
+ # We analyse the historical ECO3 survey list
+ eco3_survey_workbook = openpyxl.load_workbook(f'etl/eligibility/ha_15_32/HESTIA - HA 25 ECO3 SURVEY LIST.xlsx')
+ eco3_survey_sheet = eco3_survey_workbook["CAVITY"]
+
+ eco3_survey_rows = []
+ eco3_survey_colors = []
+
+ for row in eco3_survey_sheet.iter_rows(min_row=2, values_only=False): # Assuming the first row is headers
+ row_data = [cell.value for cell in row] # This will get you the cell values
+ row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
+ # row_color = COLOR_INDEX[row_color]
+ eco3_survey_rows.append(row_data)
+ eco3_survey_colors.append(row_color)
+
+ # Some adhoc analysis on the eco3 survey list, just to get completion and cancellation rates historically
+ eco3_survey_list = pd.DataFrame(eco3_survey_rows, columns=[cell.value for cell in eco3_survey_sheet[1]])
+ eco3_survey_list["row_colour"] = eco3_survey_colors
+ # Remove rows where street name is missing
+ eco3_survey_list = eco3_survey_list[~pd.isnull(eco3_survey_list["Street / Block Name"])]
+ # We need to parse the row colours
+ # We have the following mappings:
+ # FF7030A0: purple
+ # FF92D050: green
+ # FFFF0000: red
+ # FFFFFF00: yellow
+ # FF38FD23: green
+ eco3_survey_list["row_colour_name"] = np.where(
+ eco3_survey_list["row_colour"] == "FF7030A0", "purple",
+ np.where(eco3_survey_list["row_colour"] == "FF92D050", "green",
+ np.where(eco3_survey_list["row_colour"] == "FFFF0000", "red",
+ np.where(eco3_survey_list["row_colour"] == "FFFFFF00", "yellow",
+ np.where(eco3_survey_list["row_colour"] == "FF38FD23", "green", "unknown")
+ )
+ )
+ )
+ )
+
+ # We map the meaning:
+ # red: cancelled
+ # green: installed advised install complete
+ # purple: installer advised install complete + post works EPC
+ # yellow: filler row - drop
+ eco3_survey_list["row_colour_code"] = np.where(
+ eco3_survey_list["row_colour_name"] == "red", "cancelled",
+ np.where(eco3_survey_list["row_colour_name"] == "green", "installed advised install complete",
+ np.where(eco3_survey_list["row_colour_name"] == "purple",
+ "installer advised install complete + post works EPC",
+ np.where(eco3_survey_list["row_colour_name"] == "yellow", "filler row - drop", "unknown")
+ )
+ )
+ )
+
+ # This is good enough for the indicative cancellation rates
+
+ # We now read in the indicative survey list which identified prospects for ECO4 works
+ eco4_survey_workbook = openpyxl.load_workbook(
+ f'etl/eligibility/ha_15_32/HESTIA - HA 25 ADHOC ISOLATED IDENTIFIED PROPERTIES FOR CWI.xlsx'
+ )
+ eco4_prospect_survey_sheet = eco4_survey_workbook["LiveWest"]
+
+ eco4_prospects_survey_rows = []
+ eco4_prospects_survey_colors = []
+
+ for row in eco4_prospect_survey_sheet.iter_rows(min_row=2, values_only=False): # Assuming the first row is headers
+ row_data = [cell.value for cell in row] # This will get you the cell values
+ row_color = row[0].fill.start_color.index if row[0].fill.start_color.index != '00000000' else None
+ # row_color = COLOR_INDEX[row_color]
+ eco4_prospects_survey_rows.append(row_data)
+ eco4_prospects_survey_colors.append(row_color)
+
+ # Build the ECO4 prospects survey list, using the first row of the sheet as the column names
+ eco4_prospects_survey_list = pd.DataFrame(
+ eco4_prospects_survey_rows, columns=[cell.value for cell in eco4_prospect_survey_sheet[1]]
+ )
+ eco4_prospects_survey_list["row_colour"] = eco4_prospects_survey_colors
+
+ eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.lower()
+ eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.strip()
+
+ # Drop the rows without an address and key the remaining rows
+ eco4_prospects_survey_list = eco4_prospects_survey_list[~pd.isnull(eco4_prospects_survey_list["ADDRESS 1"])]
+ eco4_prospects_survey_list["survey_key"] = ["survey_" + str(i) for i in range(0, len(eco4_prospects_survey_list))]
+
+ # Correct some errors in the survey list
+ eco4_prospects_survey_list["POSTCODE"] = np.where(
+ (eco4_prospects_survey_list["ADDRESS 1"] == "berry park") &
+ (eco4_prospects_survey_list["POSTCODE"] == "PL12 6HP"),
+ "PL12 6EN",
+ eco4_prospects_survey_list["POSTCODE"]
+ )
+
+ # Remove semi colons from address in asset and survey list
+ asset_list["T1_Address"] = asset_list["T1_Address"].str.replace(";", "")
+ eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace(";", "")
+
+ # In the prospects survey list, we have 6 WALKHAM MEADOWS listed twice, which should be 6a and 6b.
+ # NOTE(review): 838 and 839 are index labels of the survey dataframe, so this depends on the row order of
+ # the source workbook - confirm if the workbook changes
+ eco4_prospects_survey_list.loc[838, "NO"] = "6a"
+ eco4_prospects_survey_list.loc[839, "NO"] = "6b"
+
+ # 3, 7, 9 BOLDVENTURE ROAD should be BOLDVENTURE CLOSE
+ eco4_prospects_survey_list["ADDRESS 1"] = np.where(
+ (eco4_prospects_survey_list["ADDRESS 1"] == "boldventure road") &
+ (eco4_prospects_survey_list["NO"].isin([3, 7, 9])),
+ "boldventure close",
+ eco4_prospects_survey_list["ADDRESS 1"]
+ )
+
+ # "old farm road" with postcode PL5 1EP is corrected to "old school road"
+ eco4_prospects_survey_list["ADDRESS 1"] = np.where(
+ (eco4_prospects_survey_list["ADDRESS 1"] == "old farm road") & (
+ eco4_prospects_survey_list["POSTCODE"] == "PL5 1EP"),
+ "old school road",
+ eco4_prospects_survey_list["ADDRESS 1"]
+ )
+
+ # 52 "croft orchard" with postcode TQ12 6RP is corrected to "drum way"
+ eco4_prospects_survey_list["ADDRESS 1"] = np.where(
+ (eco4_prospects_survey_list["ADDRESS 1"] == "croft orchard") & (
+ eco4_prospects_survey_list["POSTCODE"] == "TQ12 6RP") & (
+ eco4_prospects_survey_list["NO"] == 52),
+ "drum way",
+ eco4_prospects_survey_list["ADDRESS 1"]
+ )
+
+ # String replace
+ eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace(
+ "the gulls, collaton road", "the gulls collaton road"
+ )
+ eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace(
+ "crows-an-eglose", "crows-an-eglos"
+ )
+
+ # We have a high volume of rows that do not match
+ matched = []
+ nomatch = []
+ for _, row in tqdm(eco4_prospects_survey_list.iterrows(), total=len(eco4_prospects_survey_list)):
+
+ # Not in the asset list
+ if (row["ADDRESS 1"] == "berry park") and row["NO"] in [40, 42] and row["POSTCODE"] == "PL12 6EN":
+ nomatch.append(row.to_dict())
+ continue
+
+ # Not in the asset list
+ if (row["ADDRESS 1"] == "roberts road") and row["NO"] == 23 and row["POSTCODE"] == "PL5 1DP":
+ nomatch.append(row.to_dict())
+ continue
+
+ # Not in the asset list
+ if row["ADDRESS 1"] in [
+ "kaynton mead", "broadmoor lane", "hoopers barton", "ecos court", "selwood road",
+ "castle street"
+ ]:
+ nomatch.append(row.to_dict())
+ continue
+
+ house_number = row["NO"]
+ if isinstance(house_number, str):
+ house_number = house_number.lower()
+
+ # For values such as "flat 3", keep only what follows "flat"
+ if "flat" in house_number:
+ house_number = house_number.split("flat")[1].strip()
+
+ # Filter on the first line of the address, then narrow down step by step (house number anywhere in the
+ # address, exact house number, postcode); each step only runs while the match is not yet unique
+ df = asset_list[asset_list["T1_Address"].str.lower().str.contains(row["ADDRESS 1"].lower())].copy()
+ if house_number is not None:
+ if df.shape[0] != 1:
+ df = df[df["T1_Address"].str.lower().str.contains(str(house_number))]
+ if df.shape[0] != 1:
+ if house_number is not None:
+ df = df[df["HouseNo"] == str(house_number)]
+ if df.shape[0] != 1:
+ if row["POSTCODE"] is not None:
+ df = df[df["postcode"].str.lower().str.contains(row["POSTCODE"].lower())]
+ # Still ambiguous (or nothing left): record the survey row as unmatched and move on
+ if df.shape[0] != 1:
+ nomatch.append(row.to_dict())
+ continue
+
+ matched.append(
+ {
+ "survey_key": row["survey_key"],
+ "matched_address": df["T1_Address"].values[0],
+ "survey_house_no": row["NO"],
+ "survey_street_name": row["ADDRESS 1"],
+ "survey_postcode": row["POSTCODE"],
+ }
+ )
+
+ # nomatch is only built for inspection; it is not part of the return value
+ nomatch = pd.DataFrame(nomatch)
+ matched = pd.DataFrame(matched)
+
+ matched["warmfront_identified"] = True
+
+ # Combine asset list and surveys; assets without a matched survey row get warmfront_identified = False
+ data = asset_list.merge(
+ matched, how="left", left_on="T1_Address", right_on="matched_address",
+ )
+ data["warmfront_identified"] = data["warmfront_identified"].fillna(False)
+
+ # Survey rows that were not matched to any asset
+ lost_identified_properties = eco4_prospects_survey_list[
+ ~eco4_prospects_survey_list["survey_key"].isin(matched["survey_key"])
+ ]
+
+ return data, eco4_prospects_survey_list, lost_identified_properties
+
+
+def map_year_to_age_band(year):
+    """
+    Map a construction year onto its "England and Wales" construction age band label.
+
+    :param year: The construction year, as an int or anything int() can convert (e.g. "1975" or 1975.0)
+    :return: The age band label, or "Invalid Year" when the year cannot be converted to an integer
+    """
+    try:
+        year = int(year)
+    except (TypeError, ValueError, OverflowError):
+        # TypeError covers None, ValueError covers unparseable strings and NaN, OverflowError covers infinity
+        return "Invalid Year"
+
+    # Inclusive upper bound of every band, in ascending order; the first bound the year fits under wins
+    band_upper_bounds = (
+        (1899, "England and Wales: before 1900"),
+        (1929, "England and Wales: 1900-1929"),
+        (1949, "England and Wales: 1930-1949"),
+        (1966, "England and Wales: 1950-1966"),
+        (1975, "England and Wales: 1967-1975"),
+        (1982, "England and Wales: 1976-1982"),
+        (1990, "England and Wales: 1983-1990"),
+        (1995, "England and Wales: 1991-1995"),
+        (2002, "England and Wales: 1996-2002"),
+        (2006, "England and Wales: 2003-2006"),
+        (2011, "England and Wales: 2007-2011"),
+    )
+    for upper_bound, label in band_upper_bounds:
+        if year <= upper_bound:
+            return label
+
+    # All remaining years are 2012 onwards
+    return "England and Wales: 2012 onwards"
+
+
+def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
+ """
+ Look up the EPC for every property in the asset list, run the warmfront/GBIS/ECO4 eligibility checks
+ and score the properties with the SAP change model.
+
+ :param data: Asset list DataFrame; rows are read for "HouseNo", "postcode", "address", "T1_AssetType",
+ "row_id", "Build Yr" and "T1_Address"
+ :param cleaned: Passed through to Eligibility, calculate_cavity_age and prepare_model_data_row
+ :param cleaning_data: Passed through to prepare_model_data_row and DataProcessor.apply_averages_cleaning
+ :param created_at: Timestamp passed to prepare_model_data_row and ModelApi
+ :param photo_supply_lookup: Passed through to prepare_model_data_row
+ :param floor_area_decile_thresholds: Passed through to prepare_model_data_row
+ :return: Tuple of (results_df, scoring_data, nodata): the per-property results DataFrame, the
+ accumulated output of prepare_model_data_row, and the rows for which no EPC was found
+ """
+ scoring_data = []
+ results = []
+ nodata = []
+
+ # Maps the asset list's "T1_AssetType" onto EPC-style property-type / built-form values
+ property_type_lookup = {
+ "Flat": {"property-type": "Flat", "built-form": None},
+ "Mid Terrace House": {"property-type": "House", "built-form": "Mid-Terrace"},
+ "End Terrace House": {"property-type": "House", "built-form": "End-Terrace"},
+ "Maisonnette": {"property-type": "Flat", "built-form": None},
+ "Semi Detached House": {"property-type": "House", "built-form": "Semi-Detached"},
+ "Detached House": {"property-type": "House", "built-form": "Detached"},
+ "Coach House": {"property-type": "House", "built-form": "Detached"},
+ "Bungalow": {"property-type": "Bungalow", "built-form": None},
+ "Detached Bungalow": {"property-type": "Bungalow", "built-form": "Detached"},
+ "House": {"property-type": "House", "built-form": None},
+ "Semi Detached Bung": {"property-type": "Bungalow", "built-form": "Semi-Detached"},
+ "Bedspace": {"property-type": None, "built-form": None},
+ "Office Buildings": {"property-type": None, "built-form": None},
+ "End Terrace Bungalow": {"property-type": "Bungalow", "built-form": "End-Terrace"},
+ "Mid Terrace Bungalow": {"property-type": "Bungalow", "built-form": "Mid-Terrace"},
+ "Bedsit": {"property-type": "Flat", "built-form": None},
+ "Mid Terrace Housekeeping": {"property-type": "House", "built-form": "Mid-Terrace"},
+ "Mid Terrace Housekeeping ": {"property-type": "House", "built-form": "Mid-Terrace"},
+ "End Terrace Housex": {"property-type": "House", "built-form": "End-Terrace"},
+ "Guest Room": {"property-type": None, "built-form": None}
+ }
+
+ # iterrows() yields (index, row) pairs; iterating the DataFrame itself would only yield column labels
+ for _, property_meta in tqdm(data.iterrows(), total=len(data)):
+
+ searcher = SearchEpc(
+ address1=property_meta["HouseNo"],
+ postcode=property_meta["postcode"],
+ auth_token=EPC_AUTH_TOKEN,
+ os_api_key=None,
+ full_address=property_meta["address"]
+ )
+ searcher.ordnance_survey_client.property_type = property_type_lookup[property_meta["T1_AssetType"]][
+ "property-type"]
+ searcher.ordnance_survey_client.built_form = property_type_lookup[property_meta["T1_AssetType"]]["built-form"]
+ searcher.find_property(skip_os=True)
+
+ if searcher.newest_epc is None:
+ nodata.append(property_meta)
+ continue
+
+ if searcher.newest_epc.get("estimated"):
+ # We insert the row ID as our proxy for UPRN
+ proxy_uprn = int(property_meta["row_id"].split("_")[1])
+ searcher.newest_epc["uprn"] = proxy_uprn
+
+ newest_epc = searcher.newest_epc
+ older_epcs = searcher.older_epcs
+ full_sap_epc = searcher.full_sap_epc
+ # We also want to get the penultimate epc
+ # penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
+ # if not penultimate_epc:
+ # penultimate_epc = newest_epc
+
+ eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
+ eligibility.check_gbis_warmfront()
+ eligibility.check_eco4_warmfront()
+
+ # if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
+ # eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
+ # eligibility.check_gbis_warmfront()
+ # eligibility.check_eco4_warmfront()
+ # # If this is the case, we need to update the older epcs
+ # # We don't update just to make data cleaning easier
+ # if penultimate_epc.get("estimated") is None:
+ # older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
+
+ # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
+
+ # Loft MUST be suitable
+ cavity_age = None
+ if (
+ eligibility.walls["is_cavity_wall"] and
+ eligibility.walls["is_filled_cavity"] and
+ eligibility.loft["suitability"] and
+ eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
+ ):
+ # We check the age of the cavity and if it's particularly old, we flag it
+ cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)
+
+ # Full checks
+ eligibility.check_gbis()
+ eligibility.check_eco4()
+
+ if eligibility.eco4_warmfront["eligible"]:
+ if eligibility.epc["uprn"] in ["", None]:
+ eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[1])
+
+ if eligibility.epc["construction-age-band"] in ["", None]:
+ eligibility.epc["construction-age-band"] = map_year_to_age_band(property_meta["Build Yr"])
+
+ # This is not the right place to do this but this is temp
+ if eligibility.epc["extension-count"] in ["", None]:
+ eligibility.epc["extension-count"] = 0
+
+ # Not in the right place but temp
+ if eligibility.epc["built-form"] in ["", None]:
+ if not older_epcs:
+ eligibility.epc["built-form"] = "Mid-Terrace"
+
+ scoring_dictionary = prepare_model_data_row(
+ property_id=property_meta["row_id"],
+ modelling_epc=eligibility.epc,
+ cleaned=cleaned,
+ cleaning_data=cleaning_data,
+ created_at=created_at,
+ old_data=older_epcs,
+ full_sap_epc=full_sap_epc,
+ photo_supply_lookup=photo_supply_lookup,
+ floor_area_decile_thresholds=floor_area_decile_thresholds,
+ )
+ scoring_data.extend(scoring_dictionary)
+
+ results.append(
+ {
+ "row_id": property_meta["row_id"],
+ "uprn": eligibility.epc["uprn"],
+ "Address": property_meta["T1_Address"],
+ "Postcode": property_meta["postcode"],
+ "property_type": eligibility.epc["property-type"],
+ "gbis_eligible": eligibility.gbis_warmfront,
+ "eco4_eligible": eligibility.eco4_warmfront["eligible"],
+ "eco4_message": eligibility.eco4_warmfront["message"],
+ "sap": float(eligibility.epc["current-energy-efficiency"]),
+ "gbis_eligible_future": eligibility.gbis["eligible"],
+ "gbis_eligible_future_message": eligibility.gbis["message"],
+ "eco4_eligible_future": eligibility.eco4["eligible"],
+ "eco4_eligible_future_message": eligibility.eco4["message"],
+ # Property components
+ "roof": eligibility.roof["clean_description"],
+ "walls": eligibility.walls["clean_description"],
+ "cavity_type": eligibility.cavity["type"],
+ "heating": eligibility.epc["mainheat-description"],
+ "tenure": eligibility.tenure,
+ "date_epc": eligibility.epc["lodgement-date"],
+ "cavity_age": cavity_age,
+ **eligibility.walls,
+ **eligibility.roof,
+ }
+ )
+
+ scoring_df = pd.DataFrame(scoring_data)
+
+ # Perform the same cleaning as in the model - first clean number of room variables though
+ scoring_df = DataProcessor.apply_averages_cleaning(
+ data_to_clean=scoring_df,
+ cleaning_data=cleaning_data,
+ cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
+ colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
+ )
+
+ scoring_df = DataProcessor.apply_averages_cleaning(
+ data_to_clean=scoring_df,
+ cleaning_data=cleaning_data,
+ cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
+ ).drop(columns=["LOCAL_AUTHORITY"])
+
+ scoring_df = DataProcessor.clean_missings_after_description_process(
+ scoring_df,
+ ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
+ "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
+ )
+
+ scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
+ scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)
+
+ # NOTE(review): the portfolio id says "ha33" although the row ids in this module are "ha25_..." - confirm
+ model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
+ all_predictions = model_api.predict_all(
+ df=scoring_df,
+ bucket="retrofit-data-dev",
+ prediction_buckets={
+ "sap_change_predictions": "retrofit-sap-predictions-dev",
+ "heat_demand_predictions": "retrofit-heat-predictions-dev",
+ "carbon_change_predictions": "retrofit-carbon-predictions-dev"
+ }
+ )
+
+ predictions = all_predictions["sap_change_predictions"].copy()
+
+ results_df = pd.DataFrame(results)
+
+ # Uplift per prediction row is (predicted SAP - current SAP); the uplifts are then summed per property
+ predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
+ results_df[["row_id", "sap"]], how="left", on="row_id"
+ )
+ predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
+ predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
+
+ results_df = results_df.merge(
+ predictions[["sap_uplift", "row_id"]],
+ how="left",
+ on="row_id"
+ )
+ results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
+
+ eligibility_assessment = []
+ for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
+ # The upgrade requirements are dependent on the current SAP
+
+ # Properties with a current SAP of 38 or below are graded against lower post-install thresholds
+ if row["sap"] <= 38:
+ if row["post_install_sap"] >= 57:
+ eligibility_classification = "highest confidence"
+ elif row["post_install_sap"] >= 55:
+ eligibility_classification = "high confidence"
+ elif row["post_install_sap"] >= 53:
+ eligibility_classification = "medium confidence"
+ else:
+ eligibility_classification = "unlikely"
+ else:
+
+ if row["post_install_sap"] >= 71:
+ eligibility_classification = "highest confidence"
+ elif row["post_install_sap"] >= 69:
+ eligibility_classification = "high confidence"
+ elif row["post_install_sap"] >= 67:
+ eligibility_classification = "medium confidence"
+ else:
+ eligibility_classification = "unlikely"
+
+ eligibility_assessment.append(
+ {
+ "row_id": row["row_id"],
+ "eligibility_classification": eligibility_classification
+ }
+ )
+
+ eligibility_assessment = pd.DataFrame(eligibility_assessment)
+
+ results_df = results_df.merge(
+ eligibility_assessment, how="left", on="row_id"
+ )
+ return results_df, scoring_data, nodata
+
+
+def get_epc_data_for_lost_surveys(
+ lost_identified_properties, cleaned, cleaning_data, created_at, photo_supply_lookup,
+ floor_area_decile_thresholds
+):
+ """
+ Look up EPCs for surveyed properties that could not be matched to the asset list, run the
+ warmfront/GBIS/ECO4 eligibility checks and score the properties with the SAP change model.
+
+ :param lost_identified_properties: Survey DataFrame; rows are read for "NO", "ADDRESS 1", "ADDRESS 2",
+ "ADDRESS 3", "POSTCODE" and "PROPERTY TYPE". A "row_id" column is written onto this DataFrame in place
+ :param cleaned: Passed through to Eligibility and prepare_model_data_row
+ :param cleaning_data: Passed through to prepare_model_data_row and DataProcessor.apply_averages_cleaning
+ :param created_at: Timestamp passed to prepare_model_data_row and ModelApi
+ :param photo_supply_lookup: Passed through to prepare_model_data_row
+ :param floor_area_decile_thresholds: Passed through to prepare_model_data_row
+ :return: Tuple of (results_df, scoring_data, nodata): the per-property results DataFrame, the
+ accumulated output of prepare_model_data_row, and the rows for which no EPC was found
+ """
+ # Row ids have the form "lost_surveys_ha25_<n>", so the numeric part is the LAST "_"-separated token
+ lost_identified_properties["row_id"] = [
+ "lost_surveys_ha25_" + str(i) for i in range(0, len(lost_identified_properties))
+ ]
+
+ scoring_data = []
+ results = []
+ nodata = []
+
+ # Maps the survey's "PROPERTY TYPE" onto EPC-style property-type / built-form values
+ # NOTE(review): every entry, including the flat/floor ones, maps to property-type "House" - confirm intended
+ property_type_lookup = {
+ "MID-TERRACE": {"property-type": "House", "built-form": "Mid-Terrace"},
+ "N/A": {"property-type": "House", "built-form": None},
+ "END-TERRACE": {"property-type": "House", "built-form": "End-Terrace"},
+ "GROUND-FLOOR": {"property-type": "House", "built-form": None},
+ "TOP-FLOOR": {"property-type": "House", "built-form": None},
+ "SEMI-DETACHED": {"property-type": "House", "built-form": "Semi-Detached"},
+ "MID-FLOOR": {"property-type": "House", "built-form": None},
+ "TOP-FLOOR FLAT": {"property-type": "House", "built-form": None},
+ "DETACHED": {"property-type": "House", "built-form": "Detached"},
+ "MID-FLOOR FLAT": {"property-type": "House", "built-form": None},
+ "SEMI- DETACHED": {"property-type": "House", "built-form": "Semi-Detached"},
+ "NO EPC ON GOV": {"property-type": "House", "built-form": None},
+ "Top-floor flat": {"property-type": "House", "built-form": None},
+ "GROUND-FLOOR FLAT": {"property-type": "House", "built-form": None},
+ "NOT ON GOV SITE": {"property-type": "House", "built-form": None}
+ }
+
+ for _, property_meta in tqdm(lost_identified_properties.iterrows(), total=len(lost_identified_properties)):
+
+ # NOTE(review): a missing value stored as NaN is not None and would pass this check - confirm
+ if property_meta["POSTCODE"] is None:
+ continue
+
+ full_address = ", ".join(
+ [str(x) for x in [
+ property_meta["NO"], property_meta["ADDRESS 1"], property_meta["ADDRESS 2"], property_meta["ADDRESS 3"]
+ ] if x is not None]
+ )
+
+ searcher = SearchEpc(
+ address1=str(property_meta["NO"]),
+ postcode=property_meta["POSTCODE"],
+ auth_token=EPC_AUTH_TOKEN,
+ os_api_key=None,
+ full_address=full_address
+ )
+
+ property_type_key = property_meta["PROPERTY TYPE"]
+ if property_type_key is not None:
+ searcher.ordnance_survey_client.property_type = property_type_lookup[property_type_key.strip()][
+ "property-type"]
+ searcher.ordnance_survey_client.built_form = property_type_lookup[property_type_key.strip()][
+ "built-form"]
+ searcher.find_property(skip_os=True)
+
+ if searcher.newest_epc is None:
+ nodata.append(property_meta)
+ continue
+
+ if searcher.newest_epc.get("estimated"):
+ # We insert the row ID as our proxy for UPRN
+ proxy_uprn = int(property_meta["row_id"].split("_")[-1])
+ searcher.newest_epc["uprn"] = proxy_uprn
+
+ newest_epc = searcher.newest_epc
+ older_epcs = searcher.older_epcs
+ full_sap_epc = searcher.full_sap_epc
+ # We also want to get the penultimate epc
+ penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
+ if not penultimate_epc:
+ penultimate_epc = newest_epc
+
+ eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
+ eligibility.check_gbis_warmfront()
+ eligibility.check_eco4_warmfront()
+
+ if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
+ eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
+ eligibility.check_gbis_warmfront()
+ eligibility.check_eco4_warmfront()
+ # If this is the case, we need to update the older epcs
+ # We don't update just to make data cleaning easier
+ if penultimate_epc.get("estimated") is None:
+ older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
+
+ # Full checks
+ eligibility.check_gbis()
+ eligibility.check_eco4()
+
+ if eligibility.eco4_warmfront["eligible"] and (eligibility.epc["construction-age-band"] not in ["", None]):
+ if eligibility.epc["uprn"] in ["", None]:
+ # Take the trailing numeric token; index 1 would be "surveys" and int() would raise
+ eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[-1])
+
+ scoring_dictionary = prepare_model_data_row(
+ property_id=property_meta["row_id"],
+ modelling_epc=eligibility.epc,
+ cleaned=cleaned,
+ cleaning_data=cleaning_data,
+ created_at=created_at,
+ old_data=older_epcs,
+ full_sap_epc=full_sap_epc,
+ photo_supply_lookup=photo_supply_lookup,
+ floor_area_decile_thresholds=floor_area_decile_thresholds,
+ )
+ scoring_data.extend(scoring_dictionary)
+
+ results.append(
+ {
+ "row_id": property_meta["row_id"],
+ "uprn": eligibility.epc["uprn"],
+ "Address": property_meta["ADDRESS 1"],
+ "Postcode": property_meta["POSTCODE"],
+ "property_type": eligibility.epc["property-type"],
+ "gbis_eligible": eligibility.gbis_warmfront,
+ "eco4_eligible": eligibility.eco4_warmfront["eligible"],
+ "eco4_message": eligibility.eco4_warmfront["message"],
+ "sap": float(eligibility.epc["current-energy-efficiency"]),
+ "gbis_eligible_future": eligibility.gbis["eligible"],
+ "gbis_eligible_future_message": eligibility.gbis["message"],
+ "eco4_eligible_future": eligibility.eco4["eligible"],
+ "eco4_eligible_future_message": eligibility.eco4["message"],
+ # Property components
+ "roof": eligibility.roof["clean_description"],
+ "walls": eligibility.walls["clean_description"],
+ "cavity_type": eligibility.cavity["type"],
+ "heating": eligibility.epc["mainheat-description"],
+ "tenure": eligibility.tenure,
+ "date_epc": eligibility.epc["lodgement-date"],
+ **eligibility.walls,
+ **eligibility.roof,
+ }
+ )
+
+ scoring_df = pd.DataFrame(scoring_data)
+
+ # Perform the same cleaning as in the model - first clean number of room variables though
+ scoring_df = DataProcessor.apply_averages_cleaning(
+ data_to_clean=scoring_df,
+ cleaning_data=cleaning_data,
+ cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
+ colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
+ )
+
+ scoring_df = DataProcessor.apply_averages_cleaning(
+ data_to_clean=scoring_df,
+ cleaning_data=cleaning_data,
+ cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
+ ).drop(columns=["LOCAL_AUTHORITY"])
+
+ scoring_df = DataProcessor.clean_missings_after_description_process(
+ scoring_df,
+ ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
+ "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
+ )
+
+ scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
+ scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)
+
+ model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
+ all_predictions = model_api.predict_all(
+ df=scoring_df,
+ bucket="retrofit-data-dev",
+ prediction_buckets={
+ "sap_change_predictions": "retrofit-sap-predictions-dev",
+ "heat_demand_predictions": "retrofit-heat-predictions-dev",
+ "carbon_change_predictions": "retrofit-carbon-predictions-dev"
+ }
+ )
+
+ predictions = all_predictions["sap_change_predictions"].copy()
+
+ results_df = pd.DataFrame(results)
+
+ # Uplift per prediction row is (predicted SAP - current SAP); the uplifts are then summed per property
+ predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
+ results_df[["row_id", "sap"]], how="left", on="row_id"
+ )
+ predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
+ predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
+
+ results_df = results_df.merge(
+ predictions[["sap_uplift", "row_id"]],
+ how="left",
+ on="row_id"
+ )
+ results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
+
+ eligibility_assessment = []
+ for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
+ # The upgrade requirements are dependent on the current SAP
+
+ # Properties with a current SAP of 38 or below are graded against lower post-install thresholds
+ if row["sap"] <= 38:
+ if row["post_install_sap"] >= 57:
+ eligibility_classification = "highest confidence"
+ elif row["post_install_sap"] >= 55:
+ eligibility_classification = "high confidence"
+ elif row["post_install_sap"] >= 53:
+ eligibility_classification = "medium confidence"
+ else:
+ eligibility_classification = "unlikely"
+ else:
+
+ if row["post_install_sap"] >= 71:
+ eligibility_classification = "highest confidence"
+ elif row["post_install_sap"] >= 69:
+ eligibility_classification = "high confidence"
+ elif row["post_install_sap"] >= 67:
+ eligibility_classification = "medium confidence"
+ else:
+ eligibility_classification = "unlikely"
+
+ eligibility_assessment.append(
+ {
+ "row_id": row["row_id"],
+ "eligibility_classification": eligibility_classification
+ }
+ )
+
+ eligibility_assessment = pd.DataFrame(eligibility_assessment)
+
+ results_df = results_df.merge(
+ eligibility_assessment, how="left", on="row_id"
+ )
+ return results_df, scoring_data, nodata
+
+
+def analyse_results(results_df, data, eco4_prospects_survey_list):
+    """
+    Join the asset list, the eligibility results and the survey list, then split out the groups of interest.
+
+    :param results_df: Results DataFrame keyed by "row_id"; "roof_insulation_thickness", "eco4_eligible",
+        "gbis_eligible" and "sap" are read
+    :param data: Asset list DataFrame with "row_id", "survey_key" and "warmfront_identified"
+    :param eco4_prospects_survey_list: Survey DataFrame with "survey_key", "ADDRESS 1", "NO" and "POSTCODE"
+    :return: Dict with the joined frame ("analysis_data") and the subsets "warmfront_identified",
+        "ideal_eco4" and "gbis". Previously nothing was returned and the computed frames were discarded
+    """
+    analysis_data = data[["row_id", "survey_key", "warmfront_identified"]].merge(
+        results_df, how="left", on="row_id"
+    )
+
+    analysis_data = analysis_data.merge(
+        eco4_prospects_survey_list[["survey_key", "ADDRESS 1", "NO", "POSTCODE"]],
+        how="left", on="survey_key"
+    )
+
+    # Replace nulls with None before handing the values to convert_thickness_to_numeric
+    analysis_data["roof_insulation_thickness"] = np.where(
+        pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"]
+    )
+    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
+        lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True)
+    )
+
+    warmfront_identified = analysis_data[analysis_data["warmfront_identified"] == True]  # 2204
+
+    # Because we don't know which property is for which scheme, we'll just look at what we found
+    ideal_eco4 = analysis_data[
+        (analysis_data["eco4_eligible"] == True) &
+        (analysis_data["roof_insulation_thickness_numeric"] <= 100) &
+        (analysis_data["sap"] <= 54)
+    ]  # 335
+
+    # GBIS candidates are the GBIS-eligible rows not already counted as ideal ECO4
+    gbis = analysis_data[
+        (analysis_data["gbis_eligible"] == True) &
+        ~analysis_data["row_id"].isin(ideal_eco4["row_id"].values)
+    ]
+
+    return {
+        "analysis_data": analysis_data,
+        "warmfront_identified": warmfront_identified,
+        "ideal_eco4": ideal_eco4,
+        "gbis": gbis,
+    }
+
+
+def analyse_lost_surveys(results_df):
+    """
+    Split the lost-survey results into ideal ECO4 candidates and the remaining GBIS candidates.
+
+    The columns "roof_insulation_thickness" and "roof_insulation_thickness_numeric" are written onto
+    ``results_df`` in place.
+
+    :param results_df: Results DataFrame with "roof_insulation_thickness", "eco4_eligible",
+        "gbis_eligible", "sap" and "row_id"
+    :return: Dict with the subsets "ideal_eco4" and "gbis". Previously nothing was returned and the
+        computed frames were discarded
+    """
+    # Replace nulls with None before handing the values to convert_thickness_to_numeric
+    results_df["roof_insulation_thickness"] = np.where(
+        pd.isnull(results_df["roof_insulation_thickness"]), None, results_df["roof_insulation_thickness"]
+    )
+    results_df["roof_insulation_thickness_numeric"] = results_df["roof_insulation_thickness"].apply(
+        lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True)
+    )
+
+    ideal_eco4 = results_df[
+        (results_df["eco4_eligible"] == True) &
+        (results_df["roof_insulation_thickness_numeric"] <= 100) &
+        (results_df["sap"] <= 54)
+    ]  # 25
+
+    # GBIS candidates are the GBIS-eligible rows not already counted as ideal ECO4
+    gbis = results_df[
+        (results_df["gbis_eligible"] == True) &
+        ~results_df["row_id"].isin(ideal_eco4["row_id"].values)
+    ]  # 82
+
+    return {"ideal_eco4": ideal_eco4, "gbis": gbis}
+
+
+def app():
+    """
+    Driver for the analysis: load the inputs, run the EPC search and scoring, then load a saved run.
+    """
+    data, eco4_prospects_survey_list, lost_identified_properties = load_data()
+
+    # Synthetic row ids of the form "ha25_<n>"
+    data["row_id"] = [f"ha25_{position}" for position in range(len(data))]
+
+    packed_cleaned = read_from_s3(
+        s3_file_name="cleaned_epc_data/cleaned.bson",
+        bucket_name="retrofit-data-dev",
+    )
+    cleaned = msgpack.unpackb(packed_cleaned, raw=False)
+
+    cleaning_data = read_dataframe_from_s3_parquet(
+        bucket_name="retrofit-data-dev",
+        file_key="sap_change_model/cleaning_dataset.parquet",
+    )
+
+    created_at = datetime.now().isoformat()
+
+    solar_inputs = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+    photo_supply_lookup, floor_area_decile_thresholds = solar_inputs
+
+    results_df, scoring_data, nodata = get_epc_data(
+        data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds
+    )
+
+    # The outputs were previously pickled to "ha25_10_jan.pickle" (older data: "ha25.pickle") as a dict
+    # with the keys "results_df", "scoring_data" and "nodata"; that file is read back below.
+    # NOTE(review): this replaces the results computed just above with the saved run - confirm intended
+    import pickle
+    with open("ha25_10_jan.pickle", "rb") as pickle_file:
+        saved = pickle.load(pickle_file)
+
+    results_df = saved["results_df"]
+    scoring_data = saved["scoring_data"]
+    nodata = saved["nodata"]
diff --git a/etl/eligibility/ha_15_32/ha33_app.py b/etl/eligibility/ha_15_32/ha33_app.py
index 9af5eae2..42c8fa81 100644
--- a/etl/eligibility/ha_15_32/ha33_app.py
+++ b/etl/eligibility/ha_15_32/ha33_app.py
@@ -264,21 +264,21 @@ def get_ha_33data(data, cleaned, cleaning_data, created_at):
def analyse_ha_33(results_df, data):
- results_df_social = results_df[results_df["tenure"] == "Rented (social)"]
+ # results_df_social = results_df[results_df["tenure"] == "Rented (social)"]
+ #
+ # results_df_social["tenure"].value_counts()
- results_df_social["tenure"].value_counts()
+ data[data["row_id"].isin(results_df["row_id"].values)]["PROPERTY TYPE"].value_counts()
- data[data["row_id"].isin(results_df_social["row_id"].values)]["PROPERTY TYPE"].value_counts()
+ n_identified = (results_df["gbis_eligible"] | results_df["eco4_eligible"]).sum()
+ n_eco4 = results_df["eco4_eligible"].sum()
+ n_gbis = results_df[~results_df["eco4_eligible"]]["gbis_eligible"].sum()
- n_identified = (results_df_social["gbis_eligible"] | results_df_social["eco4_eligible"]).sum()
- n_eco4 = results_df_social["eco4_eligible"].sum()
- n_gbis = results_df_social[~results_df_social["eco4_eligible"]]["gbis_eligible"].sum()
-
- eco_eligibile = results_df_social[results_df_social["eco4_eligible"]]
+ eco_eligibile = results_df[results_df["eco4_eligible"]]
eco_eligibile["walls"].value_counts()
eco_eligibile["roof"].value_counts()
- results_df_social[results_df_social["gbis_eligible"] | results_df_social["eco4_eligible"]]["tenure"].value_counts()
+ results_df[results_df["gbis_eligible"] | results_df["eco4_eligible"]]["tenure"].value_counts()
results_df_social["eligibility_classification"].value_counts()
@@ -316,3 +316,11 @@ def app():
created_at = datetime.now().isoformat()
results_df, _, _ = get_ha_33data(data, cleaned, cleaning_data, created_at)
+
+ # Read in
+ import pickle
+ with open("ha33_results.pickle", "rb") as f:
+ data = pickle.load(f)
+ results_df = pd.DataFrame(data["results"])
+ scoring_data = data["scoring_data"]
+ nodata = data["nodata"]
diff --git a/etl/eligibility/ha_15_32/ha4_app.py b/etl/eligibility/ha_15_32/ha4_app.py
new file mode 100644
index 00000000..d2702dd8
--- /dev/null
+++ b/etl/eligibility/ha_15_32/ha4_app.py
@@ -0,0 +1,328 @@
+import os
+import msgpack
+from pathlib import Path
+from datetime import datetime
+import numpy as np
+import pandas as pd
+from utils.s3 import read_from_s3
+from utils.logger import setup_logger
+from dotenv import load_dotenv
+from utils.s3 import read_dataframe_from_s3_parquet
+from tqdm import tqdm
+from backend.SearchEpc import SearchEpc
+from etl.eligibility.Eligibility import Eligibility
+from etl.eligibility.ha_15_32.app import prepare_model_data_row
+from etl.epc.DataProcessor import DataProcessor
+from etl.epc.settings import COLUMNS_TO_MERGE_ON
+from backend.ml_models.api import ModelApi
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from recommendations.recommendation_utils import calculate_cavity_age
+from recommendation_utils import convert_thickness_to_numeric
+
+import re
+
+# NOTE(review): this module already lives in etl/eligibility/ha_15_32, so this resolves to
+# <module dir>/etl/eligibility/ha_15_32/.env - confirm the path is intended
+ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
+
+logger = setup_logger()
+# Load the .env file BEFORE reading from the environment, otherwise a token that is only defined
+# in the .env file is read as None
+load_dotenv(ENV_FILE)
+
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def load_ha_4():
+    """
+    Read the HA 4 asset list CSV into a DataFrame.
+
+    Also widens the process-wide pandas display options, which makes the frames easier to inspect
+    interactively. The CSV path is relative to the current working directory.
+
+    :return: The asset list as a DataFrame
+    """
+    pd.set_option('display.max_rows', 500)
+    pd.set_option('display.max_columns', 500)
+    pd.set_option('display.width', 1000)
+
+    # Plain string: the original f-string had no placeholders
+    data = pd.read_csv("etl/eligibility/ha_15_32/HA 4 Asset List.csv", low_memory=False)
+    return data
+
+
+def standardise_ha_4(data):
+    """
+    Clean the HA 4 asset list so that its addresses can be used for searching.
+
+    :param data: Asset list DataFrame with "Location Name" and "Post Code" columns
+    :return: A cleaned copy of the data; the input DataFrame is not modified
+    """
+    # Work on a copy so the caller's DataFrame is left untouched
+    data = data.copy()
+
+    # Location name contains some strings like {0664} which we remove, then trim whitespace from
+    # either end. Raw string: "\{" in a normal literal is an invalid escape sequence
+    data["Location Name"] = (
+        data["Location Name"].str.replace(r"\{.*?\}", "", regex=True).str.strip()
+    )
+
+    # Remove any unusable postcodes (stored as two backslashes)
+    data = data[data["Post Code"] != "\\\\"].copy()
+
+    # Some specific replacements
+    data.loc[data["Location Name"] == "Calderbrook Pl & Cog La", "Location Name"] = "Calderbrook Place"
+
+    return data
+
+
+def get_ha_4_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
+ """
+ Search EPCs for every entry in the HA 4 asset list, check eligibility for the newest EPC of each UPRN
+ found, and score the properties with the SAP change model.
+
+ :param data: Asset list DataFrame; rows are read for "Address Line 1", "Location Name" and "Post Code"
+ :param cleaned: Passed through to Eligibility and prepare_model_data_row
+ :param cleaning_data: Passed through to prepare_model_data_row and DataProcessor.apply_averages_cleaning
+ :param created_at: Timestamp passed to prepare_model_data_row and ModelApi
+ :param photo_supply_lookup: Not referenced anywhere in this function
+ :param floor_area_decile_thresholds: Not referenced anywhere in this function
+ :return: Tuple of (results_df, scoring_data, nodata)
+ """
+ scoring_data = []
+ results = []
+ nodata = []
+ for _, property_meta in tqdm(data.iterrows(), total=len(data)):
+ # For many of the entries in this dataset, we're actually given an entire building, so we EPCs for every
+ # building
+ # NOTE(review): `property_type_lookup` and `house` are neither defined in this function nor imported
+ # or defined at module level, so this call raises NameError - the loop variable is `property_meta`
+ searcher = SearchEpc(
+ address1=property_meta["Address Line 1"],
+ postcode=property_meta["Post Code"],
+ auth_token=EPC_AUTH_TOKEN,
+ os_api_key=None,
+ property_type=property_type_lookup.get(house["Archetype"]),
+ )
+
+ searcher.find_property(skip_os=True)
+
+ # Retry using the location name as the address when the first search finds no EPC
+ if searcher.newest_epc is None:
+ searcher = SearchEpc(
+ address1=property_meta["Location Name"],
+ postcode=property_meta["Post Code"],
+ auth_token=EPC_AUTH_TOKEN,
+ os_api_key=None,
+ property_type=property_type_lookup.get(house["Archetype"]),
+ )
+ searcher.search()
+
+ # NOTE(review): this appends a row id (via the undefined `house`) whereas the check on
+ # `searcher.data` below appends the whole row as a dict, so `nodata` would hold mixed types
+ if searcher.newest_epc is None:
+ nodata.append(house["row_id"])
+ continue
+
+ # NOTE(review): these three values are not read again; `full_sap_epc` is reassigned per EPC below
+ newest_epc = searcher.newest_epc
+ older_epcs = searcher.older_epcs
+ full_sap_epc = searcher.full_sap_epc
+
+ # NOTE(review): the search is run again here, after a possible search above - confirm it is needed
+ searcher.search()
+
+ if searcher.data is None:
+ nodata.append(property_meta.to_dict())
+ continue
+
+ epcs = searcher.data["rows"]
+ epcs = pd.DataFrame(epcs)
+
+ # Take the newest EPC by UPRN
+ epcs = epcs.sort_values(by=["lodgement-date"], ascending=False)
+ newest_epcs = epcs.drop_duplicates(subset=["uprn"], keep="first")
+
+ # For each EPC, we now check eligibility
+ for _, epc in newest_epcs.iterrows():
+ eligibility = Eligibility(epc=epc.to_dict(), cleaned=cleaned)
+ eligibility.check_gbis_warmfront()
+ eligibility.check_eco4_warmfront()
+
+ # If the house is not identified, we do a full gbis and eco4 check
+ eligibility.check_gbis()
+ eligibility.check_eco4()
+
+ if eligibility.eco4_warmfront["eligible"]:
+ # We get old_eps
+ old_data = epcs[
+ (epcs["uprn"] == epc["uprn"]) &
+ (epcs["lmk-key"] != epc["lmk-key"])
+ ].to_dict("records")
+
+ full_sap_epc = epcs[
+ (epcs["uprn"] == epc["uprn"]) &
+ (epcs["transaction-type"] == "new dwelling")
+ ].to_dict("records")
+
+ # NOTE(review): unlike this function's own parameter list suggests, `photo_supply_lookup` and
+ # `floor_area_decile_thresholds` are not forwarded here - confirm against prepare_model_data_row
+ scoring_dictionary = prepare_model_data_row(
+ property_id=eligibility.epc["uprn"],
+ modelling_epc=eligibility.epc,
+ cleaned=cleaned,
+ cleaning_data=cleaning_data,
+ created_at=created_at,
+ old_data=old_data,
+ full_sap_epc=full_sap_epc
+ )
+ scoring_data.extend(scoring_dictionary)
+
+ results.append(
+ {
+ "uprn": epc["uprn"],
+ "Location Name": property_meta["Location Name"],
+ "Post Code": property_meta["Post Code"],
+ "property_type": eligibility.epc["property-type"],
+ "gbis_eligible": eligibility.gbis_warmfront,
+ "eco4_eligible": eligibility.eco4_warmfront["eligible"],
+ "eco4_message": eligibility.eco4_warmfront["message"],
+ "sap": float(eligibility.epc["current-energy-efficiency"]),
+ "gbis_eligible_future": eligibility.gbis["eligible"],
+ "gbis_eligible_future_message": eligibility.gbis["message"],
+ "eco4_eligible_future": eligibility.eco4["eligible"],
+ "eco4_eligible_future_message": eligibility.eco4["message"],
+ # Property components
+ "roof": eligibility.roof["clean_description"],
+ "walls": eligibility.walls["clean_description"],
+ "cavity_type": eligibility.cavity["type"],
+ "heating": eligibility.epc["mainheat-description"],
+ "tenure": eligibility.tenure,
+ "date_epc": eligibility.epc["lodgement-date"],
+ }
+ )
+
+ scoring_df = pd.DataFrame(scoring_data)
+
+ # Perform the same cleaning as in the model - first clean number of room variables though
+ scoring_df = DataProcessor.apply_averages_cleaning(
+ data_to_clean=scoring_df,
+ cleaning_data=cleaning_data,
+ cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
+ colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
+ )
+
+ scoring_df = DataProcessor.apply_averages_cleaning(
+ data_to_clean=scoring_df,
+ cleaning_data=cleaning_data,
+ cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
+ ).drop(columns=["LOCAL_AUTHORITY"])
+
+ scoring_df = DataProcessor.clean_missings_after_description_process(
+ scoring_df,
+ ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
+ "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
+ )
+
+ scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
+
+ model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
+ all_predictions = model_api.predict_all(
+ df=scoring_df,
+ bucket="retrofit-data-dev",
+ prediction_buckets={
+ "sap_change_predictions": "retrofit-sap-predictions-dev",
+ "heat_demand_predictions": "retrofit-heat-predictions-dev",
+ "carbon_change_predictions": "retrofit-carbon-predictions-dev"
+ }
+ )
+
+ predictions = all_predictions["sap_change_predictions"].copy()
+
+ results_df = pd.DataFrame(results)
+
+ # Uplift per prediction row is (predicted SAP - current SAP); the uplifts are then summed per UPRN
+ predictions = predictions.rename(columns={"property_id": "uprn"}).merge(
+ results_df[["uprn", "sap"]], how="left", on="uprn"
+ )
+ predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
+ predictions = predictions.groupby("uprn")["sap_uplift"].sum().reset_index()
+
+ results_df = results_df.merge(
+ predictions[["sap_uplift", "uprn"]],
+ how="left",
+ on="uprn"
+ )
+ results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
+
+ results_df = results_df[~pd.isnull(results_df["uprn"])]
+
+ eligibility_assessment = []
+ for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
+ # The upgrade requirements are dependent on the current SAP
+
+ # Properties with a current SAP of 38 or below are graded against lower post-install thresholds
+ if row["sap"] <= 38:
+ if row["post_install_sap"] >= 57:
+ eligibility_classification = "highest confidence"
+ elif row["post_install_sap"] >= 55:
+ eligibility_classification = "high confidence"
+ elif row["post_install_sap"] >= 53:
+ eligibility_classification = "medium confidence"
+ else:
+ eligibility_classification = "unlikely"
+ else:
+
+ if row["post_install_sap"] >= 71:
+ eligibility_classification = "highest confidence"
+ elif row["post_install_sap"] >= 69:
+ eligibility_classification = "high confidence"
+ elif row["post_install_sap"] >= 67:
+ eligibility_classification = "medium confidence"
+ else:
+ eligibility_classification = "unlikely"
+
+ eligibility_assessment.append(
+ {
+ "uprn": row["uprn"],
+ "eligibility_classification": eligibility_classification
+ }
+ )
+
+ eligibility_assessment = pd.DataFrame(eligibility_assessment)
+
+ results_df = results_df.merge(
+ eligibility_assessment, how="left", on="uprn"
+ )
+ # We have some properties that are duplicated so we take just one instance
+ results_df = results_df.drop_duplicates(subset=["uprn"])
+
+ return results_df, scoring_data, nodata
+
+
+def analyse_ha_4(results_df, data):
+    """
+    Summarise how many HA 4 properties were identified for each scheme, now and in future.
+
+    :param results_df: Results DataFrame with boolean "gbis_eligible", "eco4_eligible",
+        "gbis_eligible_future" and "eco4_eligible_future" columns plus "eligibility_classification"
+    :param data: Asset list; accepted for interface compatibility and not used
+    :return: Dict of summary values. Previously nothing was returned and the computed values were
+        discarded
+    """
+    identified_now = results_df["gbis_eligible"] | results_df["eco4_eligible"]
+
+    n_identified = identified_now.sum()
+    n_eco4 = results_df["eco4_eligible"].sum()
+    # GBIS is only counted where the property is not already ECO4 eligible
+    n_gbis = results_df[~results_df["eco4_eligible"]]["gbis_eligible"].sum()
+
+    eco_eligibile = results_df[results_df["eco4_eligible"]]
+    eco4_classification_counts = eco_eligibile["eligibility_classification"].value_counts()
+
+    # Future possibilities only cover properties that are not identified today
+    future_possibilities_eco = results_df[
+        (results_df["eco4_eligible_future"] == True) & (~identified_now)
+    ].copy()
+
+    future_possibilities_gbis = results_df[
+        (results_df["gbis_eligible_future"] == True) & (results_df["eco4_eligible_future"] == False) & (
+            ~identified_now)
+    ].copy()
+
+    total_future_possibilities = future_possibilities_eco.shape[0] + future_possibilities_gbis.shape[0]
+
+    return {
+        "n_identified": int(n_identified),
+        "n_eco4": int(n_eco4),
+        "n_gbis": int(n_gbis),
+        "eco4_classification_counts": eco4_classification_counts,
+        "future_possibilities_eco": future_possibilities_eco,
+        "future_possibilities_gbis": future_possibilities_gbis,
+        "total_future_possibilities": total_future_possibilities,
+    }
+
+
+def app():
+    """
+    Driver for the HA 4 analysis: load and standardise the asset list, fetch the model inputs from S3
+    and run the EPC search and scoring.
+    """
+    data = standardise_ha_4(load_ha_4())
+
+    # Synthetic row ids of the form "h4<n>"
+    data["row_id"] = [f"h4{position}" for position in range(len(data))]
+
+    packed_cleaned = read_from_s3(
+        s3_file_name="cleaned_epc_data/cleaned.bson",
+        bucket_name="retrofit-data-dev",
+    )
+    cleaned = msgpack.unpackb(packed_cleaned, raw=False)
+
+    cleaning_data = read_dataframe_from_s3_parquet(
+        bucket_name="retrofit-data-dev",
+        file_key="sap_change_model/cleaning_dataset.parquet",
+    )
+
+    created_at = datetime.now().isoformat()
+
+    solar_inputs = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+    photo_supply_lookup, floor_area_decile_thresholds = solar_inputs
+
+    results_df, scoring_data, nodata = get_ha_4_data(
+        data=data,
+        cleaned=cleaned,
+        cleaning_data=cleaning_data,
+        created_at=created_at,
+        photo_supply_lookup=photo_supply_lookup,
+        floor_area_decile_thresholds=floor_area_decile_thresholds,
+    )
+
+    # During development the three outputs were stored in, and read back from, a local "ha_4.pickle"
+    # holding a dict with the keys "results_df", "scoring_data" and "nodata"
diff --git a/etl/eligibility/ha_15_32/ha7_app.py b/etl/eligibility/ha_15_32/ha7_app.py
new file mode 100644
index 00000000..c6486159
--- /dev/null
+++ b/etl/eligibility/ha_15_32/ha7_app.py
@@ -0,0 +1,383 @@
+import os
+import msgpack
+import openpyxl
+from openpyxl.styles.colors import COLOR_INDEX
+from pathlib import Path
+from datetime import datetime
+import pandas as pd
+import numpy as np
+from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet
+from utils.logger import setup_logger
+from dotenv import load_dotenv
+from tqdm import tqdm
+from backend.SearchEpc import SearchEpc
+from etl.eligibility.Eligibility import Eligibility
+from etl.eligibility.ha_15_32.app import prepare_model_data_row
+from etl.epc.DataProcessor import DataProcessor
+from etl.epc.settings import COLUMNS_TO_MERGE_ON
+from backend.ml_models.api import ModelApi
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from recommendations.recommendation_utils import calculate_cavity_age
+from recommendation_utils import convert_thickness_to_numeric
+
+# The .env file lives in etl/eligibility/ha_15_32, which is this module's own directory, so it is resolved
+# relative to __file__ directly. Repeating the package path here would point at a nested location and
+# load_dotenv would then load nothing.
+ENV_FILE = Path(__file__).parent / ".env"
+
+logger = setup_logger()
+load_dotenv(ENV_FILE)
+
+# Credentials are read once at import time; they are None when the variable is not set
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+OS_API_KEY = os.getenv("ORDNANCE_SURVEY_API_KEY")
+
+
+def _fill_colour_of(cell):
+    """
+    Return the fill colour of a cell as an aRGB hex string, or None when the cell has no fill.
+    """
+    colour_index = cell.fill.start_color.index
+    if isinstance(colour_index, int):
+        # Integer values are positions in openpyxl's COLOR_INDEX palette
+        return COLOR_INDEX[colour_index]
+    if colour_index is None or colour_index == '00000000':
+        # '00000000' is what an unfilled cell reports
+        return None
+    # Any other value is already a hex colour string
+    return colour_index
+
+
+def load_data(path='etl/eligibility/ha_15_32/HESTIA - HA 7 ASSET LIST.xlsx'):
+    """
+    Load the HA 7 asset list from the excel workbook, together with the fill colour of each row.
+
+    The colour of the first cell of every row is translated to a colour name (row_colour_name) and a status
+    code (row_code). Rows without a fill colour are left unclassified (None in both columns) instead of raising.
+
+    :param path: Location of the workbook, defaults to the HA 7 asset list
+    :return: Dataframe with one row per asset plus the row_color, row_colour_name and row_code columns
+    """
+    workbook = openpyxl.load_workbook(path)
+    sheet = workbook.active
+
+    # The first row holds the headers, so the data starts on row 2. Cell objects (rather than plain values) are
+    # needed in order to read the fill colour.
+    rows = list(sheet.iter_rows(min_row=2, values_only=False))
+
+    df = pd.DataFrame(
+        [[cell.value for cell in row] for row in rows],
+        columns=[cell.value for cell in sheet[1]],
+    )
+
+    # The ninth column is renamed by position. Build a new list of labels rather than writing into the index's
+    # underlying array.
+    column_names = list(df.columns)
+    column_names[8] = "is_active"
+    df.columns = column_names
+
+    # Remove None columns. This is done before the colours are attached so that row_color survives even if no
+    # row is filled.
+    df = df.dropna(axis=1, how='all')
+
+    # Add the row colors as a new column
+    df['row_color'] = [_fill_colour_of(row[0]) for row in rows]
+
+    # We now parse the colours: two known codes are named explicitly, any other colour counts as yellow
+    known_colours = {"0000FFFF": "red", "00FF00FF": "green"}
+    df["row_colour_name"] = [
+        None if pd.isna(colour) else known_colours.get(colour, "yellow") for colour in df["row_color"]
+    ]
+
+    colour_codes = {"red": "invalid", "green": "potential ECO4", "yellow": "needs criteria change"}
+    df["row_code"] = [
+        None if pd.isna(colour_name) else colour_codes[colour_name] for colour_name in df["row_colour_name"]
+    ]
+
+    return df
+
+
+def get_ha7_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
+ """
+ Look up the newest EPC for every property in the asset list, run the eligibility checks on it and score the
+ ECO4 (warmfront) eligible properties with the prediction models.
+
+ :param data: Asset list dataframe; the columns "Address", "Address2", "Postcode", "Archetype" and "row_id"
+ are read for each row
+ :param cleaned: Cleaned EPC lookup, passed to Eligibility, calculate_cavity_age and prepare_model_data_row
+ :param cleaning_data: Cleaning dataset, passed to prepare_model_data_row and the DataProcessor cleaning steps
+ :param created_at: Timestamp of the run, passed to prepare_model_data_row and ModelApi
+ :param photo_supply_lookup: Solar photo supply lookup, passed to prepare_model_data_row
+ :param floor_area_decile_thresholds: Floor area decile thresholds, passed to prepare_model_data_row
+ :return: Tuple of (results dataframe with one row per property that has an EPC, list of scoring rows,
+ list of row_ids for which no EPC was found)
+ """
+ # Archetypes that are not listed here resolve to None through the .get() call below
+ property_type_lookup = {
+ # "Mid Terrace": "Mid-Terrace",
+ # "End Terrace": "End-Terrace",
+ # "Semi Detached": "Semi-Detached",
+ # "Detached": "Detached",
+ "House": "House",
+ "Flat": "Flat",
+ "Bungalow": "Bungalow",
+ "Maisonette": "Maisonette",
+ }
+
+ scoring_data = []
+ results = []
+ nodata = []
+ for _, house in tqdm(data.iterrows(), total=len(data)):
+
+ # Fall back to Address2 when Address is empty (falsy)
+ if house["Address"]:
+ address = house["Address"]
+ else:
+ address = house["Address2"]
+
+ searcher = SearchEpc(
+ address1=address,
+ postcode=house["Postcode"],
+ auth_token=EPC_AUTH_TOKEN,
+ os_api_key=None,
+ property_type=property_type_lookup.get(house["Archetype"]),
+ )
+
+ # No Ordnance Survey key is supplied, and the search is asked to skip the OS step
+ searcher.find_property(skip_os=True)
+
+ # No EPC found for this property: remember the row and move on
+ if searcher.newest_epc is None:
+ nodata.append(house["row_id"])
+ continue
+
+ newest_epc = searcher.newest_epc
+ older_epcs = searcher.older_epcs
+ full_sap_epc = searcher.full_sap_epc
+
+ eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
+ eligibility.check_gbis_warmfront()
+ eligibility.check_eco4_warmfront()
+
+ # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
+
+ # Loft MUST be suitable
+ cavity_age = None
+ if (
+ eligibility.walls["is_cavity_wall"] and
+ eligibility.walls["is_filled_cavity"] and
+ eligibility.loft["suitability"] and
+ eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
+ ):
+ # We check the age of the cavity and if it's particularly old, we flag it
+ cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)
+
+ # If the house is not identified, we do a full gbis and eco4 check
+ # NOTE(review): the results below read eligibility.gbis and eligibility.eco4 for every property, so these
+ # two checks are presumably run unconditionally - confirm
+ eligibility.check_gbis()
+ eligibility.check_eco4()
+
+ # Only ECO4 (warmfront) eligible properties are prepared for model scoring
+ if eligibility.eco4_warmfront["eligible"]:
+ scoring_dictionary = prepare_model_data_row(
+ property_id=house["row_id"],
+ modelling_epc=eligibility.epc,
+ cleaned=cleaned,
+ cleaning_data=cleaning_data,
+ created_at=created_at,
+ old_data=older_epcs,
+ full_sap_epc=full_sap_epc,
+ photo_supply_lookup=photo_supply_lookup,
+ floor_area_decile_thresholds=floor_area_decile_thresholds
+ )
+ scoring_data.extend(scoring_dictionary)
+
+ # If nothing is eligible or gbis is eligible, then we make a record of this
+ # NOTE: the ** spreads come after the explicit keys, so any key they share with an earlier entry (or with
+ # an earlier spread) overwrites it
+ results.append(
+ {
+ "row_id": house["row_id"],
+ "address": house["Address"],
+ "postcode": house["Postcode"],
+ "gbis_eligible": eligibility.gbis_warmfront,
+ "eco4_eligible": eligibility.eco4_warmfront["eligible"],
+ "eco4_message": eligibility.eco4_warmfront["message"],
+ "sap": float(eligibility.epc["current-energy-efficiency"]),
+ "gbis_eligible_future": eligibility.gbis["eligible"],
+ "gbis_eligible_future_message": eligibility.gbis["message"],
+ "eco4_eligible_future": eligibility.eco4["eligible"],
+ "eco4_eligible_future_message": eligibility.eco4["message"],
+ # Property components
+ "roof": eligibility.roof["clean_description"],
+ "walls": eligibility.walls["clean_description"],
+ "heating": eligibility.epc["mainheat-description"],
+ "tenure": eligibility.tenure,
+ "date_epc": eligibility.epc["lodgement-date"],
+ **newest_epc,
+ "cavity_age": cavity_age,
+ **eligibility.walls,
+ **eligibility.roof,
+ }
+ )
+
+ scoring_df = pd.DataFrame(scoring_data)
+ # Apply the same process that is used in the recommendation engine to clean scoring_df
+
+ # Perform the same cleaning as in the model - first clean number of room variables though
+ scoring_df = DataProcessor.apply_averages_cleaning(
+ data_to_clean=scoring_df,
+ cleaning_data=cleaning_data,
+ cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
+ colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
+ )
+
+ # LOCAL_AUTHORITY is only needed as a merge key, so it is dropped once the averages have been applied
+ scoring_df = DataProcessor.apply_averages_cleaning(
+ data_to_clean=scoring_df,
+ cleaning_data=cleaning_data,
+ cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
+ ).drop(columns=["LOCAL_AUTHORITY"])
+
+ # Thermal transmittance, insulation thickness and energy efficiency columns are excluded from this step
+ scoring_df = DataProcessor.clean_missings_after_description_process(
+ scoring_df,
+ ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
+ "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
+ )
+
+ scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
+
+ # NOTE(review): the portfolio id says "ha33" although this script processes HA 7 - confirm this is intended
+ model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
+ all_predictions = model_api.predict_all(
+ df=scoring_df,
+ bucket="retrofit-data-dev",
+ prediction_buckets={
+ "sap_change_predictions": "retrofit-sap-predictions-dev",
+ "heat_demand_predictions": "retrofit-heat-predictions-dev",
+ "carbon_change_predictions": "retrofit-carbon-predictions-dev"
+ }
+ )
+
+ # Only the SAP change predictions are used from here on
+ predictions = all_predictions["sap_change_predictions"].copy()
+
+ results_df = pd.DataFrame(results)
+
+ # Attach the current SAP to every prediction row so the uplift can be computed
+ predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
+ results_df[["row_id", "sap"]], how="left", on="row_id"
+ )
+ # The uplift of each prediction is the predicted SAP minus the current SAP; uplifts are summed per property
+ predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
+ predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
+
+ results_df = results_df.merge(
+ predictions[["sap_uplift", "row_id"]],
+ how="left",
+ on="row_id"
+ )
+
+ results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
+
+ # Classify how confident we are that each ECO4 eligible property reaches its post-install SAP target
+ eligibility_assessment = []
+ for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
+ # The upgrade requirements are dependent on the current SAP
+
+ # If the property is an F or G (current SAP of 38 or below), the lower set of SAP targets applies
+ # (53 / 55 / 57) - presumably centred on the band D boundary, TODO confirm
+ if row["sap"] <= 38:
+ if row["post_install_sap"] >= 57:
+ eligibility_classification = "highest confidence"
+ elif row["post_install_sap"] >= 55:
+ eligibility_classification = "high confidence"
+ elif row["post_install_sap"] >= 53:
+ eligibility_classification = "medium confidence"
+ else:
+ eligibility_classification = "unlikely"
+ else:
+
+ # Every other property is assessed against the higher set of SAP targets (67 / 69 / 71)
+ if row["post_install_sap"] >= 71:
+ eligibility_classification = "highest confidence"
+ elif row["post_install_sap"] >= 69:
+ eligibility_classification = "high confidence"
+ elif row["post_install_sap"] >= 67:
+ eligibility_classification = "medium confidence"
+ else:
+ eligibility_classification = "unlikely"
+
+ eligibility_assessment.append(
+ {
+ "row_id": row["row_id"],
+ "eligibility_classification": eligibility_classification
+ }
+ )
+
+ eligibility_assessment = pd.DataFrame(eligibility_assessment)
+
+ # Left merge: properties that were not assessed get a missing eligibility_classification
+ results_df = results_df.merge(
+ eligibility_assessment, how="left", on="row_id"
+ )
+
+ return results_df, scoring_data, nodata
+
+
+def analyse_ha_7(results_df, data):
+    """
+    Split the HA 7 eligibility results into the segments used for analysis and return them.
+
+    The asset-list columns row_code, "Property Type" and "Construction Year Band" are joined onto the results by
+    row_id, a numeric roof insulation thickness is derived, and the properties are then grouped by scheme,
+    insulation level and identification status. Neither input dataframe is modified.
+
+    :param results_df: Eligibility results with one row per property - presumably the first value returned by
+        get_ha7_data (TODO confirm that it carries a roof_insulation_thickness column)
+    :param data: Asset list containing row_id, row_code, "Property Type" and "Construction Year Band"
+    :return: Dictionary holding every derived dataframe, count and summary, keyed by name
+    """
+    analysis_data = results_df.merge(
+        data[["row_id", "row_code", "Property Type", "Construction Year Band"]], how="left", on="row_id"
+    )
+
+    # Missing thicknesses are normalised to None before being converted to a number
+    analysis_data["roof_insulation_thickness"] = np.where(
+        pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"]
+    )
+    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
+        lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True)
+    )
+
+    eco4_identified = analysis_data["eco4_eligible"] == True
+    # 100 is the roof insulation thickness that separates the primary from the secondary segments
+    low_roof_insulation = analysis_data["roof_insulation_thickness_numeric"] <= 100
+    high_roof_insulation = analysis_data["roof_insulation_thickness_numeric"] > 100
+
+    ideal_eco4 = analysis_data[eco4_identified & low_roof_insulation]
+    secondary_eco4_warmfront_not_sold = analysis_data[eco4_identified & high_roof_insulation]
+
+    # Underperforming cavities: failed on a full cavity, the cavity age exceeds 9 * 365 (nine years if the age
+    # is expressed in days - TODO confirm the unit) and the roof insulation is low
+    underperforming_cavities = analysis_data[
+        (analysis_data["eco4_message"] == "Failed due to full cavity - check cavity age") &
+        (analysis_data["cavity_age"] > 9 * 365) &
+        low_roof_insulation
+    ]
+
+    identified_gbis_not_sold = analysis_data[
+        (analysis_data["gbis_eligible"] == True) & (analysis_data["eco4_eligible"] == False)
+    ]
+
+    # Rows that were colour-coded as potential ECO4 in the asset list
+    warmfront_identification = analysis_data["row_code"].value_counts()
+    warmfront_identified = analysis_data[analysis_data["row_code"] == "potential ECO4"]
+
+    # Number of days from each EPC lodgement to today
+    days_to_today = (datetime.now() - pd.to_datetime(warmfront_identified["date_epc"])).dt.days
+
+    n_identified = (results_df["gbis_eligible"] | results_df["eco4_eligible"]).sum()
+
+    eco_identified = results_df[results_df["eco4_eligible"]]
+    n_eco4 = eco_identified["eco4_eligible"].sum()
+    gbis_identified = results_df[~results_df["eco4_eligible"] & results_df["gbis_eligible"]]
+    n_gbis = results_df[~results_df["eco4_eligible"]]["gbis_eligible"].sum()
+
+    # Future possibilities: eligible under the full checks but not currently identified for either scheme
+    not_currently_identified = ~(results_df["gbis_eligible"] | results_df["eco4_eligible"])
+    future_possibilities_eco = results_df[
+        (results_df["eco4_eligible_future"] == True) & not_currently_identified
+    ].copy()
+    future_possibilities_gbis = results_df[
+        (results_df["gbis_eligible_future"] == True) &
+        (results_df["eco4_eligible_future"] == False) &
+        not_currently_identified
+    ].copy()
+
+    return {
+        "analysis_data": analysis_data,
+        "ideal_eco4": ideal_eco4,
+        "secondary_eco4_warmfront_not_sold": secondary_eco4_warmfront_not_sold,
+        "underperforming_cavities": underperforming_cavities,
+        "identified_gbis_not_sold": identified_gbis_not_sold,
+        "warmfront_identification": warmfront_identification,
+        "warmfront_identified": warmfront_identified,
+        "warmfront_identified_walls": warmfront_identified["walls"].value_counts(normalize=True),
+        "construction_year_bands": analysis_data["Construction Year Band"].value_counts(normalize=True),
+        "mean_days_since_epc": days_to_today.mean(),
+        "property_types": analysis_data["Property Type"].value_counts(),
+        "n_identified": n_identified,
+        "eco_identified": eco_identified,
+        "n_eco4": n_eco4,
+        "gbis_identified": gbis_identified,
+        "n_gbis": n_gbis,
+        "eligibility_classification": eco_identified["eligibility_classification"].value_counts(),
+        "future_possibilities_eco": future_possibilities_eco,
+        "future_possibilities_gbis": future_possibilities_gbis,
+        "total_future_possibilities": future_possibilities_eco.shape[0] + future_possibilities_gbis.shape[0],
+    }
+
+
+def app():
+    """
+    Entry point for the HA 7 eligibility run.
+
+    Loads the asset list with load_data, tags every row with an "ha7"-prefixed row_id, reads the cleaned EPC
+    lookup, the cleaning dataset and the solar photo supply lookups from S3, and hands everything to
+    get_ha7_data.
+    """
+    bucket = "retrofit-data-dev"
+
+    assets = load_data()
+    assets["row_id"] = [f"ha7{position}" for position in range(len(assets))]
+
+    # The cleaned EPC lookup is read as raw bytes and unpacked with msgpack
+    packed_lookup = read_from_s3(s3_file_name="cleaned_epc_data/cleaned.bson", bucket_name=bucket)
+    cleaned = msgpack.unpackb(packed_lookup, raw=False)
+
+    cleaning_data = read_dataframe_from_s3_parquet(
+        bucket_name=bucket,
+        file_key="sap_change_model/cleaning_dataset.parquet",
+    )
+
+    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=bucket)
+
+    # A single timestamp is shared by everything produced in this run
+    created_at = datetime.now().isoformat()
+
+    results_df, scoring_data, nodata = get_ha7_data(
+        data=assets,
+        cleaned=cleaned,
+        cleaning_data=cleaning_data,
+        created_at=created_at,
+        photo_supply_lookup=photo_supply_lookup,
+        floor_area_decile_thresholds=floor_area_decile_thresholds,
+    )
+
+    # Optional local cache while iterating on the analysis - write:
+    #   import pickle
+    #   with open("ha7_results_jan_10.pkl", "wb") as f:
+    #       pickle.dump({"results_df": results_df, "scoring_data": scoring_data, "nodata": nodata}, f)
+    #
+    # ... and read the old data back:
+    #   with open("ha7_results_jan_10.pkl", "rb") as f:
+    #       old_data = pickle.load(f)
+    #   results_df = old_data["results_df"]
+    #   scoring_data = old_data["scoring_data"]
+    #   nodata = old_data["nodata"]
diff --git a/etl/epc/DataProcessor.py b/etl/epc/DataProcessor.py
index c9f937c0..801a9456 100644
--- a/etl/epc/DataProcessor.py
+++ b/etl/epc/DataProcessor.py
@@ -766,12 +766,16 @@ class EPCDataProcessor:
how='left'
)
+ global_averages = cleaning_data[cols_to_clean].mean()
+
# Fill NaN values with averages
for col in cols_to_clean:
data_to_clean[col].fillna(data_to_clean[f"{col}_AVERAGE"], inplace=True)
data_to_clean.drop(columns=[f"{col}_AVERAGE"], inplace=True)
# If we still have missings
data_to_clean[col].fillna(data_to_clean[col].mean(), inplace=True)
+ # Final step if we still have missings - use global mean
+ data_to_clean[col].fillna(global_averages[col], inplace=True)
return data_to_clean
diff --git a/etl/epc/property_change_app.py b/etl/epc/property_change_app.py
index b4befcd7..3dc6e39b 100644
--- a/etl/epc/property_change_app.py
+++ b/etl/epc/property_change_app.py
@@ -23,6 +23,12 @@ def main():
pd.DataFrame(epc_pipeline.compiled_all_equal_rows).to_parquet("refactor_datasets/all_equal_rows.parquet")
pd.concat(epc_pipeline.compiled_cleaning_averages).to_parquet("refactor_datasets/cleaning_averages.parquet")
+ from utils.s3 import read_dataframe_from_s3_parquet
+ dataset = read_dataframe_from_s3_parquet(
+ bucket_name="retrofit-data-dev",
+ file_key="sap_change_model/dataset_test.parquet",
+ )
+
if __name__ == "__main__":
main()
diff --git a/etl/epc_clean/epc_attributes/MainheatAttributes.py b/etl/epc_clean/epc_attributes/MainheatAttributes.py
index e21f0d37..813e15a6 100644
--- a/etl/epc_clean/epc_attributes/MainheatAttributes.py
+++ b/etl/epc_clean/epc_attributes/MainheatAttributes.py
@@ -16,6 +16,7 @@ class MainHeatAttributes(Definitions):
"solar assisted heat pump",
"exhaust source heat pump",
"community heat pump",
+ "portable electric heating"
]
FUEL_TYPES = ["electric", "mains gas", "wood logs", "coal", "oil", "wood pellets", "anthracite",
"dual fuel mineral and wood", "smokeless fuel", "lpg", "b30k"]
diff --git a/etl/epc_clean/epc_attributes/WallAttributes.py b/etl/epc_clean/epc_attributes/WallAttributes.py
index bfe600d5..09eac215 100644
--- a/etl/epc_clean/epc_attributes/WallAttributes.py
+++ b/etl/epc_clean/epc_attributes/WallAttributes.py
@@ -152,4 +152,7 @@ class WallAttributes(Definitions):
else:
result["insulation_thickness"] = "average"
+ if result["is_cavity_wall"] & result["is_as_built"] & (result["insulation_thickness"] == "average"):
+ result["is_filled_cavity"] = True
+
return result
diff --git a/etl/epc_clean/epc_attributes/WindowAttributes.py b/etl/epc_clean/epc_attributes/WindowAttributes.py
index e962cd31..ce0b156a 100644
--- a/etl/epc_clean/epc_attributes/WindowAttributes.py
+++ b/etl/epc_clean/epc_attributes/WindowAttributes.py
@@ -52,7 +52,7 @@ class WindowAttributes(Definitions):
raise ValueError('Invalid description')
def process(self) -> Dict[str, Union[str, bool]]:
- result: Dict[str, Union[str, bool]] = {
+ result: Dict[str, Union[str, bool, None]] = {
"has_glazing": False,
"glazing_coverage": None,
"glazing_type": None,
@@ -80,7 +80,11 @@ class WindowAttributes(Definitions):
break
# If we didn't find any coverage or type, we assume full coverage
- if not result["glazing_coverage"]:
+ if (not result["glazing_coverage"]) & (result["glazing_type"] != "single"):
result["glazing_coverage"] = "full"
+ # We reset some values if the glazing is single
+ if result["glazing_type"] == "single":
+ result["has_glazing"] = False
+
return result
diff --git a/etl/epc_clean/tests/test_data/test_mainheat_attributes_cases.py b/etl/epc_clean/tests/test_data/test_mainheat_attributes_cases.py
index d264ebff..558b176e 100644
--- a/etl/epc_clean/tests/test_data/test_mainheat_attributes_cases.py
+++ b/etl/epc_clean/tests/test_data/test_mainheat_attributes_cases.py
@@ -1652,4 +1652,17 @@ mainheat_cases = [
'has_electricaire': False, 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False,
"has_electric_heat_pumps": False,
"has_micro-cogeneration": False},
+ {'original_description': 'Portable electric heating assumed for most rooms', 'has_radiators': False,
+ 'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False,
+ 'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': False,
+ 'has_air_source_heat_pump': False, 'has_room_heaters': False, 'has_electric_storage_heaters': False,
+ 'has_warm_air': False, 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False,
+ 'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False,
+ 'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False, 'has_electric_heat_pump': False,
+ 'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False, 'has_exhaust_source_heat_pump': False,
+ 'has_community_heat_pump': False, 'has_portable_electric_heating': True, 'has_electric': True,
+ 'has_mains_gas': False, 'has_wood_logs': False, 'has_coal': False, 'has_oil': False, 'has_wood_pellets': False,
+ 'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False, 'has_lpg': False,
+ 'has_b30k': False, 'has_assumed': True, 'has_electricaire': False, 'has_assumed_for_most_rooms': True,
+ 'has_underfloor_heating': False}
]
diff --git a/etl/epc_clean/tests/test_data/test_wall_attributes_cases.py b/etl/epc_clean/tests/test_data/test_wall_attributes_cases.py
index 300702a7..96c545c1 100644
--- a/etl/epc_clean/tests/test_data/test_wall_attributes_cases.py
+++ b/etl/epc_clean/tests/test_data/test_wall_attributes_cases.py
@@ -550,7 +550,7 @@ wall_cases = [
'is_as_built': False, 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False,
'insulation_thickness': None, 'external_insulation': False, 'internal_insulation': False},
{'original_description': 'Cavity wall, as built, insulated (assumed)', 'thermal_transmittance': None,
- 'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': False, 'is_solid_brick': False,
+ 'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': True, 'is_solid_brick': False,
'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True,
'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average',
'external_insulation': False, 'internal_insulation': False},
@@ -727,7 +727,7 @@ wall_cases = [
'external_insulation': False, 'internal_insulation': False},
{'original_description': 'Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)',
'thermal_transmittance': None,
- 'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': False, 'is_solid_brick': False,
+ 'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': True, 'is_solid_brick': False,
'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True,
'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average',
'external_insulation': False, 'internal_insulation': False},
diff --git a/etl/epc_clean/tests/test_data/test_window_attributes_cases.py b/etl/epc_clean/tests/test_data/test_window_attributes_cases.py
index 1eeeee21..f01ccba9 100644
--- a/etl/epc_clean/tests/test_data/test_window_attributes_cases.py
+++ b/etl/epc_clean/tests/test_data/test_window_attributes_cases.py
@@ -30,7 +30,8 @@ windows_cases = [
'glazing_type': 'triple', 'no_data': False},
{'original_description': 'Gwydrau triphlyg rhannol', 'has_glazing': True, 'glazing_coverage': 'partial',
'glazing_type': 'triple', 'no_data': False},
- {'original_description': 'Single glazed', 'has_glazing': True, 'glazing_coverage': 'full', 'glazing_type': 'single',
+ {'original_description': 'Single glazed', 'has_glazing': False, 'glazing_coverage': None,
+ 'glazing_type': 'single',
'no_data': False},
{'original_description': 'Some double glazing', 'has_glazing': True, 'glazing_coverage': 'partial',
'glazing_type': 'double', 'no_data': False},
@@ -46,7 +47,8 @@ windows_cases = [
'glazing_type': 'double', 'no_data': False},
{'original_description': 'Gwydrau dwbl gan mwyaf', 'has_glazing': True, 'glazing_coverage': 'most',
'glazing_type': 'double', 'no_data': False},
- {'original_description': 'Gwydrau sengl', 'has_glazing': True, 'glazing_coverage': 'full', 'glazing_type': 'single',
+ {'original_description': 'Gwydrau sengl', 'has_glazing': False, 'glazing_coverage': None,
+ 'glazing_type': 'single',
'no_data': False},
{'original_description': 'Ffenestri perfformiad uchel', 'has_glazing': True, 'glazing_coverage': 'full',
'glazing_type': 'high performance', 'no_data': False},
diff --git a/etl/epc_clean/tests/test_roof_attributes.py b/etl/epc_clean/tests/test_roof_attributes.py
index b0663a3e..481beedc 100644
--- a/etl/epc_clean/tests/test_roof_attributes.py
+++ b/etl/epc_clean/tests/test_roof_attributes.py
@@ -3,12 +3,13 @@ from pathlib import Path
from etl.epc_clean.tests.test_data.test_roof_attributes_cases import clean_roof_test_cases
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+
# For local testing
-if __file__ == "":
- input_data_path = Path("./model_data/tests/test_data/EpcClean_inputs.obj")
-else:
- current_file_path = Path(__file__)
- input_data_path = current_file_path.parent / 'test_data' / 'EpcClean_inputs.obj'
+# if __file__ == "":
+# input_data_path = Path("./model_data/tests/test_data/EpcClean_inputs.obj")
+# else:
+# current_file_path = Path(__file__)
+# input_data_path = current_file_path.parent / 'test_data' / 'EpcClean_inputs.obj'
class TestRoofAttributes:
@@ -88,7 +89,12 @@ class TestRoofAttributes:
def test_clean_roof_no_description(self):
roof = RoofAttributes('').process()
- assert roof == {}
+ assert roof == {
+ 'thermal_transmittance': False, 'thermal_transmittance_unit': False, 'is_pitched': False,
+ 'is_roof_room': False, 'is_loft': False, 'is_flat': False, 'is_thatched': False,
+ 'is_at_rafters': False, 'is_assumed': False, 'has_dwelling_above': False, 'is_valid': False,
+ 'insulation_thickness': False
+ }
def test_clean_roof_edge_cases(self):
# Insulation thickness edge case
diff --git a/etl/solar/SolarPhotoSupply.py b/etl/solar/SolarPhotoSupply.py
new file mode 100644
index 00000000..180cd6f5
--- /dev/null
+++ b/etl/solar/SolarPhotoSupply.py
@@ -0,0 +1,244 @@
+import pandas as pd
+from tqdm import tqdm
+from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
+from utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+class SolarPhotoSupply:
+    """
+    Builds, stores and queries a lookup of PHOTO_SUPPLY values taken from EPC certificates, grouped by property
+    characteristics, roof type and floor area decile.
+    """
+    DATASET_COLUMNS = [
+        "UPRN", "PROPERTY_TYPE", "TENURE", "BUILT_FORM", "ROOF_DESCRIPTION", "PHOTO_SUPPLY", "TOTAL_FLOOR_AREA",
+        "CONSTRUCTION_AGE_BAND", "SOLAR_WATER_HEATING_FLAG"
+    ]
+
+    def __init__(self, file_directories, cleaned_lookup):
+        """
+        Initialize the SolarPhotoSupply class with file directories and a cleaned lookup. Currently, this class
+        just works with locally stored data, but this could be extended to work with data stored in S3.
+
+        :param file_directories: A list of directories, each containing a certificates.csv file.
+        :param cleaned_lookup: A dictionary containing cleaned lookup data; only the "roof-description" entry
+            is used.
+        """
+        self.file_directories = file_directories
+
+        self.results = []
+        self.decile_thresholds = None
+
+        self.roof_lookup = pd.DataFrame(cleaned_lookup.get("roof-description"))
+
+        self.photo_supply_lookup = pd.DataFrame()
+        self.floor_area_decile_thresholds = pd.DataFrame()
+
+    def create_dataset(self):
+        """
+        Create a dataset from the provided file directories. This method processes the data files,
+        applies transformations, and aggregates data into a useful format.
+
+        The outcome is stored on the instance: results, decile_thresholds, photo_supply_lookup and
+        floor_area_decile_thresholds.
+
+        :raises ValueError: If there is no roof lookup data or no directories were provided.
+        """
+
+        if self.roof_lookup.empty:
+            raise ValueError("No roof lookup data")
+
+        results = []
+
+        logger.info("Creating solar photo supply dataset")
+        for directory in tqdm(self.file_directories):
+            filepath = directory / "certificates.csv"
+            df = pd.read_csv(filepath, low_memory=False)
+            # dropna returns a new frame, so the column assignment below does not write into a slice
+            df = df.dropna(subset=["UPRN"])
+            df["UPRN"] = df["UPRN"].astype(int).astype(str)
+            # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA
+            df = df.dropna(subset=["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"])
+            # Take newest LODGEMENT_DATE per UPRN
+            df = df.sort_values(by="LODGEMENT_DATE", ascending=False).drop_duplicates(subset=["UPRN"])
+
+            data = df[self.DATASET_COLUMNS].copy()
+            # Only certificates that report a non-zero photo supply are kept
+            data["PHOTO_SUPPLY"] = data["PHOTO_SUPPLY"].fillna(0)
+            data = data[data["PHOTO_SUPPLY"] != 0]
+            results.append(data)
+
+        if not results:
+            # Fail with a clear message instead of the generic error that pd.concat raises on an empty list
+            raise ValueError("No certificate data found - file_directories is empty")
+
+        self.results = pd.concat(results)
+
+        # Convert total floor area to deciles
+        self.decile_thresholds = self.results["TOTAL_FLOOR_AREA"].quantile(
+            [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
+        ).values
+
+        self.results["floor_area_decile"] = pd.cut(
+            self.results["TOTAL_FLOOR_AREA"],
+            bins=[0] + list(self.decile_thresholds) + [float('inf')],
+            labels=False,
+            include_lowest=True
+        )
+
+        # Convert tenure to lower
+        self.results["TENURE"] = self.results["TENURE"].str.lower()
+
+        # Attach the roof attributes from the lookup to each certificate via its roof description
+        self.results = self.results.merge(
+            self.roof_lookup.drop(
+                columns=[
+                    "clean_description", "thermal_transmittance", "thermal_transmittance_unit",
+                    "insulation_thickness", "is_assumed"
+                ]
+            ),
+            left_on="ROOF_DESCRIPTION",
+            right_on="original_description",
+            how="left"
+        )
+
+        self.photo_supply_lookup = self.results.groupby(
+            [
+                "PROPERTY_TYPE", "BUILT_FORM", "TENURE", "is_pitched", "is_roof_room", "is_flat",
+                "CONSTRUCTION_AGE_BAND", "floor_area_decile"
+            ],
+            observed=True
+        ).agg(
+            {
+                "PHOTO_SUPPLY": ["median", "mean"],
+            }
+        ).reset_index()
+
+        # Flatten the two-level column index produced by agg, remove the trailing underscore left on the
+        # grouping columns and convert everything to lowercase
+        flattened = ['_'.join(col).strip() for col in self.photo_supply_lookup.columns.values]
+        self.photo_supply_lookup.columns = [
+            (col[:-1] if col.endswith("_") else col).lower() for col in flattened
+        ]
+
+        self.floor_area_decile_thresholds = pd.DataFrame(
+            self.decile_thresholds,
+            columns=["floor_area_decile_thresholds"]
+        )
+
+    @staticmethod
+    def classify_floor_area(new_area, thresholds):
+        """
+        Classify a given floor area into a decile based on provided thresholds.
+
+        :param new_area: The new floor area to be classified.
+        :param thresholds: A list of thresholds used for classification.
+        :return: The index of the first threshold that is greater than or equal to new_area, or
+            len(thresholds) when the area exceeds every threshold.
+        """
+
+        for i, threshold in enumerate(thresholds):
+            if new_area <= threshold:
+                return i
+        return len(thresholds)
+
+    def save(self, bucket="retrofit-data-dev"):
+        """
+        Save the processed data to an S3 bucket in the parquet format. This method also handles
+        logging and validation to ensure data is present before saving.
+
+        :param bucket: The name of the S3 bucket to store the data in.
+        :raises ValueError: If the photo supply lookup has not been created yet.
+        """
+        if self.photo_supply_lookup.empty:
+            raise ValueError("No data to save")
+
+        logger.info("Storing outputs to S3")
+        # Store this data in s3 as parquet files, under the same keys that load() reads from
+
+        save_dataframe_to_s3_parquet(
+            df=self.photo_supply_lookup,
+            bucket_name=bucket,
+            file_key="solar_pv_supply/photo_supply_lookup.parquet",
+        )
+
+        save_dataframe_to_s3_parquet(
+            df=self.floor_area_decile_thresholds,
+            bucket_name=bucket,
+            file_key="solar_pv_supply/floor_area_decile_thresholds.parquet",
+        )
+
+    @staticmethod
+    def load(bucket):
+        """
+        Load datasets from an S3 bucket.
+
+        :param bucket: The name of the S3 bucket to load data from.
+        :return: A tuple containing photo supply lookup and floor area decile thresholds dataframes.
+        """
+        photo_supply_lookup = read_dataframe_from_s3_parquet(
+            bucket_name=bucket, file_key="solar_pv_supply/photo_supply_lookup.parquet",
+        )
+        floor_area_decile_thresholds = read_dataframe_from_s3_parquet(
+            bucket_name=bucket, file_key="solar_pv_supply/floor_area_decile_thresholds.parquet",
+        )
+
+        return photo_supply_lookup, floor_area_decile_thresholds
+
+    @classmethod
+    def filter_photo_supply_lookup(
+            cls,
+            photo_supply_lookup: pd.DataFrame,
+            floor_area_decile_thresholds: pd.DataFrame,
+            tenure: str,
+            built_form: str,
+            property_type: str,
+            construction_age_band: str,
+            is_flat: bool,
+            is_pitched: bool,
+            is_roof_room: bool,
+            floor_area: float
+    ) -> pd.DataFrame:
+
+        """
+        Filter the photo supply lookup to find the most appropriate photo supply for a given property.
+        :param photo_supply_lookup: The photo supply lookup dataframe.
+        :param floor_area_decile_thresholds: The floor area decile thresholds dataframe.
+        :param tenure: The tenure of the property.
+        :param built_form: The built form of the property.
+        :param property_type: The property type of the property.
+        :param construction_age_band: The construction age band of the property.
+        :param is_flat: Whether the property has a flat roof.
+        :param is_pitched: Whether the property has a pitched roof.
+        :param is_roof_room: Whether the property has a roof room.
+        :param floor_area: The floor area of the property.
+        :return: The matching rows of the lookup, narrowed to the property's floor area decile when that
+            decile is present among the matches.
+        :raises ValueError: If no rows match, even on tenure, built form and property type alone.
+        """
+
+        # Convert the tenure to lower case, as is done in the creation of the dataset
+        tenure = tenure.lower()
+        # We remap the "not defined" tenure onto a cut-off version of the same text - presumably the form in
+        # which it appears in the lookup (TODO confirm)
+        tenure = {
+            "not defined - use in the case of a new dwelling for which the intended tenure in not known. it is not to "
+            "be used for an existing dwelling":
+                "not defined - use in the case of a new dwelling for which the intended tenure in not known. it is no"
+        }.get(tenure, tenure)
+
+        same_kind_of_property = (
+            (photo_supply_lookup["tenure"] == tenure) &
+            (photo_supply_lookup["built_form"] == built_form) &
+            (photo_supply_lookup["property_type"] == property_type)
+        )
+
+        photo_supply_matched = photo_supply_lookup[
+            same_kind_of_property &
+            (photo_supply_lookup["construction_age_band"] == construction_age_band) &
+            (photo_supply_lookup["is_flat"] == is_flat) &
+            (photo_supply_lookup["is_pitched"] == is_pitched) &
+            (photo_supply_lookup["is_roof_room"] == is_roof_room)
+        ]
+
+        if photo_supply_matched.empty:
+            # There are a small number of cases where we don't get a full match so try again with a more
+            # aggregated average, ignoring the roof attributes
+            photo_supply_matched = photo_supply_lookup[same_kind_of_property]
+            # Keep the age band restriction whenever the fallback still contains that band
+            if construction_age_band in photo_supply_matched["construction_age_band"].values:
+                photo_supply_matched = photo_supply_matched[
+                    photo_supply_matched["construction_age_band"] == construction_age_band
+                ]
+
+            if photo_supply_matched.empty:
+                raise ValueError("No photo supply matches")
+
+        floor_area_decile = cls.classify_floor_area(
+            floor_area, floor_area_decile_thresholds["floor_area_decile_thresholds"].values
+        )
+
+        # Only narrow down to the decile when it is present, otherwise keep every decile of the match
+        if floor_area_decile in photo_supply_matched["floor_area_decile"].values:
+            photo_supply_matched = photo_supply_matched[
+                photo_supply_matched["floor_area_decile"] == floor_area_decile
+            ]
+
+        return photo_supply_matched
diff --git a/etl/solar/app.py b/etl/solar/app.py
new file mode 100644
index 00000000..50a3d282
--- /dev/null
+++ b/etl/solar/app.py
@@ -0,0 +1,31 @@
+from pathlib import Path
+from etl.epc.property_change_app import get_cleaned
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+
+DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
+
+
+def app():
+    """
+    Build and persist the dataset used to choose a sensible photo-supply value for solar recommendations.
+
+    The EPC photo-supply variable is defined as:
+    "Percentage of photovoltaic area as a percentage of total roof area. 0% indicates that a Photovoltaic Supply
+    is not present in the property."
+
+    When recommending solar, the retrofit is simulated by raising this value from 0, so a realistic target figure
+    is needed. This script pulls the EPC data from which such a figure can be deduced.
+    :return:
+    """
+
+    # Every sub-directory of the local EPC extract is passed to the client as a separate input
+    epc_directories = []
+    for entry in DATA_DIRECTORY.iterdir():
+        if entry.is_dir():
+            epc_directories.append(entry)
+
+    client = SolarPhotoSupply(file_directories=epc_directories, cleaned_lookup=get_cleaned())
+    client.create_dataset()
+    client.save()
diff --git a/etl/solar/tests/test_solar_photo_supply.py b/etl/solar/tests/test_solar_photo_supply.py
new file mode 100644
index 00000000..b9b7c09c
--- /dev/null
+++ b/etl/solar/tests/test_solar_photo_supply.py
@@ -0,0 +1,109 @@
+import unittest
+import pandas as pd
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+
+
+class TestSolarPhotoSupply(unittest.TestCase):
+    """Tests for SolarPhotoSupply.filter_photo_supply_lookup and SolarPhotoSupply.classify_floor_area."""
+
+    def setUp(self):
+        # Two-row stand-in for the photo supply lookup: one leasehold detached house, one freehold semi-detached flat
+        lookup_rows = [
+            {
+                "tenure": "leasehold",
+                "built_form": "detached",
+                "property_type": "house",
+                "construction_age_band": "pre-1900",
+                "is_flat": False,
+                "is_pitched": True,
+                "is_roof_room": False,
+                "floor_area_decile": 0,
+                "photo_supply": 100,
+            },
+            {
+                "tenure": "freehold",
+                "built_form": "semi-detached",
+                "property_type": "flat",
+                "construction_age_band": "1900-1929",
+                "is_flat": True,
+                "is_pitched": False,
+                "is_roof_room": True,
+                "floor_area_decile": 1,
+                "photo_supply": 200,
+            },
+        ]
+        self.photo_supply_lookup = pd.DataFrame(lookup_rows)
+        self.floor_area_decile_thresholds = pd.DataFrame({"floor_area_decile_thresholds": [50, 100]})
+        self.solar_photo_supply = SolarPhotoSupply([], {})
+
+    def _filter(self, *property_args):
+        # Shared call into the method under test, always using the fixtures built in setUp
+        return self.solar_photo_supply.filter_photo_supply_lookup(
+            self.photo_supply_lookup, self.floor_area_decile_thresholds, *property_args
+        )
+
+    def test_correct_filtering(self):
+        # Every attribute matches the first lookup row
+        matched = self._filter("leasehold", "detached", "house", "pre-1900", False, True, False, 45)
+        self.assertEqual(len(matched), 1)
+        self.assertEqual(matched.iloc[0]["photo_supply"], 100)
+
+    def test_no_matches(self):
+        # A built form that is absent from the lookup must raise
+        with self.assertRaises(ValueError):
+            self._filter("leasehold", "unknown", "house", "pre-1900", False, True, False, 45)
+
+    def test_floor_area_decile_matching(self):
+        # Every attribute matches the second lookup row
+        matched = self._filter("freehold", "semi-detached", "flat", "1900-1929", True, False, True, 60)
+        self.assertEqual(len(matched), 1)
+        self.assertEqual(matched.iloc[0]["photo_supply"], 200)
+
+    def test_invalid_parameters(self):
+        # A non-string tenure is expected to surface as an AttributeError
+        with self.assertRaises(AttributeError):
+            self._filter(123, "detached", "house", "pre-1900", False, True, False, 45)
+
+    def test_classify_floor_area(self):
+        thresholds = [10, 20, 30, 40, 50]
+        solar_photo_supply = SolarPhotoSupply([], {})
+
+        cases = [
+            # Valid floor area
+            (25, 2, "Decile classification did not match expected result"),
+            # Out of range floor area
+            (60, len(thresholds), "Decile classification for out of range value is incorrect"),
+        ]
+        for floor_area, expected_decile, failure_message in cases:
+            actual_decile = solar_photo_supply.classify_floor_area(floor_area, thresholds)
+            self.assertEqual(actual_decile, expected_decile, failure_message)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/etl/testing_data/estimate_epc.py b/etl/testing_data/estimate_epc.py
new file mode 100644
index 00000000..cd91a540
--- /dev/null
+++ b/etl/testing_data/estimate_epc.py
@@ -0,0 +1,194 @@
+from pathlib import Path
+from random import choices, sample
+
+import os
+import pandas as pd
+from tqdm import tqdm
+from dotenv import load_dotenv
+from utils.logger import setup_logger
+from backend.SearchEpc import SearchEpc, vartypes
+from BaseUtility import Definitions
+from etl.epc.settings import BUILT_FORM_REMAP
+
+# NOTE(review): this resolves to "<directory of this file>/backend/.env" - confirm that is where the .env file lives
+ENV_FILE = Path(__file__).parent / "backend" / ".env"
+
+# The .env file must be loaded BEFORE any os.getenv call below, otherwise variables that are only defined in the
+# file (such as EPC_AUTH_TOKEN) are read as None
+load_dotenv(ENV_FILE)
+
+logger = setup_logger()
+
+DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
+# Maximum number of UPRNs evaluated per directory
+DIR_SAMPLE_SIZE = 500
+# Number of directories drawn for the evaluation
+N_DIRECTORIES = 50
+
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+# String-typed EPC fields that are removed before the categorical success score is calculated
+CATETORICALS_TO_IGNORE = [
+    "postcode", "constituency", "local-authority", "built-form", "property-type", "address1", "constituency-label",
+    "building-reference-number", "address2", "posttown", "transaction-type", "lmk-key", "address3",
+    "local-authority-label", "county",
+]
+
+
+def check_numeric_performance(estimated_value, actual_value):
+    """
+    Score a numeric estimate as its relative absolute error against the actual value.
+
+    :param estimated_value: The value produced by the estimation process (may be null)
+    :param actual_value: The value from the real certificate (may be null)
+    :return: None when there is no actual value to compare against, 1 when the estimate is missing or when the
+        actual value is 0 and the estimate is not, 0 when both are 0, otherwise
+        abs(estimated - actual) / abs(actual)
+    """
+    # If we don't have anything to compare against, return None
+    if pd.isnull(actual_value):
+        return None
+
+    # A missing estimate is treated as a complete miss
+    if pd.isnull(estimated_value):
+        return 1
+
+    # Relative error is undefined for an actual value of 0, so score it as all-or-nothing
+    if actual_value == 0:
+        return 0 if estimated_value == 0 else 1
+
+    # Divide by the magnitude of the actual value so that negative actual values cannot yield a negative error
+    return abs(estimated_value - actual_value) / abs(actual_value)
+
+
+def app():
+    """
+    This script is used to test the EPC estimation process.
+
+    A sample of the local EPC directories is drawn. For each one, the newest certificate of up to DIR_SAMPLE_SIZE
+    UPRNs is re-estimated with SearchEpc.estimate_epc (the LMK keys of every certificate for that UPRN are passed
+    as lmks_to_drop) and compared against the real certificate. A numeric and a categorical success score are
+    recorded per property and then aggregated overall, by tenure, by property type and by property type & built
+    form.
+
+    NOTE(review): the aggregates are neither returned nor logged - presumably they are inspected interactively.
+    """
+
+    numerical_vartypes = {key: value for key, value in vartypes.items() if value in ["float", "Int64"]}
+    str_var_types = {key: value for key, value in vartypes.items() if value == "str"}
+    # Make sure we haven't missed any keys
+    if len(numerical_vartypes) + len(str_var_types) != len(vartypes):
+        raise ValueError("Not all vartypes have been accounted for")
+
+    # Drop some keys that aren't important
+    for k in CATETORICALS_TO_IGNORE:
+        str_var_types.pop(k, None)
+
+    directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()]
+
+    # Sample WITHOUT replacement: the per-directory UPRN sample is deterministic, so drawing the same directory
+    # twice would only duplicate results
+    directory_sample = sample(directories, k=min(N_DIRECTORIES, len(directories)))
+
+    results = []
+
+    for directory in tqdm(directory_sample):
+        filepath = directory / "certificates.csv"
+        df = pd.read_csv(filepath, low_memory=False)
+        # Drop records without a UPRN BEFORE casting to string - after the cast, missing values become the literal
+        # string "<NA>" and can no longer be detected as null
+        df = df[~pd.isnull(df["UPRN"])].copy()
+        df["UPRN"] = df["UPRN"].astype("Int64").astype("str")
+
+        # Take a fixed sample based on the first DIR_SAMPLE_SIZE uprns
+        uprn_sample = sorted(df["UPRN"].unique().tolist())[:DIR_SAMPLE_SIZE]
+        df_sample = df[df["UPRN"].isin(uprn_sample)]
+        # Take the record with the newest LODGEMENT_DATETIME by uprn
+        df_sample = df_sample.sort_values("LODGEMENT_DATETIME", ascending=False).drop_duplicates("UPRN")
+        # Convert the columns to lower case and replace underscores with hyphens, the same as the api
+        df_sample.columns = df_sample.columns.str.lower().str.replace("_", "-")
+
+        # For each epc, we test the estimation process
+        for _, epc in df_sample.iterrows():
+            epc = epc.to_dict()
+            address1 = epc["address1"]
+            postcode = epc["postcode"]
+
+            # Get all EPCs for this uprn and we make sure they get dropped from the estimate_epc function
+            epcs_for_uprn = df[df["UPRN"] == epc["uprn"]]
+            lmks_to_drop = epcs_for_uprn["LMK_KEY"].tolist()
+            searcher = SearchEpc(address1, postcode, auth_token=EPC_AUTH_TOKEN, os_api_key="")
+            searcher.uprn = epc["uprn"]
+
+            # Perform the same remapping for built-form as in the Property class for this test, in case we get (e.g.)
+            # Enclosed End-Terrace
+            built_form = BUILT_FORM_REMAP.get(epc["built-form"], epc["built-form"])
+            if (epc["property-type"] == "Maisonette" and built_form == "Detached") or (
+                built_form in Definitions.DATA_ANOMALY_MATCHES
+            ):
+                built_form = ""
+
+            estimated_epc = searcher.estimate_epc(
+                property_type=epc["property-type"], built_form=built_form, lmks_to_drop=lmks_to_drop
+            )
+
+            # We now compare the difference between the estimated and original
+            # TODO: We can convert windows and lighting to numeric versions and estimate how close we are
+            numeric_errors = {
+                key: check_numeric_performance(estimated_epc[key], epc[key]) for key in numerical_vartypes
+            }
+            # Remove Nones (fields with no actual value to compare against)
+            numeric_errors = {key: value for key, value in numeric_errors.items() if value is not None}
+            if numeric_errors:
+                numeric_success = 1 - sum(numeric_errors.values()) / len(numeric_errors)
+            else:
+                # Nothing was comparable for this property; NaN is ignored by the medians below
+                numeric_success = float("nan")
+
+            # categorical performance: 1 for an exact match, 0 otherwise
+            categorical_performance = {
+                key: 0 if estimated_epc[key] != epc[key] else 1 for key in str_var_types
+            }
+            # Get an average
+            categorical_success = sum(categorical_performance.values()) / len(categorical_performance)
+
+            results.append(
+                {
+                    "uprn": epc["uprn"],
+                    "numeric_success": numeric_success,
+                    "categorical_success": categorical_success,
+                    "property_type": epc["property-type"],
+                    "built_form": epc["built-form"],
+                    "tenure": epc["tenure"],
+                }
+            )
+
+    # Get aggregate performance figures
+    results_df = pd.DataFrame(results)
+    results_df["tenure"] = results_df["tenure"].replace("Rented (social)", "rental (social)")
+
+    avg_numeric_success = results_df["numeric_success"].median()
+    avg_categorical_success = results_df["categorical_success"].median()
+
+    # With 20 nearest homes
+    # 0.7718100840549558
+    # 0.5116279069767442
+    # 100 nearest homes
+    # 0.7859617377809409
+    # 0.5348837209302325
+
+    # Fixed sample, sqrt weights
+
+    # Group by tenure
+    by_tenure = results_df.groupby("tenure").agg(
+        {"numeric_success": "median", "categorical_success": "median", "uprn": "count"}
+    )
+    pd.set_option('display.max_rows', 500)
+    pd.set_option('display.max_columns', 500)
+    pd.set_option('display.width', 1000)
+
+    # With 20 nearest homes
+    #                                                     numeric_success  categorical_success   uprn
+    # tenure
+    # NO DATA!                                                   0.847840             0.581395    278
+    # Not defined - use in the case of a new dwelling...         0.930282             0.651163    617
+    # Owner-occupied                                             0.770330             0.511628   2588
+    # Rented (private)                                           0.791885             0.558140   1232
+    # owner-occupied                                             0.741088             0.488372  10912
+    # rental (private)                                           0.749064             0.488372   3252
+    # rental (social)                                            0.822109             0.581395   3878
+    # unknown                                                    0.895840             0.627907   1820
+
+    # 100 nearest homes
+    # tenure
+    # NO DATA!                                                   0.899566             0.604651    233
+    # Not defined - use in the case of a new dwelling...         0.927518             0.674419    608
+    # Owner-occupied                                             0.777026             0.511628   3167
+    # Rented (private)                                           0.805646             0.534884   1316
+    # owner-occupied                                             0.762180             0.488372  10835
+    # rental (private)                                           0.760503             0.511628   3181
+    # rental (social)                                            0.830057             0.604651   3705
+    # unknown                                                    0.899948             0.627907   1571
+
+    # By property type - we also want to see how many properties we have for each property type
+    by_property_type = results_df.groupby("property_type").agg(
+        {"numeric_success": "median", "categorical_success": "median", "uprn": "count"}
+    )
+    # By property_type & built form
+    by_property_type_built_form = results_df.groupby(["property_type", "built_form"]).agg(
+        {"numeric_success": "median", "categorical_success": "median", "uprn": "count"}
+    )
diff --git a/etl/testing_data/no_epc_input.py b/etl/testing_data/no_epc_input.py
new file mode 100644
index 00000000..0745ff7a
--- /dev/null
+++ b/etl/testing_data/no_epc_input.py
@@ -0,0 +1,42 @@
+"""
+This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
+testing
+"""
+import pandas as pd
+from utils.s3 import save_csv_to_s3
+
+USER_ID = 8
+PORTFOLIO_ID = 57
+
+
+def app():
+    """
+    Upload a small test portfolio to S3 and print the request body that references the uploaded file.
+
+    NOTE(review): judging by the "no_epc.csv" file name this portfolio is presumably made up of properties
+    without an EPC - confirm.
+    :return:
+    """
+
+    addresses = ["21 Butler House", "22 Butler House", "23 Butler House", "24 Butler House"]
+    portfolio_rows = [{"address": address, "postcode": "E2 0PN", "Notes": None} for address in addresses]
+    test_file = pd.DataFrame(portfolio_rows)
+
+    # Store the data in s3, keyed by user and portfolio
+    s3_key = f"{USER_ID}/{PORTFOLIO_ID}/no_epc.csv"
+    save_csv_to_s3(dataframe=test_file, bucket_name="retrofit-plan-inputs-dev", file_name=s3_key)
+
+    # Printed so it can be copied into a request that triggers the engine for this file
+    print(
+        {
+            "portfolio_id": str(PORTFOLIO_ID),
+            "housing_type": "Social",
+            "goal": "Increase EPC",
+            "goal_value": "A",
+            "trigger_file_path": s3_key,
+        }
+    )
diff --git a/etl/testing_data/windows_portfolio.py b/etl/testing_data/windows_portfolio.py
new file mode 100644
index 00000000..356d107e
--- /dev/null
+++ b/etl/testing_data/windows_portfolio.py
@@ -0,0 +1,43 @@
+"""
+This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
+testing
+"""
+import pandas as pd
+from utils.s3 import save_csv_to_s3
+
+USER_ID = 8
+PORTFOLIO_ID = 56
+
+
+def app():
+    """
+    Upload the windows-recommendation test portfolio to S3 and print the request body that references it.
+    :return:
+    """
+
+    address_postcode_pairs = [
+        ("3 Church Terrace", "LE13 0PW"),
+        ("3, Main Street, Redmile", "NG13 0GA"),
+        ("Manor House, Kennel Lane, Reepham", "LN3 4DZ"),
+        ("13 Main Street", "LE14 2JU"),
+        ("8 The Crescent, Coston Road, Buckminster", "NG33 5SF"),
+    ]
+    portfolio_rows = [
+        {"address": address, "postcode": postcode, "Notes": None} for address, postcode in address_postcode_pairs
+    ]
+    test_file = pd.DataFrame(portfolio_rows)
+
+    # Store the data in s3, keyed by user and portfolio
+    s3_key = f"{USER_ID}/{PORTFOLIO_ID}/windows_portfolio_inputs.csv"
+    save_csv_to_s3(dataframe=test_file, bucket_name="retrofit-plan-inputs-dev", file_name=s3_key)
+
+    # Printed so it can be copied into a request that triggers the engine for this file
+    print(
+        {
+            "portfolio_id": str(PORTFOLIO_ID),
+            "housing_type": "Social",
+            "goal": "Increase EPC",
+            "goal_value": "A",
+            "trigger_file_path": s3_key,
+        }
+    )
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index 0d9031b2..106f4453 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -18,6 +18,25 @@ regional_labour_variations = [
{"Region": "Northern Ireland", "Adjustment_Factor": 0.76}
]
+# This data is based on the MCS database
+# Keys are either "average_cost_per_kwh" (no region suffix - presumably the all-region average) or
+# "average_cost_per_kwh-<region>", which is the form Costs.solar_pv looks up.
+# NOTE(review): despite the "per_kwh" naming, Costs.solar_pv multiplies these figures by the system size in kW
+# (wattage / 1000), so they are presumably a cost per kW of installed capacity - confirm against the MCS dashboard.
+MCS_SOLAR_PV_COST_DATA = {
+    "last_updated": "2024-01-04",
+    "average_cost_per_kwh": 2013.94,
+    "average_cost_per_kwh-Outer London": 2618.75,
+    "average_cost_per_kwh-Inner London": 2618.75,
+    "average_cost_per_kwh-South East England": 2083.33,
+    "average_cost_per_kwh-South West England": 2113,
+    "average_cost_per_kwh-East of England": 1973.86,
+    "average_cost_per_kwh-East Midlands": 1981.86,
+    "average_cost_per_kwh-West Midlands": 1926.55,
+    "average_cost_per_kwh-North East England": 2028.49,
+    "average_cost_per_kwh-North West England": 1620.42,
+    "average_cost_per_kwh-Yorkshire and the Humber": 2060.9,
+    "average_cost_per_kwh-Wales": 1898.83,
+    "average_cost_per_kwh-Scotland": 1967.97,
+    "average_cost_per_kwh-Northern Ireland": 2126.09,
+}
+
class Costs:
"""
@@ -42,7 +61,7 @@ class Costs:
# We use a higher contingency rate for internal wall insulation because of the potential for issues with moving
# fittings and trimming doors, as well as scope for damage to the existing wall during preparation.
- IWI_CONTINGENCY = 0.15
+ IWI_CONTINGENCY = 0.2
# Where there is more uncertainty, a higher contingency rate is used
HIGH_RISK_CONTINGENCY = 0.2
@@ -58,12 +77,22 @@ class Costs:
# have a preliminaries of 12-14% so we use 12% as the median for the preliminaries rate.
# For External wall insulation (EWI), we use 15% as the preliminaries rate if we think the property might
# need scaffolding, otherwise we use 12%. This is to account for any site preparation that might be required
- EWI_NO_SCAFFOLDING_PRELIMINARIES = 0.15
- EWI_SCAFFOLDING_PRELIMINARIES = 0.20
+ EWI_NO_SCAFFOLDING_PRELIMINARIES = 0.2
+ EWI_SCAFFOLDING_PRELIMINARIES = 0.25
VAT_RATE = 0.2
PROFIT_MARGIN = 0.2
+    # Based on the greenmatch article linked below, a sash window is on average around 50% more expensive than a
+    # casement window. Therefore, for a conservative cost estimate that allows for a more premium window type, we
+    # inflate the window cost (window_glazing applies this factor to material and labour) as if sash windows are used
+    # https://www.greenmatch.co.uk/windows/double-glazing/cost
+ SASH_WINDOW_INFLATION_FACTOR = 1.5
+
+ # Typically, secondary glazing can be installed for 25% of the cost of double glazed windows - to be conservative,
+ # we scale the cost by half
+ SECONDARY_GLAZING_SCALING_FACTOR = 0.5
+
def __init__(self, property_instance):
"""
Initializes the Costs class with a property instance.
@@ -147,12 +176,16 @@ class Costs:
"""
material_cost_per_m2 = material["material_cost"]
+ # We inflate material costs due to recent price increases
+ material_cost_per_m2 = material_cost_per_m2 * 1.5
+
base_material_cost = material_cost_per_m2 * floor_area
labour_cost = material["labour_cost"] * floor_area * self.labour_adjustment_factor
subtotal_before_profit = base_material_cost + labour_cost
- contingency_cost = subtotal_before_profit * self.CONTINGENCY
+ # We use high risk contingency because of the possibility of access issues and clearing existing insulation
+ contingency_cost = subtotal_before_profit * self.HIGH_RISK_CONTINGENCY
preliminaries_cost = subtotal_before_profit * self.PRELIMINARIES
profit_cost = subtotal_before_profit * self.PROFIT_MARGIN
@@ -719,3 +752,121 @@ class Costs:
"labour_days": labour_days,
"labour_cost": labour_costs
}
+
+    def window_glazing(self, number_of_windows, material, is_secondary_glazing=False):
+        """
+        Estimate the cost and labour of glazing a number of windows.
+
+        The job being priced is characterised by the following steps:
+        1) Initial assessment and measurements of the frame and opening, so the new units fit precisely.
+        2) Removal of the existing window without damaging the surrounding wall (or the frame, if it is reused).
+        3) Responsible disposal of the old window, recycling glass and other materials where possible.
+        4) Surface preparation: repairing or replacing parts of the frame, sealing gaps and making sure the
+           opening is level and square.
+        5) Installation of the window frame (where new frames are used): aligned, levelled and fixed in place.
+        6) Installation of the window sill, if a new one is required.
+        7) Installation of the double glazed glass units, fitted snugly without stressing the glass.
+        8) Sealing and weatherproofing around the frame and between glass and frame (e.g. silicone sealant).
+        9) Finishing touches: trimming, painting or staining, and cleaning up after the installation.
+        10) Inspection and testing that the windows open, close and lock, and that the sealing has no gaps.
+
+        For this cost estimation process, the initial assessment is factored into the preliminaries.
+
+        :param number_of_windows: Number of windows to be glazed
+        :param material: Material record providing "material_cost", "labour_cost" and "labour_hours_per_unit"
+        :param is_secondary_glazing: When True the job is priced as secondary glazing rather than double glazing
+        :return: Dictionary with the cost breakdown and the labour hours/days
+        """
+
+        material_cost = material["material_cost"] * number_of_windows
+        labour_cost = material["labour_cost"] * number_of_windows * self.labour_adjustment_factor
+
+        # Secondary glazing is scaled down; otherwise the cost is inflated to allow for sash windows
+        if is_secondary_glazing:
+            cost_multiplier = self.SECONDARY_GLAZING_SCALING_FACTOR
+        else:
+            cost_multiplier = self.SASH_WINDOW_INFLATION_FACTOR
+        subtotal = (material_cost + labour_cost) * cost_multiplier
+
+        # Each uplift is a fixed rate applied to the scaled subtotal
+        contingency_cost = subtotal * self.CONTINGENCY
+        preliminaries_cost = subtotal * self.PRELIMINARIES
+        profit_cost = subtotal * self.PROFIT_MARGIN
+        subtotal_before_vat = subtotal + contingency_cost + preliminaries_cost + profit_cost
+
+        vat_cost = subtotal_before_vat * self.VAT_RATE
+        total_cost = subtotal_before_vat + vat_cost
+
+        labour_hours = material["labour_hours_per_unit"] * number_of_windows
+        if is_secondary_glazing:
+            labour_hours = labour_hours * self.SECONDARY_GLAZING_SCALING_FACTOR
+
+        # Assume a team of 2 working 8 hour days
+        labour_days = (labour_hours / 8) / 2
+
+        cost_breakdown = {
+            "total": total_cost,
+            "subtotal": subtotal_before_vat,
+            "vat": vat_cost,
+            "contingency": contingency_cost,
+            "preliminaries": preliminaries_cost,
+            "material": material_cost,
+            "profit": profit_cost,
+            "labour_hours": labour_hours,
+            "labour_cost": labour_cost,
+            "labour_days": labour_days,
+        }
+        return cost_breakdown
+
+    def solar_pv(self, wattage: float):
+        """
+        Calculates the total cost for solar PV based on data provided by the MCS dashboard, which contains
+        costing data for installations of renewable and clean energy measures.
+
+        The data in the dashboard is filtered on domestic building installations and then the data across the
+        various regions is manually collected. There is currently no automated way to get the data from the MCS
+        dashboard
+
+        Price can also be benchmarked against this checkatrade article:
+        https://www.checkatrade.com/blog/cost-guides/cost-of-solar-panel-installation/
+        :param wattage: Peak wattage of the solar PV system
+        :return: Dictionary with "total", "subtotal" (total excluding VAT), "vat", "labour_hours" and "labour_days"
+        """
+
+        # Get the cost data relevant to the region. If the region is missing or has no entry in the MCS data, fall
+        # back to the region-less average rather than failing the whole recommendation
+        regional_cost = MCS_SOLAR_PV_COST_DATA.get(
+            f"average_cost_per_kwh-{self.region}", MCS_SOLAR_PV_COST_DATA["average_cost_per_kwh"]
+        )
+
+        kw = wattage / 1000
+        total_cost = kw * regional_cost
+
+        # The MCS-based figure is treated as VAT inclusive, so VAT is backed out of the total rather than added
+        subtotal_before_vat = total_cost / (1 + self.VAT_RATE)
+        vat = total_cost - subtotal_before_vat
+
+        # Labour hours are based on estimates from online research but an average team seems to consist of 3 people
+        # and most jobs take around 2 days.
+        # NOTE(review): 3 people x 8 hours x 2 days is 48 hours, not the 72 used below - confirm which is intended
+        return {
+            "total": total_cost,
+            "subtotal": subtotal_before_vat,
+            "vat": vat,
+            "labour_hours": 72,
+            "labour_days": 2,
+        }
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 60cdb696..2b35ffea 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -6,6 +6,8 @@ from recommendations.RoofRecommendations import RoofRecommendations
from recommendations.VentilationRecommendations import VentilationRecommendations
from recommendations.FireplaceRecommendations import FireplaceRecommendations
from recommendations.LightingRecommendations import LightingRecommendations
+from recommendations.SolarPvRecommendations import SolarPvRecommendations
+from recommendations.WindowsRecommendations import WindowsRecommendations
from backend.ml_models.AnnualBillSavings import AnnualBillSavings
@@ -35,6 +37,8 @@ class Recommendations:
)
self.fireplace_recommender = FireplaceRecommendations(property_instance=property_instance)
self.lighting_recommender = LightingRecommendations(property_instance=property_instance, materials=materials)
+ self.windows_recommender = WindowsRecommendations(property_instance=property_instance, materials=materials)
+ self.solar_recommender = SolarPvRecommendations(property_instance=property_instance)
def recommend(self):
@@ -77,6 +81,16 @@ class Recommendations:
if self.lighting_recommender.recommendation:
property_recommendations.append(self.lighting_recommender.recommendation)
+ # Windows recommendations
+ self.windows_recommender.recommend()
+ if self.windows_recommender.recommendation:
+ property_recommendations.append(self.windows_recommender.recommendation)
+
+ # Solar recommendations
+ self.solar_recommender.recommend()
+ if self.solar_recommender.recommendation:
+ property_recommendations.append(self.solar_recommender.recommendation)
+
# We insert temporary ids into the recommendations which is important for the optimiser later
property_recommendations = self.insert_temp_recommendation_id(property_recommendations)
@@ -148,6 +162,8 @@ class Recommendations:
# For the moment, we cap the number of SAP points that can be achieved by ventilation at 2
rec["sap_points"] = min(rec["sap_points"], VentilationRecommendations.SAP_LIMIT)
+ # Round to 2 decimal places
+ rec["sap_points"] = round(rec["sap_points"], 2)
rec["co2_equivalent_savings"] = float(property_instance.data["co2-emissions-current"]) - new_carbon
# Energy consumption current is per meter squared, so we need to multiply by the floor area to get
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
new file mode 100644
index 00000000..01cd4f17
--- /dev/null
+++ b/recommendations/SolarPvRecommendations.py
@@ -0,0 +1,65 @@
+import numpy as np
+from recommendations.Costs import Costs
+
+
+class SolarPvRecommendations:
+    """Recommends and prices a roof-mounted solar PV system for properties that look suitable for one."""
+
+    # Approximate area of the solar panels
+    SOLAR_PANEL_AREA = 1.6
+    # Wattage per panel
+    SOLAR_PANEL_WATTAGE = 360
+
+    def __init__(self, property_instance):
+        """
+        :param property_instance: Instance of the Property class, for the home associated to property_id
+        """
+
+        self.property = property_instance
+        self.costs = Costs(self.property)
+
+        # Stays empty unless recommend() finds the property suitable
+        self.recommendation = []
+
+    def recommend(self):
+        """
+        We check if a property is potentially suitable for solar PV based on the following criteria:
+        - The property is a house or bungalow
+        - The property has a flat roof, a pitched roof or a roof room
+        - The property does not have existing solar pv
+        - At least one whole panel fits into the roof area available for solar PV
+
+        When all criteria hold, self.recommendation is set to a single-element list describing the system.
+        :return:
+        """
+
+        is_valid_property_type = self.property.data["property-type"] in ["House", "Bungalow"]
+        is_valid_roof_type = (
+            self.property.roof["is_flat"] or self.property.roof["is_pitched"] or self.property.roof["is_roof_room"]
+        )
+        # If there is no existing solar PV, the photo-supply field will be None, 0 or one of the data anomaly
+        # values. DATA_ANOMALY_MATCHES is a collection, so the value must be tested for membership of it rather
+        # than compared against the collection as a whole
+        photo_supply = self.property.data["photo-supply"]
+        has_no_existing_solar_pv = (
+            photo_supply is None
+            or photo_supply == 0
+            or photo_supply in self.property.DATA_ANOMALY_MATCHES
+        )
+
+        if not is_valid_property_type or not is_valid_roof_type or not has_no_existing_solar_pv:
+            return
+
+        # We now have a property which is potentially suitable for solar PV
+        number_solar_panels = np.floor(self.property.solar_pv_roof_area / self.SOLAR_PANEL_AREA)
+        if number_solar_panels < 1:
+            # Not even a single panel fits, so there is nothing meaningful to recommend or price
+            return
+        solar_panel_wattage = number_solar_panels * self.SOLAR_PANEL_WATTAGE
+
+        # Given the wattage, we estimate the cost of the solar PV system. This is based on the MCS database
+        # of solar PV installations
+        cost_result = self.costs.solar_pv(wattage=solar_panel_wattage)
+
+        # Whole kilowatts, used for the description only
+        kw = int(np.round(solar_panel_wattage / 1000))
+
+        self.recommendation = [
+            {
+                "parts": [],
+                "type": "solar_pv",
+                "description": f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) panel system on the roof",
+                "starting_u_value": None,
+                "new_u_value": None,
+                "sap_points": None,
+                **cost_result,
+                # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale
+                # back up here
+                "photo_supply": 100 * self.property.solar_pv_percentage
+            }
+        ]
diff --git a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py
new file mode 100644
index 00000000..b6ecd099
--- /dev/null
+++ b/recommendations/WindowsRecommendations.py
@@ -0,0 +1,97 @@
+from typing import List
+
+import numpy as np
+
+from backend.Property import Property
+from recommendations.Costs import Costs
+
+
+class WindowsRecommendations:
+    """Recommends and prices double or secondary glazing for the windows of a property that are not yet glazed."""
+
+    # If the property has existing glazing, we scale down the number of windows that need to be glazed
+    COVERAGE_MAP = {
+        # If most of the windows have already been glazed, we assume that 2/3 are glazed and 1/3 are remaining to be
+        # glazed
+        "most": 0.33,
+        # If glazing is partial, we assume 50/50 split between glazed and unglazed
+        "partial": 0.5
+    }
+
+    def __init__(self, property_instance: "Property", materials: List):
+        """
+        :param property_instance: Instance of the Property class for the home being assessed
+        :param materials: Material records; exactly one must have type "windows_glazing"
+        :raises ValueError: If there is not exactly one window glazing material
+        """
+        self.property = property_instance
+        self.costs = Costs(self.property)
+
+        # Stays empty unless recommend() finds windows that still need glazing
+        self.recommendation = []
+
+        self.glazing_material = [
+            material for material in materials if material["type"] == "windows_glazing"
+        ]
+
+        if len(self.glazing_material) != 1:
+            raise ValueError("There should only be one window glazing material")
+        self.glazing_material = self.glazing_material[0]
+
+    def _remaining_window_fraction(self):
+        """Return the fraction (between 0 and 1) of the property's windows that still need to be glazed."""
+        try:
+            glazed_percentage = float(self.property.data["multi-glaze-proportion"])
+        except (TypeError, ValueError):
+            # Missing ("" or None) or otherwise unparseable proportion
+            glazed_percentage = None
+
+        if glazed_percentage is None or np.isnan(glazed_percentage):
+            # Fall back on the coarse glazing coverage description; unknown coverage means all windows remain
+            return self.COVERAGE_MAP.get(self.property.windows["glazing_coverage"], 1)
+
+        # Clamp to a valid percentage so the number of windows can never be scaled up or go negative
+        return 1 - min(max(glazed_percentage, 0), 100) / 100
+
+    def recommend(self):
+        """
+        This method will recommend the best possible glazing options for a property.
+
+        In order to do this, we need to estimate the number of windows that the home has. This information will be
+        stored in the property object, under property.number_of_windows
+
+        When there are windows left to glaze, self.recommendation is set to a single-element list.
+        :raises ValueError: If the property has no number of windows
+        :return:
+        """
+
+        # If the property has restricted measures (e.g. a conservation area or a listed building), it becomes more
+        # difficult to install double glazing: it is still possible but is not practical as it requires planning
+        # permission and might require a more expensive window type, such as timber. Therefore, we recommend
+        # secondary glazing instead. We do the same when the existing glazing is already secondary.
+
+        number_of_windows = self.property.number_of_windows
+        is_secondary_glazing = self.property.restricted_measures or (
+            self.property.windows["glazing_type"] == "secondary"
+        )
+
+        if not number_of_windows:
+            raise ValueError("Number of windows not specified")
+
+        # Nothing to do when every window is already glazed
+        if self.property.windows["has_glazing"] and self.property.windows["glazing_coverage"] == "full":
+            return
+
+        # We scale the number of windows based on the proportion of existing glazing
+        number_of_windows = np.ceil(number_of_windows * self._remaining_window_fraction())
+        if number_of_windows < 1:
+            # The glazed proportion says that no windows remain, so there is nothing to recommend or price
+            return
+
+        # We then price the job based on the number of windows that there are
+        cost_result = self.costs.window_glazing(
+            number_of_windows=number_of_windows,
+            material=self.glazing_material,
+            is_secondary_glazing=is_secondary_glazing
+        )
+
+        glazing_type = "secondary glazing" if is_secondary_glazing else "double glazing"
+        if self.property.windows["glazing_coverage"] in ["partial", "most"]:
+            description = f"Install {glazing_type} to the remaining windows"
+        else:
+            description = f"Install {glazing_type} to all windows"
+
+        if self.property.is_listed:
+            description += ". Secondary glazing recommended due to listed building status"
+        elif self.property.is_heritage:
+            description += ". Secondary glazing recommended due to heritage building status"
+        elif self.property.in_conservation_area:
+            description += ". Secondary glazing recommended due to conservation area status"
+
+        self.recommendation = [
+            {
+                "parts": [],
+                "type": "windows_glazing",
+                "description": description,
+                "starting_u_value": None,
+                "new_u_value": None,
+                "sap_points": None,
+                **cost_result,
+                "is_secondary_glazing": is_secondary_glazing
+            }
+        ]
diff --git a/recommendations/recommendation_utils.py b/recommendations/recommendation_utils.py
index 01ef8480..872a1c5b 100644
--- a/recommendations/recommendation_utils.py
+++ b/recommendations/recommendation_utils.py
@@ -1,4 +1,5 @@
import math
+from datetime import datetime
from copy import deepcopy
from typing import Union
@@ -565,7 +566,7 @@ def estimate_external_wall_area(num_floors, floor_height, perimeter, built_form)
'Detached': 4,
}
- exposed_wall_area = total_wall_area * (number_exposed_walls[built_form] / 4)
+ exposed_wall_area = total_wall_area * (number_exposed_walls.get(built_form, 3) / 4)
return exposed_wall_area
@@ -669,3 +670,87 @@ def esimtate_pitched_roof_area(floor_area: float, floor_height: float) -> float:
area = 2 * (slope * wall_width)
return area
+
+
+def estimate_windows(
+    property_type, built_form, construction_age_band, floor_area, number_habitable_rooms, extension_count
+):
+    """
+    Estimate the number of windows in a property from high-level EPC attributes.
+
+    The estimate starts at one window per habitable room and is then adjusted by a set of heuristics for
+    non-habitable rooms, built form, floor area, construction age, extensions and property type.
+
+    :param property_type: Property type label, e.g. "House", "Bungalow", "Flat" or "Maisonette"
+    :param built_form: Built form label, e.g. "Detached", "Semi-Detached", "End-Terrace" or "Mid-Terrace".
+        Unrecognised values (including an empty string) add no windows
+    :param construction_age_band: Construction age band label. Only the two pre-1930 England and Wales
+        bands trigger an adjustment
+    :param floor_area: Floor area of the property (the 85 and 120 thresholds are presumably in square
+        metres - confirm against the caller)
+    :param number_habitable_rooms: Number of habitable rooms
+    :param extension_count: Number of extensions; each one adds a window
+    :return: The estimated number of windows
+    :raises ValueError: If the heuristics produce a negative window count
+    """
+    # Base window count based on habitable rooms
+    window_count = number_habitable_rooms
+
+    # Additional windows for non-habitable rooms: we assume at least one kitchen and one bathroom, plus one
+    # extra window for every two habitable rooms beyond three (larger homes have more non-habitable rooms)
+    non_habitable_base = 2
+    extra_non_habitable = max(0, (number_habitable_rooms - 3) // 2)
+    window_count += non_habitable_base + extra_non_habitable
+
+    # Adjustments based on built form and property type
+    if property_type in ("House", "Bungalow") and built_form in ("Semi-Detached", "Detached"):
+        built_form_lookup = {
+            "Semi-Detached": 3,
+            "Detached": 4,
+        }
+    else:
+        # Flats, maisonettes and terraced houses/bungalows receive a smaller adjustment
+        built_form_lookup = {
+            "Mid-Terrace": 0,
+            "End-Terrace": 1,
+            "Semi-Detached": 1,
+            "Detached": 2,
+        }
+    window_count += built_form_lookup.get(built_form, 0)
+
+    # Adjust for floor area (larger floor area might indicate more rooms/windows). Properties below 85 keep
+    # the standard count
+    if floor_area > 120:
+        # Very large properties
+        window_count += 2
+    elif floor_area >= 85:
+        # Medium to large properties
+        window_count += 1
+
+    # Adjust for construction age band
+    if construction_age_band in ("England and Wales: before 1900", "England and Wales: 1900-1929"):
+        # Older houses with smaller, more numerous windows
+        window_count += 1
+
+    # Adjust for extensions (each extension might add windows)
+    window_count += extension_count
+
+    # Adjustments for specific property types. The label was previously misspelt as "Maisontte", which
+    # meant maisonettes silently skipped this adjustment
+    if property_type in ("Flat", "Maisonette"):
+        # Flats might have fewer windows due to shared walls
+        # Maisonettes might follow a similar pattern to flats or small houses
+        window_count -= 1
+
+    # Ensure window count is not negative
+    if window_count < 0:
+        raise ValueError("Window count cannot be negative.")
+
+    return window_count
+
+
+def calculate_cavity_age(newest_epc, older_epcs, cleaned):
+    """
+    Calculate the number of days since the most recent EPC that recorded a filled cavity wall.
+
+    :param newest_epc: The newest EPC record. Must contain "walls-description" and "lodgement-date"
+    :param older_epcs: List of older EPC records with the same keys
+    :param cleaned: Cleaned lookups. cleaned["walls-description"] is a list of dicts, each holding an
+        "original_description" plus the "is_cavity_wall" and "is_filled_cavity" flags
+    :return: The number of days between now and the latest lodgement date of an EPC whose wall is a filled
+        cavity, or NaN when no EPC records a filled cavity wall
+    """
+    all_epcs = [newest_epc] + older_epcs
+
+    records = []
+    for epc in all_epcs:
+        # Get the cleaned mapping for this EPC's wall description; EPCs with no mapping are skipped
+        mapped = next(
+            (y for y in cleaned["walls-description"] if y["original_description"] == epc["walls-description"]),
+            None,
+        )
+        if mapped is None:
+            continue
+        records.append({**mapped, "inspection-date": epc["lodgement-date"]})
+
+    if not records:
+        # A frame built from an empty list has no columns, so the filter below would raise a KeyError
+        return float("nan")
+
+    df = pd.DataFrame(records)
+    df = df[df["is_cavity_wall"] & df["is_filled_cavity"]]
+    if df.empty:
+        # No EPC shows a filled cavity. Return NaN explicitly rather than relying on NaT arithmetic
+        return float("nan")
+
+    cavity_age = (datetime.now() - pd.to_datetime(df["inspection-date"].max())).days
+    return cavity_age
diff --git a/recommendations/tests/test_costs.py b/recommendations/tests/test_costs.py
index 1d519b91..402e38eb 100644
--- a/recommendations/tests/test_costs.py
+++ b/recommendations/tests/test_costs.py
@@ -1,6 +1,7 @@
from recommendations.Costs import Costs
from unittest.mock import Mock
import datetime
+import pytest
class TestCosts:
@@ -58,9 +59,9 @@ class TestCosts:
)
assert loft_results == {
- 'total': 430.21445040000003, 'subtotal': 358.512042, 'vat': 71.70240840000001,
- 'contingency': 25.608003000000004, 'preliminaries': 25.608003000000004, 'material': 198.29923000000002,
- 'profit': 51.21600600000001, 'labour_hours': 3.685, 'labour_cost': 57.7808, 'labour_days': 0.460625
+ 'total': 639.4133610000001, 'subtotal': 532.8444675000001, 'vat': 106.56889350000002,
+ 'contingency': 71.045929, 'preliminaries': 35.5229645, 'material': 297.448845, 'profit': 71.045929,
+ 'labour_hours': 3.685, 'labour_cost': 57.7808, 'labour_days': 0.460625
}
def test_internal_wall_insulation(self):
@@ -176,11 +177,9 @@ class TestCosts:
)
assert iwi_results == {
- 'total': 6650.889456921851, 'subtotal': 5542.407880768209, 'vat': 1108.4815761536418,
- 'contingency': 573.3525393898148, 'preliminaries': 382.2350262598765,
- 'material': 1747.488000615996,
- 'profit': 764.470052519753, 'labour_hours': 88.23759388401297,
- 'labour_days': 2.757424808875405,
+ 'total': 6880.2304726777775, 'subtotal': 5733.525393898148, 'vat': 1146.7050787796295,
+ 'contingency': 764.470052519753, 'preliminaries': 382.2350262598765, 'material': 1747.488000615996,
+ 'profit': 764.470052519753, 'labour_hours': 88.23759388401297, 'labour_days': 2.757424808875405,
'labour_cost': 1927.1602026551818
}
@@ -414,8 +413,8 @@ class TestCosts:
)
assert ewi_results == {
- 'total': 14561.688989159393, 'subtotal': 12134.740824299493, 'vat': 2426.948164859899,
- 'contingency': 808.9827216199662, 'preliminaries': 1617.9654432399325, 'material': 4020.565147410677,
+ 'total': 15047.078622131372, 'subtotal': 12539.232185109477, 'vat': 2507.8464370218953,
+ 'contingency': 808.9827216199662, 'preliminaries': 2022.4568040499155, 'material': 4020.565147410677,
'profit': 1617.9654432399325, 'labour_hours': 187.02533486285358, 'labour_days': 5.8445417144641745,
'labour_cost': 3921.5600094613983
}
@@ -499,3 +498,48 @@ class TestCosts:
'labour_hours': 24.79, 'labour_days': 1.549375, 'labour_cost': 186.9032}
assert costs.labour_adjustment_factor == 0.88
+
+ # Parametrised fixture for the regional tests: each case is (county, expected region, expected total)
+ @pytest.fixture(params=[
+     ("Northamptonshire", "East Midlands", 7927.44),
+     ("Greater London Authority", "Inner London", 10475.0),
+     ("Adur", "South East England", 8333.32),
+     ("Bournemouth", "South West England", 8452),
+     ("Basildon", "East of England", 7895.44),
+     ("Birmingham", "West Midlands", 7706.2),
+     ("County Durham", "North East England", 8113.96),
+     ("Allerdale", "North West England", 6481.68),
+     ("York", "Yorkshire and the Humber", 8243.6),
+     ("Cardiff", "Wales", 7595.32),
+     ("Glasgow City", "Scotland", 7871.88),
+     ("Belfast", "Northern Ireland", 8504.36)
+ ])
+ def mock_property_with_region(self, request):
+     """Return a mocked property for the parametrised county, with its expected region and cost."""
+     county_name, expected_region, expected_total = request.param
+     property_stub = Mock()
+     # Only the county is set on the mocked property's data
+     property_stub.data = {"county": county_name}
+     return property_stub, expected_region, expected_total
+
+ # Solar PV total cost for a range of system wattages, all priced for the same county
+ @pytest.mark.parametrize("wattage, expected_cost", [
+     (3000, 5945.58),
+     (4000, 7927.44),
+     (5000, 9909.3),
+     (6000, 11891.16),
+ ])
+ def test_solar_pv_different_wattages(self, wattage, expected_cost):
+     """The solar PV total should match the expected cost for each wattage to within 1%."""
+     property_stub = Mock()
+     property_stub.data = {"county": "Mansfield"}
+     quote = Costs(property_stub).solar_pv(wattage)
+     assert quote['total'] == pytest.approx(expected_cost, rel=0.01)
+
+ def test_solar_pv_regional_variation(self, mock_property_with_region):
+     """The county should resolve to the expected region and the 4000W total should match to within 1%."""
+     property_stub, region_label, expected_total = mock_property_with_region
+     costs = Costs(property_stub)
+
+     # The region is derived when the Costs object is constructed
+     assert costs.region == region_label
+
+     # A fixed wattage of 4000 is used so that only the region varies between cases
+     quote = costs.solar_pv(4000)
+     assert quote['total'] == pytest.approx(expected_total, rel=0.01)
diff --git a/recommendations/tests/test_data/materials.py b/recommendations/tests/test_data/materials.py
index d7241be5..187d1401 100644
--- a/recommendations/tests/test_data/materials.py
+++ b/recommendations/tests/test_data/materials.py
@@ -942,8 +942,24 @@ materials = [
'https://www.hamuch.com/cost/led-spot-light#:~:text=It%20costs%20an%20average%20of,'
'will%20drive%20up%20the%20cost.',
'created_at': datetime.datetime(2023, 11, 28, 22, 49, 12, 244907), 'is_active': True, 'prime_material_cost': None,
- 'material_cost': 20.0, 'labour_cost': 46.0, 'labour_hours_per_unit': 0.8, 'plant_cost': 0.0, 'total_cost': 66.0,
+ 'material_cost': 20.0, 'labour_cost': 15.0, 'labour_hours_per_unit': 0.8, 'plant_cost': 0.0, 'total_cost': 66.0,
'notes': 'We estimate the unit economics from the checkatrade article. We assume that the average job consists '
'of installing 6 lights based on the hamuch article. We use the median value of 400 for a job of 6 '
- 'lights'}
+ 'lights'},
+ {'id': 1235, 'type': 'windows_glazing',
+ 'description': 'uPVC windows; Profile 22 or other equal and approved; reinforced where appropriate with '
+ 'aluminium alloy; in refurbishment work, including standard ironmongery; sills and factory glazed '
+ 'with low-e 24 mm double glazing; removing existing windows and fixing new in position; including '
+ 'lugs plugged and screwed to brickwork or blockwork; Casement/fixed light; including vents; '
+ 'e.p.d.m. glazing gaskets and weather seals; 1770 mm × 1200 mm; ref P312WW',
+ 'depth': 0.0, 'depth_unit': None, 'cost': None, 'cost_unit': 'gbp_per_unit', 'r_value_per_mm': None,
+ 'r_value_unit': 'square_meter_kelvin_per_watt', 'thermal_conductivity': None, 'thermal_conductivity_unit': None,
+ 'link': 'SPONs',
+ 'created_at': datetime.datetime(2023, 11, 28, 22, 49, 12, 244907),
+ 'is_active': True, 'prime_material_cost': 176.55,
+ 'material_cost': 182.25, 'labour_cost': 163.36, 'labour_hours_per_unit': 6.5, 'plant_cost': 0.0,
+ 'total_cost': 345.61,
+ 'notes': 'This is the cost of removal of existing windows and installation of new windows. This is a casement '
+ 'style window, which is the most common but also the cheapest style. In the cost estimation framework, '
+ 'we can inflate prices for different finishes, to be conservative on price.'}
]
diff --git a/recommendations/tests/test_fireplace_recommendations.py b/recommendations/tests/test_fireplace_recommendations.py
index 570fbb5c..a91d6697 100644
--- a/recommendations/tests/test_fireplace_recommendations.py
+++ b/recommendations/tests/test_fireplace_recommendations.py
@@ -6,7 +6,7 @@ from recommendations.FireplaceRecommendations import FireplaceRecommendations
class TestFirepaceRecommendations:
def test_no_fireplaces(self):
- property_instance = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+ property_instance = Property(id=0, address="fake", postcode="fake")
property_instance.data = {
"number-open-fireplaces": 0
}
@@ -22,7 +22,7 @@ class TestFirepaceRecommendations:
assert recommender.recommendation is None
def test_one_fireplace(self):
- property_instance = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+ property_instance = Property(id=0, address="fake", postcode="fake")
property_instance.data = {
"number-open-fireplaces": 1
}
@@ -40,7 +40,7 @@ class TestFirepaceRecommendations:
assert recommender.recommendation[0]["total"] == 300
def test_multiple_fireplaces(self):
- property_instance = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+ property_instance = Property(id=0, address="fake", postcode="fake")
property_instance.data = {
"number-open-fireplaces": 3
}
diff --git a/recommendations/tests/test_floor_recommendations.py b/recommendations/tests/test_floor_recommendations.py
index 43e98d60..555f9a27 100644
--- a/recommendations/tests/test_floor_recommendations.py
+++ b/recommendations/tests/test_floor_recommendations.py
@@ -21,16 +21,6 @@ class TestFloorRecommendations:
) as f:
return pickle.load(f)
- @pytest.fixture
- def mock_floor_rec_instance(self):
- # Creating a mock instance of WallRecommendations with the necessary attributes
- property_mock = Mock()
- property_mock.full_sap_epc = {"lodgement-date": "2000-01-01"}
- property_mock.data = {"county": "York"}
-
- mock_wall_rec_instance = FloorRecommendations(property_mock, materials)
- return mock_wall_rec_instance
-
def test_init(self, input_properties):
input_properties[0].insulation_floor_area = 50
input_properties[0].insulation_wall_area = 90
@@ -68,6 +58,7 @@ class TestFloorRecommendations:
input_properties[2].wall_type = "solid brick"
input_properties[2].floor_type = "suspended"
input_properties[2].number_of_floors = 1
+ input_properties[2].floor_level = 0
recommender = FloorRecommendations(property_instance=input_properties[2], materials=materials)
assert recommender.estimated_u_value is None
@@ -93,6 +84,8 @@ class TestFloorRecommendations:
input_properties[3].insulation_floor_area = 100
input_properties[3].insulation_wall_area = 100
input_properties[3].number_of_floors = 1
+ input_properties[3].floor_level = 0
+
recommender = FloorRecommendations(property_instance=input_properties[3], materials=materials)
assert recommender.estimated_u_value is None
recommender.recommend()
@@ -114,6 +107,7 @@ class TestFloorRecommendations:
input_properties[4].wall_type = "solid brick"
input_properties[4].floor_type = "solid"
input_properties[4].number_of_floors = 1
+ input_properties[4].floor_level = 0
# In this case, we have no county, so in this case, it should yse the local-authority-label if possible
input_properties[4].data["county"] = ""
diff --git a/recommendations/tests/test_lighting_recommendations.py b/recommendations/tests/test_lighting_recommendations.py
index 06d1163f..964f1da0 100644
--- a/recommendations/tests/test_lighting_recommendations.py
+++ b/recommendations/tests/test_lighting_recommendations.py
@@ -9,7 +9,7 @@ from recommendations.tests.test_data.materials import materials
class TestLightingRecommendations:
def test_init_invalid_materials(self):
- input_property0 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
+ input_property0 = Property(id=1, postcode="F4k3 6", address="623 fake street")
input_property0.lighting = {"low_energy_proportion": 0}
input_property0.data = {"county": "Greater London Authority"}
# Test for invalid materials
@@ -18,7 +18,7 @@ class TestLightingRecommendations:
def test_recommend_no_action_needed(self):
# Case where no recommendation is needed
- input_property1 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
+ input_property1 = Property(id=1, postcode="F4k3 6", address="623 fake street")
input_property1.lighting = {"low_energy_proportion": 100}
input_property1.data = {"county": "Greater London Authority"}
@@ -28,7 +28,7 @@ class TestLightingRecommendations:
def test_recommend_action_needed(self):
# Case where recommendation is needed
- input_property1 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
+ input_property1 = Property(id=1, postcode="F4k3 6", address="623 fake street")
input_property1.lighting = {"low_energy_proportion": 100}
input_property1.data = {"county": "Greater London Authority"}
input_property1.lighting = {"low_energy_proportion": 0.80}
@@ -40,8 +40,7 @@ class TestLightingRecommendations:
assert lr.recommendation == [
{'parts': [], 'type': 'low_energy_lighting', 'description': 'Install low energy lighting in 4 outlets',
- 'starting_u_value': None, 'new_u_value': None, 'sap_points': 0.4, 'total': 458.976, 'subtotal': 382.48,
- 'vat': 76.49600000000001, 'contingency': 27.320000000000007, 'preliminaries': 27.320000000000007,
- 'material': 80.0, 'profit': 54.640000000000015, 'labour_hours': 3.2, 'labour_days': 0.4,
- 'labour_cost': 193.20000000000002}
+ 'starting_u_value': None, 'new_u_value': None, 'sap_points': 0.4, 'total': 240.24,
+ 'subtotal': 200.20000000000002, 'vat': 40.040000000000006, 'contingency': 14.3, 'preliminaries': 14.3,
+ 'material': 80.0, 'profit': 28.6, 'labour_hours': 3.2, 'labour_days': 0.4, 'labour_cost': 63.0}
]
diff --git a/recommendations/tests/test_recommendation_utils.py b/recommendations/tests/test_recommendation_utils.py
index aefc70b0..559a51b2 100644
--- a/recommendations/tests/test_recommendation_utils.py
+++ b/recommendations/tests/test_recommendation_utils.py
@@ -427,3 +427,106 @@ def test_external_wall_area():
for num_floors, floor_height, perimeter, built_form, expected in test_cases:
result = recommendation_utils.estimate_external_wall_area(num_floors, floor_height, perimeter, built_form)
assert result == expected, f"Test failed for {built_form}: Expected {expected}, got {result}"
+
+
+def test_estimate_windows():
+    """Check estimate_windows against a set of reference properties with known or judged window counts."""
+    # Each entry pairs the keyword arguments for estimate_windows with the expected estimate
+    reference_cases = [
+        # Flat from an EPR that records 4 windows
+        (
+            dict(
+                property_type="Flat",
+                built_form="Semi-Detached",
+                construction_age_band="England and Wales: 1976-1982",
+                floor_area=37,
+                number_habitable_rooms=2,
+                extension_count=0,
+            ),
+            4,
+        ),
+        # Mid-terrace house from an EPR that records 7 windows. Two of them are very small (0.21m^2 and
+        # 0.3m^2), so 6 is a reasonable estimate
+        (
+            dict(
+                property_type="House",
+                built_form="Mid-Terrace",
+                construction_age_band="England and Wales: 1950-1966",
+                floor_area=69,
+                number_habitable_rooms=4,
+                extension_count=0,
+            ),
+            6,
+        ),
+        # Bungalow from an EPR that records 6 windows. Two of them are small (0.4m^2 each), so 5 is an
+        # acceptable estimate
+        (
+            dict(
+                property_type="Bungalow",
+                built_form="Mid-Terrace",
+                construction_age_band="England and Wales: 1967-1975",
+                floor_area=56,
+                number_habitable_rooms=3,
+                extension_count=0,
+            ),
+            5,
+        ),
+        # End-terrace house from an EPR that records 8 windows. One of them is very small (0.25m^2), so 7
+        # is an acceptable estimate
+        (
+            dict(
+                property_type="House",
+                built_form="End-Terrace",
+                construction_age_band="England and Wales: 1967-1975",
+                floor_area=77.28,
+                number_habitable_rooms=4,
+                extension_count=0,
+            ),
+            7,
+        ),
+        # Semi-detached house with 11 windows according to the associated condition report. Right now we
+        # estimate 12 windows for this property
+        (
+            dict(
+                property_type="House",
+                built_form="Semi-Detached",
+                construction_age_band="England and Wales: 1950-1966",
+                floor_area=88.4,
+                number_habitable_rooms=5,
+                extension_count=0,
+            ),
+            12,
+        ),
+        # Khalim's flat, which has 3 windows and no construction age band on the EPC. The windows are
+        # large, so 5 is a reasonable estimate
+        (
+            dict(
+                property_type="Flat",
+                built_form="",
+                construction_age_band="",
+                floor_area=100,
+                number_habitable_rooms=3,
+                extension_count=0,
+            ),
+            5,
+        ),
+        # Semi-detached house from an EPR where the exact number of windows is unknown. We estimate 10
+        (
+            dict(
+                property_type="House",
+                built_form="Semi-Detached",
+                construction_age_band="England and Wales: 1967-1975",
+                floor_area=85,
+                number_habitable_rooms=4,
+                extension_count=0,
+            ),
+            10,
+        ),
+        # Based on Khalim's parents' flat
+        (
+            dict(
+                property_type="Flat",
+                built_form="End-Terrace",
+                construction_age_band="",
+                floor_area=50,
+                number_habitable_rooms=3,
+                extension_count=0,
+            ),
+            5,
+        ),
+    ]
+
+    for estimate_kwargs, expected_windows in reference_cases:
+        estimated_windows = recommendation_utils.estimate_windows(**estimate_kwargs)
+        assert estimated_windows == expected_windows, \
+            f"Expected {expected_windows} windows, got {estimated_windows}"
diff --git a/recommendations/tests/test_roof_recommendations.py b/recommendations/tests/test_roof_recommendations.py
index 903f598b..75b7ddb2 100644
--- a/recommendations/tests/test_roof_recommendations.py
+++ b/recommendations/tests/test_roof_recommendations.py
@@ -1,5 +1,4 @@
from backend.Property import Property
-from unittest.mock import Mock
from recommendations.RoofRecommendations import RoofRecommendations
from recommendations.tests.test_data.materials import materials
@@ -7,7 +6,7 @@ from recommendations.tests.test_data.materials import materials
class TestRoofRecommendations:
def test_loft_insulation_recommendation_no_insulation(self):
- property_instance = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+ property_instance = Property(id=0, address="fake", postcode="fake")
property_instance.age_band = "F"
property_instance.insulation_floor_area = 100
property_instance.roof = {
@@ -32,7 +31,7 @@ class TestRoofRecommendations:
assert len(roof_recommender.recommendations)
def test_loft_insulation_recommendation_50mm_insulation(self):
- property_instance2 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+ property_instance2 = Property(id=0, address="fake", postcode="fake")
property_instance2.age_band = "F"
property_instance2.insulation_floor_area = 100
property_instance2.roof = {
@@ -54,11 +53,11 @@ class TestRoofRecommendations:
assert len(roof_recommender2.recommendations) == 1
- assert roof_recommender2.recommendations[0]["total"] == 1310.56464
+ assert roof_recommender2.recommendations[0]["total"] == 1936.9206000000004
assert roof_recommender2.recommendations[0]["new_u_value"] == 0.14
assert roof_recommender2.recommendations[0]["starting_u_value"] == 0.68
- property_instance3 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+ property_instance3 = Property(id=0, address="fake", postcode="fake")
property_instance3.age_band = "F"
property_instance3.insulation_floor_area = 100
property_instance3.roof = {
@@ -83,7 +82,7 @@ class TestRoofRecommendations:
assert roof_recommender3.recommendations[0]["parts"][0]["depth"] == 270
def test_loft_insulation_recommendation_150mm_insulation(self):
- property_instance4 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+ property_instance4 = Property(id=0, address="fake", postcode="fake")
property_instance4.age_band = "F"
property_instance4.insulation_floor_area = 100
property_instance4.roof = {
@@ -105,12 +104,12 @@ class TestRoofRecommendations:
assert len(roof_recommender4.recommendations) == 4
- assert roof_recommender4.recommendations[0]["total"] == 788.0544
+ assert roof_recommender4.recommendations[0]["total"] == 1128.744
assert roof_recommender4.recommendations[0]["new_u_value"] == 0.15
assert roof_recommender4.recommendations[0]["starting_u_value"] == 0.3
assert roof_recommender4.recommendations[0]["parts"][0]["depth"] == 150
- property_instance5 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+ property_instance5 = Property(id=0, address="fake", postcode="fake")
property_instance5.age_band = "F"
property_instance5.insulation_floor_area = 100
property_instance5.roof = {
@@ -137,7 +136,7 @@ class TestRoofRecommendations:
def test_loft_insulation_recommendation_270mm_insulation(self):
# We shouldn't recommend anything in this case
- property_instance6 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+ property_instance6 = Property(id=0, address="fake", postcode="fake")
property_instance6.age_band = "F"
property_instance6.insulation_floor_area = 100
property_instance6.roof = {
@@ -278,7 +277,7 @@ class TestRoofRecommendations:
# "Insulate your room roof with 270mm of Example room roof insulation"
def test_flat_no_insulation(self):
- property_instance11 = Property(id=11, address1="fake", postcode="fake", epc_client=Mock())
+ property_instance11 = Property(id=11, address="fake", postcode="fake")
property_instance11.age_band = "D"
property_instance11.insulation_floor_area = 33.5
property_instance11.perimeter = 24
@@ -307,7 +306,7 @@ class TestRoofRecommendations:
"Insulate the home's flat roof with 150mm of Ecotherm Eco-Versal General Purpose Insulation Board"
def test_flat_insulated(self):
- property_instance12 = Property(id=12, address1="fake", postcode="fake", epc_client=Mock())
+ property_instance12 = Property(id=12, address="fake", postcode="fake")
property_instance12.age_band = "D"
property_instance12.insulation_floor_area = 40
property_instance12.perimeter = 30
@@ -331,7 +330,7 @@ class TestRoofRecommendations:
assert not roof_recommender12.recommendations
def test_flat_limited_insulation(self):
- property_instance13 = Property(id=12, address1="fake", postcode="fake", epc_client=Mock())
+ property_instance13 = Property(id=12, address="fake", postcode="fake")
property_instance13.age_band = "D"
property_instance13.insulation_floor_area = 40
property_instance13.perimeter = 40
@@ -363,7 +362,7 @@ class TestRoofRecommendations:
"Insulate the home's flat roof with 150mm of Ecotherm Eco-Versal General Purpose Insulation Board"
def test_property_above(self):
- property_instance14 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
+ property_instance14 = Property(id=0, address="fake", postcode="fake")
property_instance14.age_band = "F"
property_instance14.insulation_floor_area = 100
property_instance14.roof = {
diff --git a/recommendations/tests/test_solar_pv_recommendations.py b/recommendations/tests/test_solar_pv_recommendations.py
new file mode 100644
index 00000000..f2436cb1
--- /dev/null
+++ b/recommendations/tests/test_solar_pv_recommendations.py
@@ -0,0 +1,79 @@
+import pytest
+from recommendations.SolarPvRecommendations import SolarPvRecommendations
+from backend.Property import Property
+
+
+class TestSolarPvRecommendations:
+    """Tests covering when SolarPvRecommendations does, and does not, produce a recommendation."""
+
+    @staticmethod
+    def _build_property(data, roof, **attributes):
+        """Create a Property, set any extra attributes first, then attach the EPC data and roof details."""
+        instance = Property(id=1, address="", postcode="")
+        for attribute_name, attribute_value in attributes.items():
+            setattr(instance, attribute_name, attribute_value)
+        instance.data = data
+        instance.roof = roof
+        return instance
+
+    @pytest.fixture
+    def property_instance_invalid_type(self):
+        # Property with an unrecognised property type
+        return self._build_property(
+            data={"property-type": "InvalidType", "county": "Broxbourne", "photo-supply": None},
+            roof={"is_flat": False, "is_pitched": False, "is_roof_room": False},
+        )
+
+    @pytest.fixture
+    def property_instance_invalid_roof(self):
+        # House whose roof is neither flat, pitched nor a roof room
+        return self._build_property(
+            data={"county": "Huntingdonshire", "property-type": "House", "photo-supply": None},
+            roof={"is_flat": False, "is_pitched": False, "is_roof_room": False},
+        )
+
+    @pytest.fixture
+    def property_instance_has_solar_pv(self):
+        # House that already has a photo-supply value recorded, i.e. existing solar PV
+        return self._build_property(
+            data={"photo-supply": "40", "county": "Huntingdonshire", "property-type": "House"},
+            roof={"is_flat": True},
+        )
+
+    @pytest.fixture
+    def property_instance_valid_all(self):
+        # House with a flat roof, no existing photo-supply, and the solar PV roof area/percentage set
+        return self._build_property(
+            data={"property-type": "House", "photo-supply": None, "county": "Huntingdonshire"},
+            roof={"is_flat": True},
+            solar_pv_roof_area=20,
+            solar_pv_percentage=40,
+        )
+
+    def test_invalid_property_type(self, property_instance_invalid_type):
+        recommender = SolarPvRecommendations(property_instance_invalid_type)
+        recommender.recommend()
+        assert not recommender.recommendation
+
+    def test_invalid_roof_type(self, property_instance_invalid_roof):
+        recommender = SolarPvRecommendations(property_instance_invalid_roof)
+        recommender.recommend()
+        assert not recommender.recommendation
+
+    def test_existing_solar_pv(self, property_instance_has_solar_pv):
+        recommender = SolarPvRecommendations(property_instance_has_solar_pv)
+        recommender.recommend()
+        assert not recommender.recommendation
+
+    def test_valid_all_conditions(self, property_instance_valid_all):
+        recommender = SolarPvRecommendations(property_instance_valid_all)
+        recommender.recommend()
+
+        # A single solar PV recommendation is expected, with the full cost breakdown
+        expected_recommendation = {
+            'parts': [],
+            'type': 'solar_pv',
+            'description': 'Install a 4 kilowatt-peak (kWp) solar photovoltaic (PV) panel system on the roof',
+            'starting_u_value': None,
+            'new_u_value': None,
+            'sap_points': None,
+            'total': 8527.0752,
+            'subtotal': 7105.896,
+            'vat': 1421.1791999999996,
+            'labour_hours': 72,
+            'labour_days': 2,
+            'photo_supply': 4000
+        }
+        assert recommender.recommendation == [expected_recommendation]
diff --git a/recommendations/tests/test_ventilation_recommendations.py b/recommendations/tests/test_ventilation_recommendations.py
index 893bb01a..3242b1d1 100644
--- a/recommendations/tests/test_ventilation_recommendations.py
+++ b/recommendations/tests/test_ventilation_recommendations.py
@@ -1,5 +1,4 @@
from backend.Property import Property
-from unittest.mock import Mock
from recommendations.VentilationRecommendations import VentilationRecommendations
from recommendations.tests.test_data.materials import materials
@@ -7,7 +6,7 @@ from recommendations.tests.test_data.materials import materials
class TestVentilationRecommendations:
def test_natural_ventilation(self):
- input_property1 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
+ input_property1 = Property(id=1, postcode="F4k3 6", address="623 fake street")
input_property1.data = {"mechanical-ventilation": "natural"}
recommender = VentilationRecommendations(
@@ -28,7 +27,7 @@ class TestVentilationRecommendations:
assert recommender.recommendation[0]["parts"][0]["quantity"] == 2
def test_missing_ventilation(self):
- input_property2 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
+ input_property2 = Property(id=1, postcode="F4k3 6", address="623 fake street")
input_property2.data = {"mechanical-ventilation": None}
recommender2 = VentilationRecommendations(
@@ -49,7 +48,7 @@ class TestVentilationRecommendations:
assert recommender2.recommendation[0]["parts"][0]["quantity"] == 2
def test_nodata_ventilation(self):
- input_property3 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
+ input_property3 = Property(id=1, postcode="F4k3 6", address="623 fake street")
input_property3.data = {"mechanical-ventilation": "NO DATA!!"}
recommender3 = VentilationRecommendations(
@@ -70,7 +69,7 @@ class TestVentilationRecommendations:
assert recommender3.recommendation[0]["parts"][0]["quantity"] == 2
def test_existing_ventilation_1(self):
- input_property4 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
+ input_property4 = Property(id=1, postcode="F4k3 6", address="623 fake street")
input_property4.data = {"mechanical-ventilation": 'mechanical, extract only'}
recommender4 = VentilationRecommendations(
@@ -86,7 +85,7 @@ class TestVentilationRecommendations:
assert recommender4.has_ventilaion
def test_existing_ventilation_2(self):
- input_property5 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
+ input_property5 = Property(id=1, postcode="F4k3 6", address="623 fake street")
input_property5.data = {"mechanical-ventilation": 'mechanical, supply and extract'}
recommender5 = VentilationRecommendations(
diff --git a/recommendations/tests/test_wall_recommendations.py b/recommendations/tests/test_wall_recommendations.py
index 0258e592..bfc681f5 100644
--- a/recommendations/tests/test_wall_recommendations.py
+++ b/recommendations/tests/test_wall_recommendations.py
@@ -231,7 +231,7 @@ class TestWallRecommendationsBase:
class TestCavityWallRecommensations:
def test_fill_empty_cavity(self):
- input_property = Property(id=1, postcode="F4k3", address1="123 fake street", epc_client=Mock())
+ input_property = Property(id=1, postcode="F4k3", address="123 fake street")
input_property.walls = {
'original_description': 'Cavity wall, as built, no insulation (assumed)',
'clean_description': 'Cavity wall, as built, no insulation',
@@ -265,7 +265,7 @@ class TestCavityWallRecommensations:
assert np.isclose(recommender.recommendations[1]["total"], 2004.6600000000003)
def test_fill_partial_filled_cavity(self):
- input_property = Property(id=1, postcode="F4k3", address1="123 fake street", epc_client=Mock())
+ input_property = Property(id=1, postcode="F4k3", address="123 fake street")
input_property.walls = {
'original_description': 'Cavity wall, as built, partial insulation (assumed)',
'clean_description': 'Cavity wall, as built, partial insulation',
@@ -299,7 +299,7 @@ class TestCavityWallRecommensations:
assert np.isclose(recommender.recommendations[1]["total"], 1999.9350000000002)
def test_system_built_wall(self):
- input_property2 = Property(id=1, postcode="F4k3 2", address1="223 fake street", epc_client=Mock())
+ input_property2 = Property(id=1, postcode="F4k3 2", address="223 fake street")
input_property2.walls = {
'original_description': 'System built, as built, no insulation (assumed)',
'clean_description': 'System built, as built, no insulation',
@@ -331,22 +331,22 @@ class TestCavityWallRecommensations:
assert len(recommender2.recommendations) == 9
assert recommender2.estimated_u_value == 1
assert np.isclose(recommender2.recommendations[0]["new_u_value"], 0.19)
- assert np.isclose(recommender2.recommendations[0]["total"], 15899.9616)
+ assert np.isclose(recommender2.recommendations[0]["total"], 16429.960320000002)
assert recommender2.recommendations[0]["parts"][0]["type"] == "external_wall_insulation"
assert recommender2.recommendations[0]["parts"][0]["depth"] == 100
assert np.isclose(recommender2.recommendations[8]["new_u_value"], 0.23)
- assert np.isclose(recommender2.recommendations[8]["total"], 10916.3424)
+ assert np.isclose(recommender2.recommendations[8]["total"], 11292.768)
assert recommender2.recommendations[8]["parts"][0]["type"] == "internal_wall_insulation"
assert recommender2.recommendations[8]["parts"][0]["depth"] == 72.5
assert np.isclose(recommender2.recommendations[6]["new_u_value"], 0.29)
- assert np.isclose(recommender2.recommendations[6]["total"], 10621.934399999998)
+ assert np.isclose(recommender2.recommendations[6]["total"], 10988.208)
assert recommender2.recommendations[6]["parts"][0]["type"] == "internal_wall_insulation"
assert recommender2.recommendations[6]["parts"][0]["depth"] == 52.5
def test_timber_frame_wall(self):
- input_property3 = Property(id=1, postcode="F4k3 2", address1="223 fake street", epc_client=Mock())
+ input_property3 = Property(id=1, postcode="F4k3 2", address="223 fake street")
input_property3.walls = {
'original_description': 'Timber frame, as built, no insulation (assumed)',
'clean_description': 'Timber frame, as built, no insulation',
@@ -378,17 +378,17 @@ class TestCavityWallRecommensations:
assert len(recommender3.recommendations) == 6
assert recommender3.estimated_u_value == 1.9
assert np.isclose(recommender3.recommendations[0]["new_u_value"], 0.2)
- assert np.isclose(recommender3.recommendations[0]["total"], 13117.46832)
+ assert np.isclose(recommender3.recommendations[0]["total"], 13554.717263999999)
assert recommender3.recommendations[0]["parts"][0]["type"] == "external_wall_insulation"
assert recommender3.recommendations[0]["parts"][0]["depth"] == 100.0
assert np.isclose(recommender3.recommendations[1]["new_u_value"], 0.23)
- assert np.isclose(recommender3.recommendations[1]["total"], 34070.50944)
+ assert np.isclose(recommender3.recommendations[1]["total"], 35206.19308800001)
assert recommender3.recommendations[1]["parts"][0]["type"] == "external_wall_insulation"
assert recommender3.recommendations[1]["parts"][0]["depth"] == 150.0
def test_granite_or_whinstone_wall(self):
- input_property4 = Property(id=1, postcode="F4k3 2", address1="223 fake street", epc_client=Mock())
+ input_property4 = Property(id=1, postcode="F4k3 2", address="223 fake street")
input_property4.walls = {
'original_description': 'Granite or whinstone, as built, no insulation (assumed)',
'clean_description': 'Granite or whinstone, as built, no insulation',
@@ -420,17 +420,17 @@ class TestCavityWallRecommensations:
assert len(recommender4.recommendations) == 6
assert recommender4.estimated_u_value == 2.3
assert np.isclose(recommender4.recommendations[0]["new_u_value"], 0.21)
- assert np.isclose(recommender4.recommendations[0]["total"], 28562.514352)
+ assert np.isclose(recommender4.recommendations[0]["total"], 29547.42864)
assert recommender4.recommendations[0]["parts"][0]["type"] == "external_wall_insulation"
assert recommender4.recommendations[0]["parts"][0]["depth"] == 100
assert np.isclose(recommender4.recommendations[1]["new_u_value"], 0.23)
- assert np.isclose(recommender4.recommendations[1]["total"], 74186.52678400002)
+ assert np.isclose(recommender4.recommendations[1]["total"], 76744.68288000001)
assert recommender4.recommendations[1]["parts"][0]["type"] == "external_wall_insulation"
assert recommender4.recommendations[1]["parts"][0]["depth"] == 150
def test_cob_wall(self):
- input_property5 = Property(id=1, postcode="F4k3 2", address1="223 fake street", epc_client=Mock())
+ input_property5 = Property(id=1, postcode="F4k3 2", address="223 fake street")
input_property5.walls = {
'original_description': 'Cob, as built',
'clean_description': 'Cob, as built',
@@ -462,17 +462,17 @@ class TestCavityWallRecommensations:
assert len(recommender5.recommendations) == 5
assert recommender5.estimated_u_value == 0.8
assert np.isclose(recommender5.recommendations[0]["new_u_value"], 0.29)
- assert np.isclose(recommender5.recommendations[0]["total"], 8665.040384000002)
+ assert np.isclose(recommender5.recommendations[0]["total"], 8963.834880000002)
assert recommender5.recommendations[0]["parts"][0]["type"] == "external_wall_insulation"
assert recommender5.recommendations[0]["parts"][0]["depth"] == 50
assert np.isclose(recommender5.recommendations[3]["new_u_value"], 0.26)
- assert np.isclose(recommender5.recommendations[3]["total"], 20078.742992)
+ assert np.isclose(recommender5.recommendations[3]["total"], 20771.11344)
assert recommender5.recommendations[3]["parts"][0]["type"] == "internal_wall_insulation"
assert recommender5.recommendations[3]["parts"][0]["depth"] == 100
def test_sandstone_or_limestone_wall(self):
- input_property6 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
+ input_property6 = Property(id=1, postcode="F4k3 6", address="623 fake street")
input_property6.walls = {
'original_description': 'Sandstone or limestone, as built, no insulation (assumed)',
'clean_description': 'Sandstone or limestone, as built, no insulation',
@@ -504,16 +504,16 @@ class TestCavityWallRecommensations:
assert len(recommender6.recommendations) == 9
assert recommender6.estimated_u_value == 1
assert np.isclose(recommender6.recommendations[0]["new_u_value"], 0.19)
- assert np.isclose(recommender6.recommendations[0]["total"], 44829.0584)
+ assert np.isclose(recommender6.recommendations[0]["total"], 46374.888000000006)
assert recommender6.recommendations[0]["parts"][0]["type"] == "external_wall_insulation"
assert recommender6.recommendations[0]["parts"][0]["depth"] == 100
assert np.isclose(recommender6.recommendations[2]["new_u_value"], 0.21)
- assert np.isclose(recommender6.recommendations[2]["total"], 116436.25280000002)
+ assert np.isclose(recommender6.recommendations[2]["total"], 120451.29600000002)
assert recommender6.recommendations[2]["parts"][0]["type"] == "external_wall_insulation"
assert recommender6.recommendations[2]["parts"][0]["depth"] == 150
assert np.isclose(recommender6.recommendations[4]["new_u_value"], 0.28)
- assert np.isclose(recommender6.recommendations[4]["total"], 91267.0136)
+ assert np.isclose(recommender6.recommendations[4]["total"], 94414.15199999999)
assert recommender6.recommendations[4]["parts"][0]["type"] == "internal_wall_insulation"
assert recommender6.recommendations[4]["parts"][0]["depth"] == 100
diff --git a/recommendations/tests/test_window_recommendations.py b/recommendations/tests/test_window_recommendations.py
new file mode 100644
index 00000000..664a1e39
--- /dev/null
+++ b/recommendations/tests/test_window_recommendations.py
@@ -0,0 +1,252 @@
+from recommendations.WindowsRecommendations import WindowsRecommendations
+from backend.Property import Property
+from recommendations.tests.test_data.materials import materials
+
+
+class TestWindowRecommendations:
+
+ def test_fully_single_glazed(self):
+ """
+ For this property, we expect all windows to be single glazed, so we should recommend full double glazing
+ :return:
+ """
+
+ property_1 = Property(
+ id=1,
+ postcode='1',
+ address='1',
+ data={
+ "county": "Wychavon",
+ "multi-glaze-proportion": 0,
+ "uprn": 0
+ }
+ )
+ property_1.windows = {
+ 'original_description': 'Single glazed', 'has_glazing': False, 'glazing_coverage': 'full',
+ 'glazing_type': 'single',
+ 'no_data': False
+ }
+ property_1.number_of_windows = 7
+
+ recommender = WindowsRecommendations(property_instance=property_1, materials=materials)
+
+ assert not recommender.recommendation
+
+ recommender.recommend()
+
+ assert recommender.recommendation == [
+ {'parts': [], 'type': 'windows_glazing', 'description': 'Install double glazing to all windows',
+ 'starting_u_value': None, 'new_u_value': None, 'sap_points': None, 'total': 5721.943248,
+ 'subtotal': 4768.28604, 'vat': 953.6572080000001, 'contingency': 340.59186, 'preliminaries': 340.59186,
+ 'material': 1275.75, 'profit': 681.18372, 'labour_hours': 45.5, 'labour_cost': 994.8624,
+ 'labour_days': 2.84375, 'is_secondary_glazing': False}]
+
+ def test_partial_double_glazed(self):
+ """
+ For this property, the double glazing is described as partial, therefore we recommend completion of
+ double glazing
+ :return:
+ """
+
+ property_2 = Property(
+ id=1,
+ postcode='1',
+ address='1',
+ data={
+ "county": "Wychavon",
+ "multi-glaze-proportion": 33,
+ "uprn": 0
+ }
+ )
+ property_2.windows = {'original_description': 'Mostly double glazing', 'has_glazing': True,
+ 'glazing_coverage': 'most',
+ 'glazing_type': 'double', 'no_data': False}
+ property_2.number_of_windows = 7
+
+ recommender2 = WindowsRecommendations(property_instance=property_2, materials=materials)
+
+ assert not recommender2.recommendation
+
+ recommender2.recommend()
+
+ assert recommender2.recommendation == [
+ {'parts': [], 'type': 'windows_glazing', 'description': 'Install double glazing to the remaining windows',
+ 'starting_u_value': None, 'new_u_value': None, 'sap_points': None, 'total': 4087.10232,
+ 'subtotal': 3405.9186, 'vat': 681.18372, 'contingency': 243.2799, 'preliminaries': 243.2799,
+ 'material': 911.25, 'profit': 486.5598, 'labour_hours': 32.5, 'labour_cost': 710.6160000000001,
+ 'labour_days': 2.03125, 'is_secondary_glazing': False}]
+
+ def test_fully_double_glazed(self):
+ """
+ This property has full double glazing so we shouldn't recommend anything
+ :return:
+ """
+
+ property_3 = Property(
+ id=1,
+ postcode='1',
+ address='1',
+ data={
+ "county": "Wychavon",
+ "multi-glaze-proportion": 80,
+ "uprn": 0
+ }
+ )
+ property_3.windows = {'original_description': 'Fully double glazed', 'has_glazing': True,
+ 'glazing_coverage': 'full',
+ 'glazing_type': 'double', 'no_data': False}
+ property_3.number_of_windows = 7
+
+ recommender3 = WindowsRecommendations(property_instance=property_3, materials=materials)
+
+ assert not recommender3.recommendation
+
+ recommender3.recommend()
+
+ assert not recommender3.recommendation
+
+ def test_fully_secondary_glazed(self):
+ property_4 = Property(
+ id=1,
+ postcode='1',
+ address='1',
+ data={
+ "county": "Wychavon",
+ "multi-glaze-proportion": 100,
+ "uprn": 0
+ }
+ )
+ property_4.windows = {'original_description': 'Full secondary glazing', 'has_glazing': True,
+ 'glazing_coverage': 'full',
+ 'glazing_type': 'secondary', 'no_data': False}
+ property_4.number_of_windows = 7
+
+ recommender4 = WindowsRecommendations(property_instance=property_4, materials=materials)
+
+ assert not recommender4.recommendation
+
+ recommender4.recommend()
+
+ assert not recommender4.recommendation
+
+ def test_partial_secondary_glazing(self):
+ property_5 = Property(
+ id=1,
+ postcode='1',
+ address='1',
+ data={
+ "county": "Wychavon",
+ "multi-glaze-proportion": 50,
+ "uprn": 0
+ }
+ )
+ property_5.windows = {'original_description': 'Partial secondary glazing', 'has_glazing': True,
+ 'glazing_coverage': 'partial',
+ 'glazing_type': 'secondary', 'no_data': False}
+ property_5.number_of_windows = 7
+
+ recommender5 = WindowsRecommendations(property_instance=property_5, materials=materials)
+
+ assert not recommender5.recommendation
+
+ recommender5.recommend()
+
+ assert recommender5.recommendation == [
+ {'parts': [], 'type': 'windows_glazing',
+ 'description': 'Install secondary glazing to the remaining windows',
+ 'starting_u_value': None, 'new_u_value': None, 'sap_points': None, 'total': 1089.893952,
+ 'subtotal': 908.24496, 'vat': 181.64899200000002, 'contingency': 64.87464, 'preliminaries': 64.87464,
+ 'material': 729.0, 'profit': 129.74928, 'labour_hours': 13.0, 'labour_cost': 568.4928,
+ 'labour_days': 0.8125, 'is_secondary_glazing': True}]
+
+ def test_single_glazed_restricted_measures(self):
+ property_6 = Property(
+ id=1,
+ postcode='1',
+ address='1',
+ data={
+ "county": "Wychavon",
+ "multi-glaze-proportion": 0,
+ "uprn": 0
+ }
+ )
+ property_6.windows = {'original_description': 'Single glazed', 'has_glazing': False, 'glazing_coverage': None,
+ 'glazing_type': 'single',
+ 'no_data': False}
+ property_6.number_of_windows = 7
+ property_6.restricted_measures = True
+ property_6.is_heritage = True
+
+ recommender6 = WindowsRecommendations(property_instance=property_6, materials=materials)
+
+ assert not recommender6.recommendation
+
+ recommender6.recommend()
+
+ assert recommender6.recommendation == [
+ {'parts': [], 'type': 'windows_glazing',
+ 'description': 'Install secondary glazing to all windows. Secondary '
+ 'glazing recommended due to herigate building status',
+ 'starting_u_value': None, 'new_u_value': None, 'sap_points': None,
+ 'total': 1907.314416, 'subtotal': 1589.42868, 'vat': 317.885736,
+ 'contingency': 113.53062, 'preliminaries': 113.53062,
+ 'material': 1275.75, 'profit': 227.06124, 'labour_hours': 22.75,
+ 'labour_cost': 994.8624, 'labour_days': 1.421875, 'is_secondary_glazing': True}
+ ]
+
+ def test_full_triple_glazed(self):
+ property_7 = Property(
+ id=1,
+ postcode='1',
+ address='1',
+ data={
+ "county": "Wychavon",
+ "multi-glaze-proportion": 100,
+ "uprn": 0
+ }
+ )
+ property_7.windows = {'original_description': 'Fully triple glazed', 'has_glazing': True,
+ 'glazing_coverage': 'full',
+ 'glazing_type': 'triple', 'no_data': False}
+ property_7.number_of_windows = 7
+
+ recommender7 = WindowsRecommendations(property_instance=property_7, materials=materials)
+
+ assert not recommender7.recommendation
+
+ recommender7.recommend()
+
+ assert not recommender7.recommendation
+
+ def test_partial_triple_glazed(self):
+ """
+ We should just recommend double glazing to the remaining windows, since it's a cheaper option
+ """
+
+ property_8 = Property(
+ id=1,
+ postcode='1',
+ address='1',
+ data={
+ "county": "Wychavon",
+ "multi-glaze-proportion": 80,
+ "uprn": 1
+ }
+ )
+ property_8.windows = {'original_description': 'Mostly triple glazing', 'has_glazing': True,
+ 'glazing_coverage': 'most',
+ 'glazing_type': 'triple', 'no_data': False}
+ property_8.number_of_windows = 7
+
+ recommender8 = WindowsRecommendations(property_instance=property_8, materials=materials)
+
+ assert not recommender8.recommendation
+
+ recommender8.recommend()
+
+ assert recommender8.recommendation == [
+ {'parts': [], 'type': 'windows_glazing', 'description': 'Install double glazing to the remaining windows',
+ 'starting_u_value': None, 'new_u_value': None, 'sap_points': None, 'total': 1634.840928,
+ 'subtotal': 1362.36744, 'vat': 272.47348800000003, 'contingency': 97.31196, 'preliminaries': 97.31196,
+ 'material': 364.5, 'profit': 194.62392, 'labour_hours': 13.0, 'labour_cost': 284.2464,
+ 'labour_days': 0.8125, 'is_secondary_glazing': False}]