mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
fixed merge conflict with refactor
This commit is contained in:
commit
90ba851cc0
57 changed files with 5589 additions and 419 deletions
5
.gitignore
vendored
5
.gitignore
vendored
|
|
@ -265,4 +265,7 @@ model_data/simulation_system/predictions/
|
|||
.idea/misc.iml
|
||||
|
||||
adhoc
|
||||
adhoc/*
|
||||
adhoc/*
|
||||
|
||||
etl-router-venv/
|
||||
refactor_datasets/
|
||||
7
backend/DbClient.py
Normal file
7
backend/DbClient.py
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
class DbClient:

    def __init__(self):
        """
        Entry point for database interaction.

        The client is currently a placeholder: constructing it takes no arguments and stores no state.
        """
|
||||
105
backend/OrdnanceSurvey.py
Normal file
105
backend/OrdnanceSurvey.py
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
from functools import lru_cache
|
||||
import urllib.parse
|
||||
import requests
|
||||
from utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class OrdnanceSuveyClient:
    """
    Client for the Ordnance Survey Places "find" endpoint, scoped to a single property.

    A client is built for one address/postcode pair. Calling get_places_api() queries the API and, when a match
    is returned, stores the best match, the property type / built form derived from its classification code, and
    the address and postcode as formatted from the Ordnance Survey record.
    """

    # Upper bound, in seconds, on how long we wait for the Places API, so that a stalled connection cannot block
    # the caller indefinitely
    REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self, address, postcode, api_key):
        """
        This class is tasked with interaction with the ordnance survey API.
        :param address: The address for the property to search for
        :param postcode: The postcode for the property to search for
        :param api_key: The Ordnance Survey API key, sent with every request
        """

        self.address = address
        self.postcode = postcode
        self.full_address = ", ".join([self.address, self.postcode])
        self.api_key = api_key

        # Raw list of results from the places api; None until get_places_api has run successfully
        self.results = None

        self.most_relevant_result = None
        self.property_type = None
        self.built_form = None
        # This will be postcode and address, as returned by the ordnance survey
        self.address_os = None
        self.postcode_os = None

        # Outcome of the first completed get_places_api call, re-used by later calls on this instance
        self._places_response = None

    def set_places_address(self):
        """
        Given a response from the places api, this function will set the address and postcode of the property
        :raises ValueError: if no best match has been stored yet
        """

        if self.most_relevant_result is None:
            raise ValueError("No results found - run get_places_api first")

        self.address_os = self.most_relevant_result["ADDRESS"]
        self.postcode_os = self.most_relevant_result["POSTCODE"]
        # We strip out the postcode from the address as this is already stored separately
        self.address_os = self.address_os.replace(self.postcode_os, "").strip()
        # Remove trailing comma
        self.address_os = self.address_os.rstrip(",").strip()
        # Convert to title case
        self.address_os = self.address_os.title()
        # Make sure postcode is upper case
        self.postcode_os = self.postcode_os.upper()

    def get_places_api(self):
        """
        This method is tasked with getting the places api from the Ordnance Survey.

        On a 200 response with at least one result, the first result's "DPA" record is stored as the most relevant
        result and the property type, built form, address and postcode are set from it. The outcome is remembered
        on the instance, so repeated calls on the same client do not hit the API again.

        :return: dictionary of the form {"status": <http status code>}
        :raises ValueError: if no api key was provided
        """

        # The outcome is memoised on the instance rather than with functools.lru_cache: decorating a method with
        # lru_cache keys the cache on self, which keeps every client alive for as long as the cache holds it
        cached_response = getattr(self, "_places_response", None)
        if cached_response is not None:
            return cached_response

        if not self.api_key:
            raise ValueError("Ordnance Survey API key not specified")

        encoded_address_query = urllib.parse.quote(self.full_address)
        url = (f"https://api.os.uk/search/places/v1/find?query={encoded_address_query}&key="
               f"{self.api_key}")
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT_SECONDS)

        if response.status_code == 200:
            data = response.json()
            # Guard against a missing or empty result list, so that a search with no matches is reported through
            # the log rather than as a KeyError / IndexError
            results = data.get("results") or []
            self.results = results

            if results:
                # Extract some details about the best match
                self.most_relevant_result = results[0]["DPA"]

                self.parse_classification_code(self.most_relevant_result["CLASSIFICATION_CODE"])
                self.set_places_address()
            else:
                logger.info("Could not find any results for the provided address and postcode")

        else:
            logger.info("Could not find any results for the provided address and postcode")

        self._places_response = {"status": response.status_code}
        return self._places_response

    def parse_classification_code(self, classification_code: str):
        """
        This function will convert the classification code, returned by the OS places api, to a property type that is
        compatible with the EPC database.

        The various classifications can be found here:
        https://osdatahub.os.uk/docs/places/technicalSpecification

        Under LPI Output, CLASSIFICATION_CODE is described, and a link is provided to the full table of classifications
        For these purposes, we do not need the full classification as this includes non-residential properties. We only
        parse the ones of interest to us

        :param classification_code: the CLASSIFICATION_CODE value of the matched record
        :return: None; property_type and built_form are set on the instance, as "" when the code is not mapped
        """

        # NOTE(review): 'Detatched' / 'Semi-Detatched' look like misspellings of the EPC built-form values
        # ('Detached' / 'Semi-Detached') - confirm against the consumers of built_form before correcting them
        value_map = {
            # In the OS api, "RD" is a "Dwelling" however this is not valid property type in the EPC database
            'RD': {},
            'RD02': {'property_type': 'House', 'built_form': 'Detatched'},
            'RD03': {'property_type': 'House', 'built_form': 'Semi-Detatched'},
            'RD04': {'property_type': 'House', 'built_form': 'Mid-Terrace'},
            'RD06': {'property_type': 'Flat'},
        }

        mapped = value_map.get(classification_code, {})
        self.property_type = mapped.get("property_type", "")
        self.built_form = mapped.get("built_form", "")
|
||||
|
|
@ -9,18 +9,17 @@ from etl.epc.DataProcessor import EPCDataProcessor
|
|||
from etl.epc.Dataset import TrainingDataset
|
||||
from etl.epc.settings import LATEST_FIELD, MANDATORY_FIXED_FEATURES, POTENTIAL_COLUMNS, EFFICIENCY_FEATURES, BUILT_FORM_REMAP
|
||||
from etl.epc_clean.epc_attributes.all_cleaners import all_cleaner_map
|
||||
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
|
||||
from utils.logger import setup_logger
|
||||
from utils.s3 import read_dataframe_from_s3_parquet
|
||||
from epc_api.client import EpcClient
|
||||
from BaseUtility import Definitions
|
||||
from recommendations.rdsap_tables import england_wales_age_band_lookup, FLOOR_LEVEL_MAP
|
||||
from recommendations.recommendation_utils import (
|
||||
estimate_perimeter, get_wall_type, estimate_external_wall_area, esimtate_pitched_roof_area
|
||||
estimate_perimeter, get_wall_type, estimate_external_wall_area, esimtate_pitched_roof_area, estimate_windows
|
||||
)
|
||||
|
||||
|
||||
ENVIRONMENT = os.environ.get('ENVIRONMENT', 'dev')
|
||||
EPC_AUTH_TOKEN = os.environ.get('EPC_AUTH_TOKEN')
|
||||
DATA_BUCKET = os.environ.get('DATA_BUCKET', 'retrofit-data-dev' if ENVIRONMENT == 'dev' else None)
|
||||
|
||||
logger = setup_logger()
|
||||
|
|
@ -51,13 +50,14 @@ class Property(Definitions):
|
|||
|
||||
spatial = None
|
||||
|
||||
def __init__(self, id, postcode, address1, epc_record, data=None):
|
||||
def __init__(self, id, postcode, address, epc_record, data=None):
|
||||
|
||||
self.epc_record = epc_record
|
||||
|
||||
self.id = id
|
||||
|
||||
self.address = address
|
||||
self.postcode = postcode
|
||||
self.address1 = address1
|
||||
self.data = {k.replace("_", "-"): v for k,v in epc_record.get("prepared_epc").items()}
|
||||
self.old_data = epc_record.get("old_data")
|
||||
self.property_dimensions = None
|
||||
|
|
@ -112,6 +112,9 @@ class Property(Definitions):
|
|||
self.insulation_floor_area = None
|
||||
self.number_lighting_outlets = epc_record.prepared_epc.get("fixed_lighting_outlets_count")
|
||||
self.floor_level = None
|
||||
self.number_of_windows = None
|
||||
self.solar_pv_roof_area = None
|
||||
self.solar_pv_percentage = None
|
||||
|
||||
self.current_adjusted_energy = None
|
||||
self.expected_adjusted_energy = None
|
||||
|
|
@ -177,81 +180,51 @@ class Property(Definitions):
|
|||
recommendation_record["walls_insulation_thickness_ending"] = "above average"
|
||||
recommendation_record["walls_energy_eff_ending"] = "Good"
|
||||
else:
|
||||
if recommendation_record["walls_thermal_transmittance_ending"] is None:
|
||||
raise ValueError("We should not have a None value for the u value")
|
||||
wind_turbine_count = int(wind_turbine_count)
|
||||
|
||||
if recommendation_record["walls_insulation_thickness_ending"] is None:
|
||||
recommendation_record["walls_insulation_thickness_ending"] = "none"
|
||||
self.wind_turbine = {
|
||||
"wind_turbine": wind_turbine_count,
|
||||
}
|
||||
|
||||
# Update description to indicate it's insulate
|
||||
if recommendation["type"] in ["solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation"]:
|
||||
if len(recommendation["parts"]) > 1:
|
||||
raise NotImplementedError("Have more than 1 floor insulation part - handle this case")
|
||||
def set_count_variables(self):
    """
    For EPC fields that are just counts, we'll set them here
    These are fields that are integers but may contain additional values such as "" so we can't do a direct
    conversion straight to an integer

    Each attribute is read from its own EPC field. A missing value (None, "" or one of DATA_ANOMALY_MATCHES)
    becomes None for the attributes listed in null_attributes and 0 for the others.
    :return: None; the attributes named in the fields mapping are set on the instance
    """

    # attribute on this object -> EPC field it is read from
    fields = {
        "number_of_open_fireplaces": "number-open-fireplaces",
        "number_of_extensions": "extension-count",
        "number_of_storeys": "flat-storey-count",
        "number_of_rooms": "number-habitable-rooms",
    }

    # Attributes for which a missing value is stored as None, rather than being defaulted to 0
    null_attributes = ["number_of_storeys", "number_of_rooms"]

    for attribute, epc_field in fields.items():
        # Read the field that belongs to this attribute. Reading "extension-count" for every attribute would
        # set all four counts to the extension count
        value = self.data[epc_field]

        if value is None or value == "" or value in self.DATA_ANOMALY_MATCHES:
            value = None if attribute in null_attributes else 0
        else:
            value = int(value)

        setattr(self, attribute, value)
|
||||
|
||||
proposed_depth = int(parts[0]["depth"])
|
||||
if proposed_depth not in valid_numeric_values:
|
||||
# Take the nearest value for scoring
|
||||
proposed_depth = min(valid_numeric_values, key=lambda x: abs(x - proposed_depth))
|
||||
|
||||
recommendation_record["roof_insulation_thickness_ending"] = str(proposed_depth)
|
||||
recommendation_record["roof_energy_eff_ending"] = "Very Good"
|
||||
else:
|
||||
# Fill missing roof u-values - this fill is not based on recommended upgrades
|
||||
if recommendation_record["roof_thermal_transmittance_ending"] is None:
|
||||
raise ValueError("We should not have a None value for the u value")
|
||||
|
||||
if recommendation_record["roof_insulation_thickness_ending"] is None:
|
||||
recommendation_record["roof_insulation_thickness_ending"] = "none"
|
||||
|
||||
if recommendation["type"] == "mechanical_ventilation":
|
||||
recommendation_record["mechanical_ventilation_ending"] = 'mechanical, extract only'
|
||||
|
||||
if recommendation["type"] == "sealing_open_fireplace":
|
||||
recommendation_record["number_open_fireplaces_ending"] = 0
|
||||
|
||||
if recommendation["type"] == "low_energy_lighting":
|
||||
recommendation_record["low_energy_lighting_ending"] = 100
|
||||
recommendation_record["lighting_energy_eff_starting"] = "Very Good"
|
||||
|
||||
if recommendation["type"] not in [
|
||||
"mechanical_ventilation", "sealing_open_fireplace", "low_energy_lighting",
|
||||
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
|
||||
"loft_insulation", "room_roof_insulation", "flat_roof_insulation",
|
||||
"solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation"
|
||||
]:
|
||||
raise NotImplementedError("Implement me")
|
||||
|
||||
return recommendation_record
|
||||
|
||||
|
||||
def get_components(self, cleaned):
|
||||
def get_components(self, cleaned, photo_supply_lookup, floor_area_decile_thresholds):
|
||||
"""
|
||||
Given the cleaning that has been performed, we'll use this to identify the property
|
||||
components, from roof to walls to windows, heating and hot water
|
||||
:param cleaned: This is the dictionary of components found in cleaner.cleaned
|
||||
:param photo_supply_lookup: This is the lookup table for the photo supply, used to estimate the percentage
|
||||
of the roof that is suitable for solar panels
|
||||
:param floor_area_decile_thresholds: This is the decile thresholds for the floor area, used in estimating the
|
||||
solar pv roof area
|
||||
:return:
|
||||
"""
|
||||
|
||||
|
|
@ -301,6 +274,10 @@ class Property(Definitions):
|
|||
self.set_wall_type()
|
||||
self.set_floor_type()
|
||||
self.set_floor_level()
|
||||
self.set_windows_count()
|
||||
self.set_solar_panel_area(
|
||||
photo_supply_lookup=photo_supply_lookup, floor_area_decile_thresholds=floor_area_decile_thresholds
|
||||
)
|
||||
|
||||
def set_spatial(self, spatial: pd.DataFrame):
|
||||
"""
|
||||
|
|
@ -368,7 +345,7 @@ class Property(Definitions):
|
|||
"""
|
||||
Utility function for usage in the lambda, for preparing the _rating fields
|
||||
"""
|
||||
return rating_lookup[field].value if field not in cls.DATA_ANOMALY_MATCHES else None
|
||||
return rating_lookup[field].value if (field not in cls.DATA_ANOMALY_MATCHES) and (field is not None) else None
|
||||
|
||||
def get_property_details_epc(self, portfolio_id: int, rating_lookup):
|
||||
|
||||
|
|
@ -409,6 +386,7 @@ class Property(Definitions):
|
|||
"primary_energy_consumption": self.energy["primary_energy_consumption"],
|
||||
"co2_emissions": self.energy["co2_emissions"],
|
||||
"adjusted_energy_consumption": self.current_adjusted_energy,
|
||||
"estimated": self.data.get("estimated", False)
|
||||
}
|
||||
|
||||
return property_details_epc
|
||||
|
|
@ -664,7 +642,7 @@ class Property(Definitions):
|
|||
:return:
|
||||
"""
|
||||
|
||||
if self.data["fixed-lighting-outlets-count"] == "":
|
||||
if self.data["fixed-lighting-outlets-count"] in [None, ""]:
|
||||
|
||||
# We check old EPCs and the full SAP EPC
|
||||
|
||||
|
|
@ -693,3 +671,52 @@ class Property(Definitions):
|
|||
"""
|
||||
self.current_adjusted_energy = current_adjusted_energy
|
||||
self.expected_adjusted_energy = expected_adjusted_energy
|
||||
|
||||
def set_windows_count(self):
    """
    Estimate how many windows the property has and store the result on number_of_windows.

    The estimate comes from estimate_windows, fed with the property type, built form, construction age band,
    floor area, number of rooms and extension count held on this object.
    :return:
    """

    epc = self.data
    window_inputs = {
        "property_type": epc["property-type"],
        "built_form": epc["built-form"],
        "construction_age_band": self.construction_age_band,
        "floor_area": self.floor_area,
        "number_habitable_rooms": self.number_of_rooms,
        # The extension count is passed on as a float
        "extension_count": float(epc["extension-count"]),
    }

    self.number_of_windows = estimate_windows(**window_inputs)
|
||||
|
||||
def set_solar_panel_area(self, photo_supply_lookup, floor_area_decile_thresholds):
    """
    Sets the approximate area of the solar panels

    The share of the roof used is the mean of "photo_supply_median" over the lookup rows matched for this
    property, divided by 100. That share is applied to the insulation floor area for a flat roof and to the
    pitched roof area otherwise. Both solar_pv_roof_area and solar_pv_percentage are set.

    :param photo_supply_lookup: lookup passed through to SolarPhotoSupply.filter_photo_supply_lookup
    :param floor_area_decile_thresholds: thresholds passed through to SolarPhotoSupply.filter_photo_supply_lookup
    :raises ValueError: if the area needed for this property's roof type has not been set yet
    :return:
    """

    if (self.insulation_floor_area is None) and (self.pitched_roof_area is None):
        raise ValueError(
            "Need to set insulation floor area and pitched roof area before setting solar pv roof area"
        )

    # Only one of the two areas is used, depending on the roof type. Check that specific one up front, so a
    # missing value surfaces as a clear error instead of a TypeError when multiplying None further down
    is_flat = self.roof["is_flat"]
    roof_area = self.insulation_floor_area if is_flat else self.pitched_roof_area
    if roof_area is None:
        missing = "insulation floor area" if is_flat else "pitched roof area"
        raise ValueError(f"Need to set {missing} before setting solar pv roof area")

    photo_supply_matched = SolarPhotoSupply.filter_photo_supply_lookup(
        photo_supply_lookup=photo_supply_lookup,
        floor_area_decile_thresholds=floor_area_decile_thresholds,
        tenure=self.data["tenure"],
        built_form=self.data["built-form"],
        property_type=self.data["property-type"],
        construction_age_band=self.construction_age_band,
        is_flat=self.roof["is_flat"],
        is_pitched=self.roof["is_pitched"],
        is_roof_room=self.roof["is_roof_room"],
        floor_area=self.floor_area
    )

    # The lookup value is divided by 100 to turn it into a fraction of the roof
    percentage_of_roof = photo_supply_matched["photo_supply_median"].mean()
    percentage_of_roof = percentage_of_roof / 100

    self.solar_pv_roof_area = roof_area * percentage_of_roof

    self.solar_pv_percentage = percentage_of_roof
|
||||
|
|
|
|||
|
|
@ -1,12 +1,114 @@
|
|||
import os
|
||||
import time
|
||||
import re
|
||||
|
||||
import usaddress
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from epc_api.client import EpcClient
|
||||
from backend.OrdnanceSurvey import OrdnanceSuveyClient
|
||||
from BaseUtility import Definitions
|
||||
from utils.logger import setup_logger
|
||||
from typing import List
|
||||
from fuzzywuzzy import process
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
# Type name to apply to each EPC field, keyed by the hyphenated EPC field name.
# The values look like pandas dtype names ("Int64" being the nullable integer type) - presumably this mapping is
# used to cast a dataframe of EPC records; verify against the code that consumes it.
# Fields that are commented out are deliberately left without a type.
vartypes = {
    'low-energy-fixed-light-count': "Int64",
    # 'address': 'str',
    # 'uprn-source': 'str',
    'floor-height': 'float',
    'heating-cost-potential': 'float',
    'unheated-corridor-length': 'float',
    'hot-water-cost-potential': 'float',
    'construction-age-band': 'str',
    'potential-energy-rating': 'str',
    'mainheat-energy-eff': 'str',
    'windows-env-eff': 'str',
    'lighting-energy-eff': 'str',
    'environment-impact-potential': "Int64",
    'glazed-type': 'str',
    'heating-cost-current': 'float',
    'address3': 'str',
    'mainheatcont-description': 'str',
    'sheating-energy-eff': 'str',
    'property-type': 'str',
    'local-authority-label': 'str',
    'fixed-lighting-outlets-count': "Int64",
    'energy-tariff': 'str',
    'mechanical-ventilation': 'str',
    # NOTE(review): every other "*-cost-*" field in this mapping is a float - confirm that 'str' is intended here
    'hot-water-cost-current': 'str',
    'county': 'str',
    'postcode': 'str',
    'solar-water-heating-flag': 'str',
    'constituency': 'str',
    'co2-emissions-potential': 'float',
    'number-heated-rooms': 'float',
    'floor-description': 'str',
    'energy-consumption-potential': 'float',
    'local-authority': 'str',
    'built-form': 'str',
    'number-open-fireplaces': "Int64",
    'windows-description': 'str',
    'glazed-area': 'str',
    # 'inspection-date': str,
    'mains-gas-flag': 'str',
    'co2-emiss-curr-per-floor-area': 'float',
    'address1': 'str',
    'heat-loss-corridor': 'str',
    'flat-storey-count': "Int64",
    'constituency-label': 'str',
    'roof-energy-eff': 'str',
    'total-floor-area': 'float',
    'building-reference-number': 'str',
    'environment-impact-current': 'float',
    'co2-emissions-current': 'float',
    'roof-description': 'str',
    'floor-energy-eff': 'str',
    'number-habitable-rooms': 'float',
    'address2': 'str',
    'hot-water-env-eff': 'str',
    'posttown': 'str',
    'mainheatc-energy-eff': 'str',
    'main-fuel': 'str',
    'lighting-env-eff': 'str',
    'windows-energy-eff': 'str',
    'floor-env-eff': 'str',
    'sheating-env-eff': 'str',
    'lighting-description': 'str',
    'roof-env-eff': 'str',
    'walls-energy-eff': 'str',
    'photo-supply': 'float',
    'lighting-cost-potential': 'float',
    'mainheat-env-eff': 'str',
    'multi-glaze-proportion': 'float',
    'main-heating-controls': 'str',
    # 'lodgement-datetime',
    'flat-top-storey': 'str',
    'current-energy-rating': 'str',
    'secondheat-description': 'str',
    'walls-env-eff': 'str',
    'transaction-type': 'str',
    # 'uprn': "Int64",
    'current-energy-efficiency': 'float',
    'energy-consumption-current': 'float',
    'mainheat-description': 'str',
    'lighting-cost-current': 'float',
    # 'lodgement-date',
    'extension-count': "Int64",
    'mainheatc-env-eff': 'str',
    'lmk-key': 'str',
    'wind-turbine-count': "Int64",
    'tenure': 'str',
    'floor-level': 'str',
    'potential-energy-efficiency': "Int64",
    'hot-water-energy-eff': 'str',
    'low-energy-lighting': 'float',
    'walls-description': 'str',
    'hotwater-description': 'str'
}
|
||||
|
||||
|
||||
class SearchEpc:
|
||||
"""
|
||||
|
|
@ -38,53 +140,127 @@ class SearchEpc:
|
|||
self,
|
||||
address1: str,
|
||||
postcode: str,
|
||||
address2: str = None,
|
||||
address3: str = None,
|
||||
address4: str = None,
|
||||
max_retries: int = None
|
||||
auth_token: str,
|
||||
os_api_key: str,
|
||||
full_address: str | None = None,
|
||||
max_retries: int = None,
|
||||
uprn: [int, None] = None,
|
||||
size=None,
|
||||
property_type=None,
|
||||
):
|
||||
"""
|
||||
Address lines 1 and postcode are mandatory fields. The other address lines are optional
|
||||
but can be used to find the epc for the home, if address1 and postcode are insufficient
|
||||
:param address1: string, property's address line 1
|
||||
:param postcode: string, property's postcode
|
||||
:param address2: string, optional, propery's address line 2
|
||||
:param address3: string, optional, propery's address line 3
|
||||
:param address4: string, optional, propery's address line 4
|
||||
:param full_address: string, optional parameter, the full address of the property
|
||||
:param max_retries: int, optional, number of retries to make when searching the api
|
||||
:param uprn: int, optional, the uprn of the property
|
||||
:param size: int, optional, the number of results to return. If not provided, defaults to 25 which is the api's
|
||||
default
|
||||
:param property_type: str, optional, the property type of the property, if known before hand
|
||||
"""
|
||||
|
||||
self.address1 = address1
|
||||
self.postcode = postcode
|
||||
self.address2 = address2
|
||||
self.address3 = address3
|
||||
self.address4 = address4
|
||||
self.full_address = full_address
|
||||
self.uprn = uprn
|
||||
self.house_number = self.get_house_number(self.address1)
|
||||
self.numeric_house_number = self.extract_numeric_housenumber_part(self.house_number)
|
||||
|
||||
self.max_retries = max_retries if max_retries is not None else self.MAX_RETRIES
|
||||
|
||||
self.client = EpcClient(auth_token=os.getenv("EPC_AUTH_TOKEN"))
|
||||
self.client = EpcClient(auth_token=auth_token)
|
||||
self.ordnance_survey_client = OrdnanceSuveyClient(
|
||||
address=self.address1, postcode=self.postcode, api_key=os_api_key
|
||||
)
|
||||
|
||||
self.data = None
|
||||
self.newest_epc = None
|
||||
self.older_epcs = None
|
||||
self.full_sap_epc = None
|
||||
|
||||
def search(self):
|
||||
# These are the address and postcode values, which we store in the database
|
||||
self.address_clean = None
|
||||
self.postcode_clean = None
|
||||
|
||||
self.size = size if size is not None else 25
|
||||
|
||||
self.property_type = property_type
|
||||
|
||||
@classmethod
def get_house_number(cls, address: str) -> str | None:
    """
    Extract the house number from a free-text address.

    The usaddress parser is tried first: the first token it labels "AddressNumber" is returned with any commas
    removed. When it finds none, a regular expression looks for a number following 'Flat' or 'Apartment', or for
    a number at the very start of the address.
    :param address: the address to parse
    :return: the house number as a string, or None when neither approach finds one
    """

    for token, label in usaddress.parse(address):
        if label == "AddressNumber":
            # Strip commas picked up along with the number
            return token.replace(",", "")

    # usaddress isn't optimal for addresses with prefixes such as 'Flat', so we fall back to a custom pattern:
    # 'Flat' or 'Apartment' followed by a number, or just a number at the beginning
    fallback = re.search(r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)', address)
    if fallback is None:
        return None

    # Exactly one of the two alternatives captured the number
    flat_number, leading_number = fallback.groups()
    return flat_number if flat_number is not None else leading_number
|
||||
|
||||
@staticmethod
|
||||
def extract_numeric_housenumber_part(house_number: str | None) -> int | None:
|
||||
# Regular expression to find the first occurrence of one or more digits
|
||||
|
||||
if house_number is None:
|
||||
return None
|
||||
|
||||
match = re.search(r'\d+', house_number)
|
||||
|
||||
if match:
|
||||
return int(match.group())
|
||||
else:
|
||||
return None
|
||||
|
||||
def get_epc(self, params=None, size=None):
|
||||
# Get the EPC data with retries
|
||||
size = size if size is not None else self.size
|
||||
if params is None:
|
||||
if self.uprn:
|
||||
params = {"uprn": self.uprn}
|
||||
else:
|
||||
params = {"address": self.address1, "postcode": self.postcode}
|
||||
|
||||
for retry in range(self.max_retries):
|
||||
try:
|
||||
response = self.client.domestic.search(
|
||||
params={"address": self.address1, "postcode": self.postcode}
|
||||
)
|
||||
|
||||
if "uprn" in params:
|
||||
# We use the direct call method inside, since we need to implement uprn as a valid
|
||||
# parameter for the search function
|
||||
url = os.path.join(self.client.domestic.host, "search")
|
||||
response = self.client.domestic.call(method="get", url=url, params=params)
|
||||
else:
|
||||
response = self.client.domestic.search(params=params, size=size)
|
||||
|
||||
if response:
|
||||
self.data = response
|
||||
return self.SUCCESS
|
||||
|
||||
if retry > 0:
|
||||
print("Failed previous attempt but retry successful")
|
||||
logger.info("Failed previous attempt but retry successful")
|
||||
# If we got nothing, final try
|
||||
if not response:
|
||||
# TODO: Make a call to OS uprn service and get the address' uprn, just in case there is an
|
||||
# issue with how we are searching the api
|
||||
|
||||
return {
|
||||
"status": 204,
|
||||
"message": "no data",
|
||||
|
|
@ -127,7 +303,6 @@ class SearchEpc:
|
|||
if len(uprns) == 1:
|
||||
return rows
|
||||
|
||||
logger.error("Multiple UPRNS found - we should use an alternate method of searching - TODO")
|
||||
if property_type is not None:
|
||||
# We can do a filter on the property type
|
||||
rows_filtered = [r for r in rows if r["property-type"] == property_type]
|
||||
|
|
@ -147,7 +322,24 @@ class SearchEpc:
|
|||
|
||||
return rows
|
||||
|
||||
def retrieve(self, property_type=None, address=None):
|
||||
@staticmethod
|
||||
def format_address(newest_epc):
|
||||
"""
|
||||
Format address and postcode for storage in the database
|
||||
"""
|
||||
postcode = newest_epc["postcode"]
|
||||
address = newest_epc["address"]
|
||||
|
||||
# Format them
|
||||
address = address.replace(postcode, "").strip()
|
||||
address = address.rstrip(",").strip()
|
||||
address = address.title()
|
||||
|
||||
postcode = postcode.upper()
|
||||
|
||||
return address, postcode
|
||||
|
||||
def extract_epc_data(self, address=None):
|
||||
|
||||
"""
|
||||
Given a successful search, this method will format the data and return it
|
||||
|
|
@ -163,7 +355,7 @@ class SearchEpc:
|
|||
# Firstly, we should only have 1 urpn so if we have multiple, we'll need to filter down the
|
||||
# property further
|
||||
|
||||
rows = self.filter_rows(rows, property_type=property_type, address=None)
|
||||
rows = self.filter_rows(rows, property_type=self.property_type, address=None)
|
||||
rows = self.filter_rows(rows, property_type=None, address=address)
|
||||
|
||||
# We now check for a full sap epc:
|
||||
|
|
@ -173,7 +365,26 @@ class SearchEpc:
|
|||
# Finally, we identify the newest epc and the rest, and then return
|
||||
newest_epc, older_epcs = self.filter_newest_epc(list_of_epcs=rows)
|
||||
|
||||
return newest_epc, older_epcs, full_sap_epc
|
||||
# Retrieve postcode and address
|
||||
address_epc, postcode_epc = self.format_address(newest_epc=newest_epc)
|
||||
|
||||
# Get the uprn from the newest record for this home
|
||||
uprns = {r["uprn"] for r in rows if r["uprn"]}
|
||||
# We can sometimes have no uprn for a property
|
||||
if (len(uprns) == 0) and len(rows) > 0:
|
||||
logger.warning("Found data but missing uprn")
|
||||
elif len(uprns) != 1:
|
||||
# There is a possibility that we have multiple UPRNs for a single property, which is an error
|
||||
addresses = {r["address"] for r in rows}
|
||||
if len(addresses) == 1:
|
||||
# Take the uprn from the most recent
|
||||
uprns = {newest_epc["uprn"]}
|
||||
else:
|
||||
raise ValueError("Multiple UPRNs found - investigate me")
|
||||
|
||||
uprn = uprns.pop() if uprns else None
|
||||
|
||||
return newest_epc, older_epcs, full_sap_epc, address_epc, postcode_epc, uprn
|
||||
|
||||
@staticmethod
|
||||
def filter_newest_epc(list_of_epcs: List):
|
||||
|
|
@ -186,8 +397,334 @@ class SearchEpc:
|
|||
return {}, []
|
||||
|
||||
if len(newest_response) != 1:
|
||||
raise Exception("More than one result found for this address - investigate me")
|
||||
# It is possible (but rare, and likely an error on EPC lodgement) that we have multiple EPCs that
|
||||
# were lodged at the exact same time. In this case, we will take the first one
|
||||
newest_response = [newest_response[0]]
|
||||
|
||||
older_epcs = [epc for epc in list_of_epcs if epc["lmk-key"] != newest_response[0]["lmk-key"]]
|
||||
|
||||
return newest_response[0], older_epcs
|
||||
|
||||
@staticmethod
|
||||
def _get_epc_mode(col: str, epc_data: pd.DataFrame):
|
||||
"""
|
||||
Simple method to extract the mode value from the EPC data
|
||||
:param col: name of the column to take the mode of
|
||||
:param epc_data: pandas dataframe of epc data
|
||||
"""
|
||||
|
||||
mode_value = epc_data[[col]].mode(dropna=True)
|
||||
if len(mode_value) != 1:
|
||||
raise NotImplementedError("TODO: Handle multiple modes")
|
||||
mode_value = mode_value.iloc[0][col]
|
||||
|
||||
return mode_value
|
||||
|
||||
def fetch_nearby_epcs(
    self, initial_postcode: str,
    lmks_to_drop: list[str] | None = None,
    built_form: str = "",
    property_type: str = ""
):
    """
    Fetches and processes EPC data for a given initial postcode, applying successive trimming
    to the postcode and filtering the data until a non-empty result set is found.

    The function queries the EPC API with the provided postcode, and if no data is found or
    if the data doesn't meet certain criteria, it progressively shortens the postcode by
    removing the last character and retries the query. This process continues until a valid
    set of EPC data is obtained or the postcode is exhausted.

    Additional filtering is applied to the obtained EPC data based on 'lmk-key', 'built-form',
    and 'property-type'. The data is also processed to extract and numerically interpret house
    numbers, calculate house number distances, and apply weights based on these distances.

    :param initial_postcode: The initial full postcode for the EPC data query.
    :param lmks_to_drop: List of 'lmk-key' values to be excluded from the EPC data.
    :param built_form: The 'built-form' value to be used for filtering the EPC data.
    :param property_type: The 'property-type' value to be used for filtering the EPC data. When
        given, it must be one of the keys of ``property_type_api_map`` below.
    :return: A non-empty dataframe holding the most recent EPC per uprn that matches the chosen
        property type (and, where applicable, built form), including a "weight" column.
    :raises Exception: If the postcode has been trimmed to nothing without finding any matching data.
    """

    # Maps the property type labels used by callers onto the values sent to the EPC API
    property_type_api_map = {
        "Bungalow": "bungalow",
        "Flat": "flat",
        "House": "house",
        "Maisonette": "maisonette",
        "Park home": "park home",
    }

    postcode = initial_postcode
    while postcode:
        # Fetch data from EPC API
        params = {"postcode": postcode}
        if property_type:
            params["property-type"] = property_type_api_map[property_type]

        # We request up to 100 records of the relevant type (presumably `size` is the page size of
        # get_epc - TODO confirm), so as not to pull in too many irrelevant homes
        epc_response = self.get_epc(params=params, size=100)

        if epc_response["status"] == 200:
            # The rows are read from self.data - presumably populated by get_epc; verify
            epc_data = pd.DataFrame(self.data["rows"])

            if lmks_to_drop is not None:
                epc_data = epc_data[~epc_data["lmk-key"].isin(lmks_to_drop)]

            if not epc_data.empty:
                # Further processing of the EPC data: keep only the newest EPC for each uprn
                epc_data['lodgement-datetime'] = pd.to_datetime(epc_data['lodgement-datetime'], format='mixed')
                epc_data = epc_data.sort_values("lodgement-datetime", ascending=False).groupby("uprn").head(1)
                epc_data["house_number"] = epc_data["address"].apply(lambda add1: self.get_house_number(add1))
                epc_data["numeric_house_number"] = epc_data["house_number"].apply(
                    lambda house_num: self.extract_numeric_housenumber_part(house_num)
                )

                if self.numeric_house_number is None:
                    # If we don't have a house number, we treat all weights as equal
                    epc_data["weight"] = 1
                else:
                    epc_data["house_number_distance"] = abs(
                        epc_data["numeric_house_number"] - self.numeric_house_number
                    )
                    # TODO: Testing
                    # If the postcode is different from the initial postcode, it doesn't make sense to have
                    # any weightings. The same applies when no neighbour has a usable house number.
                    if all(pd.isnull(epc_data["house_number_distance"])) or (postcode != initial_postcode):
                        epc_data["weight"] = 1
                    else:
                        # We add 1 to the distance so that a distance of 0 (e.g. comparing house number
                        # 7a to 7b, or 9A to 9) does not cause a division by zero
                        epc_data["weight"] = 1 / np.sqrt(epc_data["house_number_distance"] + 1)
                        # Homes without a house number receive the average weight
                        epc_data["weight"] = epc_data["weight"].fillna(epc_data["weight"].mean())

                # If the caller did not supply a property type, take the dominant one from the neighbours
                estimation_property_type = self._estimate_str(
                    key="property-type", estimation_data=epc_data
                ) if property_type == "" else property_type

                # Dominant built form amongst neighbours of the chosen property type
                epc_built_form = self._estimate_str(
                    key="built-form",
                    estimation_data=epc_data[epc_data["property-type"] == estimation_property_type]
                )

                # A requested "Semi-Detached" in a terraced neighbourhood is treated as "End-Terraced"
                if built_form == "Semi-Detached" and epc_built_form in ["End-Terraced", "Mid-Terraced"]:
                    estimation_built_form = "End-Terraced"
                elif (built_form == "") or (pd.isnull(built_form)):
                    estimation_built_form = epc_built_form
                else:
                    estimation_built_form = built_form

                # We handle some edge cases experienced with maisonettes - if the built form is detached
                # or semi-detached, just filter on maisonette.
                # We also add some additional logic for Park homes, because they are far less common than
                # other property types
                is_maisonette_with_bad_built_form = (estimation_property_type == "Maisonette") & (
                    estimation_built_form in ["Detached", "Semi-Detached"]
                )

                is_park_home_without_built_form = (estimation_property_type == "Park home") & (
                    sum(epc_data["built-form"] == estimation_built_form) == 0
                )

                has_missing_built_form = not estimation_built_form

                if is_maisonette_with_bad_built_form or is_park_home_without_built_form or has_missing_built_form:
                    # In these cases the built form is ignored and only the property type is matched
                    epc_data = epc_data[epc_data["property-type"] == estimation_property_type]
                else:
                    epc_data = epc_data[
                        (epc_data["built-form"] == estimation_built_form) & (
                            epc_data["property-type"] == estimation_property_type)
                    ]

                if not epc_data.empty:
                    return epc_data  # Return the filtered data if it's not empty

        # Shorten the postcode by one character for the next iteration
        postcode = postcode[:-1].rstrip()

    # If loop finishes without a valid response, raise an exception
    raise Exception("Unable to find postcode data after trimming - investigate me")
|
||||
|
||||
def estimate_epc(self, property_type, built_form, lmks_to_drop=None):
    """
    Build an estimated EPC for a property that has none, using the EPCs of nearby comparable homes.

    Each attribute listed in ``vartypes`` is estimated from the neighbouring EPCs with a method
    chosen by its declared type; attributes with no usable neighbour values are set to None.

    Note - do we have postcodes with just a single address? We would need to use a different approach
    to find the closest homes

    :param property_type: The property type of the property being estimated, as retrieved from the
        ordnance survey api
    :param built_form: The built form of the property being estimated, as retrieved from the
        ordnance survey api
    :param lmks_to_drop: LMK keys to exclude from the estimation. This is used as an override for
        testing, to drop EPCs belonging to the property under test
    :return: dict of estimated EPC attributes plus lodgement date fields, postcode, uprn, address and
        an "estimated" flag set to True
    :raises NotImplementedError: if ``vartypes`` declares a type with no estimation method
    """

    # Only similar properties (same type / built form) near this postcode feed the estimate
    nearby_epcs = self.fetch_nearby_epcs(
        initial_postcode=self.postcode,
        lmks_to_drop=lmks_to_drop,
        built_form=built_form,
        property_type=property_type
    )

    # One estimation method per supported datatype
    estimators = {
        "Int64": self._estimate_int,
        "float": self._estimate_float,
        "str": self._estimate_str,
    }

    estimated_epc = {}
    for key, vartype in vartypes.items():
        # Normalise missing values and empty strings to None
        nearby_epcs[key] = np.where(pd.isnull(nearby_epcs[key]), None, nearby_epcs[key])
        nearby_epcs[key] = np.where(nearby_epcs[key] == "", None, nearby_epcs[key])

        candidates = nearby_epcs[[key, "weight", "lodgement-datetime"]].copy()
        candidates = candidates[~pd.isnull(candidates[key])]
        candidates = candidates[~candidates[key].isin(Definitions.DATA_ANOMALY_MATCHES)]

        typed_values = candidates[key]
        if vartype == "Int64":
            # We have some edge cases where we get the error "invalid literal for int() with base 10: '1.0'"
            # so we go through float first
            typed_values = typed_values.astype(float)
        candidates[key] = typed_values.astype(vartype)

        # Nothing usable for this attribute
        if candidates.shape[0] == 0:
            estimated_epc[key] = None
            continue

        if vartype not in estimators:
            raise NotImplementedError("estimation method not implemented for type")

        estimated_epc[key] = estimators[vartype](candidates, key)

    # Insert an estimated lodgement datetime (weighted average) and the date derived from it
    weighted_lodgement = self.calculate_weighted_lodgement_datetime(epc_data=nearby_epcs)
    estimated_epc["lodgement-datetime"] = weighted_lodgement
    estimated_epc["lodgement-date"] = weighted_lodgement.strftime("%Y-%m-%d")

    estimated_epc["postcode"] = self.postcode
    estimated_epc["uprn"] = self.uprn
    estimated_epc["address"] = self.full_address
    # Indicate that this epc was estimated
    estimated_epc["estimated"] = True

    return estimated_epc
|
||||
|
||||
@staticmethod
|
||||
def calculate_weighted_lodgement_datetime(epc_data):
    """
    Compute the weighted mean of the 'lodgement-datetime' column, using the 'weight' column.

    The mean is taken over nanosecond offsets from the earliest date rather than over raw
    epoch nanoseconds. Summing raw epoch values overflows int64 as soon as a handful of rows
    carry integer weights, and silently produces a wrong date.

    :param epc_data: dataframe with a 'lodgement-datetime' column (anything accepted by
        pd.to_datetime) and a numeric 'weight' column
    :return: the weighted mean as a pandas Timestamp, or pd.NaT when there are no rows or the
        weights sum to zero
    """
    timestamps = pd.to_datetime(epc_data['lodgement-datetime'])

    # Nanoseconds since the epoch, as a plain numpy int64 array
    epoch_ns = timestamps.to_numpy(dtype="datetime64[ns]").view("int64")
    weights = epc_data['weight'].to_numpy(dtype="float64")

    # No rows, or nothing to weight by: there is no meaningful mean
    if epoch_ns.size == 0 or weights.sum() == 0:
        return pd.NaT

    # Work relative to the earliest date so the numbers stay small and exact
    earliest_ns = int(epoch_ns.min())
    mean_offset_ns = np.average(epoch_ns - earliest_ns, weights=weights)

    # Convert the numeric weighted mean back to datetime
    return pd.to_datetime(earliest_ns + int(round(mean_offset_ns)))
|
||||
|
||||
@staticmethod
|
||||
def _estimate_int(estimation_data, key):
|
||||
return round(np.average(a=estimation_data[key], weights=estimation_data["weight"]))
|
||||
|
||||
@staticmethod
|
||||
def _estimate_float(estimation_data, key):
|
||||
return round(np.average(a=estimation_data[key], weights=estimation_data["weight"]), 2)
|
||||
|
||||
@staticmethod
|
||||
def _estimate_str(estimation_data, key):
|
||||
agg = estimation_data.groupby(key)["weight"].sum().reset_index()
|
||||
agg = agg[agg["weight"] == agg["weight"].max()]
|
||||
if agg.shape[0] != 1:
|
||||
# If we have multiple modes, we take the more recent data on average
|
||||
recent_grouped = estimation_data[
|
||||
estimation_data[key].isin(agg[key].values)
|
||||
].groupby(key)["lodgement-datetime"].mean()
|
||||
|
||||
newest_group = recent_grouped.idxmax()
|
||||
return newest_group
|
||||
|
||||
return agg[key].values[0]
|
||||
|
||||
def find_property(self, skip_os=False):
    """
    This method will attempt to identify a property. It will, at first, use the EPC api to try and
    find the EPC for the property and the associated UPRN. If this fails, it will use the Ordnance Survey API to
    find the UPRN of the address.

    Because no result may have been provided by the EPC api because of formatting issues with the address,
    if the ordnance survey api is used and the uprn retrieved, the EPC api is queried again with the UPRN, just
    as a final check to see if there is any EPC data.

    If there is no EPC data, the epc data will be estimated based on the surrounding properties

    The results are stored on the instance (newest_epc, older_epcs, full_sap_epc, address_clean,
    postcode_clean and, where found, uprn); nothing is returned.

    :param skip_os: when True and the ordnance survey client already holds a property type, the EPC is
        estimated from that data without calling the ordnance survey api
    :raises Exception: if the ordnance survey api does not return a status of 200
    """

    # Step 1: use the epc api to find the property and uprn
    response = self.get_epc()

    if response["status"] == 200:
        (
            self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn
        ) = self.extract_epc_data(address=self.full_address)
        return

    # Step 2: If we don't have an EPC, we use the ordnance survey api to find the uprn
    if skip_os:
        if self.ordnance_survey_client.property_type is not None:
            # We can try and estimate
            estimated_epc = self.estimate_epc(
                property_type=self.ordnance_survey_client.property_type,
                built_form=self.ordnance_survey_client.built_form
            )
            self.newest_epc = estimated_epc
            self.older_epcs = []
            self.full_sap_epc = {}

            # Finally, set a standardised address 1 and postcode
            self.address_clean = self.ordnance_survey_client.address_os
            self.postcode_clean = self.ordnance_survey_client.postcode_os
            # NOTE(review): the nesting of this return could not be recovered from the source this was
            # taken from. As written, skip_os with no known property type falls through to the
            # ordnance survey lookup below - confirm this is intended.
            return

    os_response = self.ordnance_survey_client.get_places_api()

    if os_response["status"] != 200:
        # Investigate this if it happens
        raise Exception("Unable to find property - investigate me")

    # Step 3: Now that we have a uprn, do another check against the epc api, this time searching with the uprn
    # (presumably get_epc uses self.uprn once it is set - verify)
    self.uprn = self.ordnance_survey_client.most_relevant_result["UPRN"]
    response = self.get_epc()
    if response["status"] == 200:
        (
            self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn
        ) = self.extract_epc_data()
        return

    # Step 4: If we still don't have an EPC, we estimate the EPC data
    self.full_address = self.ordnance_survey_client.most_relevant_result["ADDRESS"]
    estimated_epc = self.estimate_epc(
        property_type=self.ordnance_survey_client.property_type,
        built_form=self.ordnance_survey_client.built_form
    )
    self.newest_epc = estimated_epc
    self.older_epcs = []
    self.full_sap_epc = {}

    # Finally, set a standardised address 1 and postcode
    self.address_clean = self.ordnance_survey_client.address_os
    self.postcode_clean = self.ordnance_survey_client.postcode_os
    return
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ class Settings(BaseSettings):
|
|||
HEAT_PREDICTIONS_BUCKET: str
|
||||
PLAN_TRIGGER_BUCKET: str
|
||||
EPC_AUTH_TOKEN: str
|
||||
ORDNANCE_SURVEY_API_KEY: str
|
||||
DB_HOST: str
|
||||
DB_PASSWORD: str
|
||||
DB_USERNAME: str
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ from backend.app.db.models.portfolio import (
|
|||
from sqlalchemy.orm.exc import NoResultFound
|
||||
|
||||
|
||||
def create_property(session: Session, portfolio_id: int, address: str, postcode: str) -> (int, bool):
|
||||
def create_property(session: Session, portfolio_id: int, address: str, postcode: str, uprn: str) -> (int, bool):
|
||||
"""
|
||||
This function will create a record for the property in the database if it does not exist.
|
||||
If it does exist, it will just update the updated_at field.
|
||||
|
|
@ -25,7 +25,7 @@ def create_property(session: Session, portfolio_id: int, address: str, postcode:
|
|||
try:
|
||||
# Attempt to fetch the existing property
|
||||
existing_property = session.query(PropertyModel).filter_by(
|
||||
address=address, postcode=postcode, portfolio_id=portfolio_id
|
||||
uprn=uprn, portfolio_id=portfolio_id
|
||||
).one()
|
||||
|
||||
# Update the 'updated_at' field
|
||||
|
|
@ -43,6 +43,7 @@ def create_property(session: Session, portfolio_id: int, address: str, postcode:
|
|||
address=address,
|
||||
postcode=postcode,
|
||||
portfolio_id=portfolio_id,
|
||||
uprn=uprn,
|
||||
creation_status=PropertyCreationStatus.LOADING,
|
||||
status=PortfolioStatus.ASSESSMENT.value,
|
||||
has_pre_condition_report=False,
|
||||
|
|
|
|||
|
|
@ -19,7 +19,6 @@ class MaterialType(enum.Enum):
|
|||
flat_roof_insulation = "flat_roof_insulation"
|
||||
room_roof_insulation = "room_roof_insulation"
|
||||
windows_glazing = "windows_glazing"
|
||||
|
||||
|
||||
iwi_wall_demolition = "iwi_wall_demolition"
|
||||
iwi_vapour_barrier = "iwi_vapour_barrier"
|
||||
|
|
|
|||
|
|
@ -153,6 +153,7 @@ class PropertyDetailsEpcModel(Base):
|
|||
primary_energy_consumption = Column(Float)
|
||||
co2_emissions = Column(Float)
|
||||
adjusted_energy_consumption = Column(Float)
|
||||
estimated = Column(Boolean, default=False)
|
||||
|
||||
|
||||
class PropertyDetailsSpatial(Base):
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import numpy as np
|
|||
import pandas as pd
|
||||
from epc_api.client import EpcClient
|
||||
from etl.epc.Record import EPCRecord
|
||||
from backend.SearchEpc import SearchEpc
|
||||
from fastapi import APIRouter, Depends
|
||||
from sqlalchemy.exc import IntegrityError, OperationalError
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
|
@ -30,6 +31,8 @@ from backend.ml_models.api import ModelApi
|
|||
from backend.Property import Property
|
||||
from etl.epc.DataProcessor import EPCDataProcessor
|
||||
from etl.epc.settings import COLUMNS_TO_MERGE_ON
|
||||
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
|
||||
|
||||
from recommendations.optimiser.CostOptimiser import CostOptimiser
|
||||
from recommendations.optimiser.GainOptimiser import GainOptimiser
|
||||
from recommendations.optimiser.optimiser_functions import prepare_input_measures
|
||||
|
|
@ -43,54 +46,6 @@ logger = setup_logger()
|
|||
|
||||
BATCH_SIZE = 5
|
||||
|
||||
class DummyDownloader:
    """
    Minimal EPC downloader: looks a property up by address and postcode through the EPC client
    and keeps the newest EPC, any older EPCs and any "new dwelling" (full SAP) EPC.
    """

    def __init__(self, postcode, address1, id, epc_client):
        """
        :param postcode: postcode of the property to search for
        :param address1: address of the property to search for
        :param id: identifier of the property
        :param epc_client: client exposing ``domestic.search(params=...)``
        """
        self.id = id
        self.postcode = postcode
        self.address1 = address1

        # Populated by search_address_epc. All result attributes are initialised here so that
        # reading them before (or without) a successful search does not raise AttributeError.
        self.data = None
        self.old_data = None
        self.full_sap_epc = []
        self.uprn = None

        self.epc_client = epc_client

    def search_address_epc(self):
        """
        This method searches for an address in the EPC database and stores the newest result in
        ``self.data``. It does nothing if data has already been loaded.

        :raises Exception: if several EPCs share the newest lodgement datetime
        :raises IndexError: if the search returns no rows
        """
        if self.data:
            return

        # This will fail if a property does not have an EPC - this has been documented as a case to handle
        response = self.epc_client.domestic.search(params={"address": self.address1, "postcode": self.postcode})
        rows = response["rows"]

        # Check if we have a full sap EPC
        new_dwelling_epcs = [r for r in rows if r["transaction-type"] == "new dwelling"]
        self.full_sap_epc = new_dwelling_epcs[0] if new_dwelling_epcs else new_dwelling_epcs

        if len(rows) > 1:
            # Work out the newest lodgement datetime once, rather than once per row
            latest_lodgement = max(r["lodgement-datetime"] for r in rows)
            newest_response = [r for r in rows if r["lodgement-datetime"] == latest_lodgement]
            if len(newest_response) > 1:
                raise Exception("More than one result found for this address - investigate me")

            # We'll keep old EPCs in case it contains information, not present on the newest one
            self.old_data = [epc for epc in rows if epc["lmk-key"] != newest_response[0]["lmk-key"]]

            response["rows"] = newest_response

        self.data = response["rows"][0]
        # For the moment, if we don't have a UPRN, we don't do anything about it, however we'll handle this in
        # the future by using the Ordnance Survey places API
        if not self.data["uprn"]:
            logger.warning("We do not have a UPRN for this property")
        else:
            self.uprn = int(self.data["uprn"])
|
||||
|
||||
router = APIRouter(
|
||||
prefix="/plan",
|
||||
tags=["plan"],
|
||||
|
|
@ -103,37 +58,34 @@ router = APIRouter(
|
|||
@router.post("/trigger")
|
||||
async def trigger_plan(body: PlanTriggerRequest):
|
||||
logger.info("Connecting to db")
|
||||
# session = sessionmaker(bind=db_engine)()
|
||||
session = sessionmaker(bind=db_engine)()
|
||||
created_at = datetime.now().isoformat()
|
||||
|
||||
try:
|
||||
session.begin()
|
||||
logger.info("Getting the inputs")
|
||||
Body = {'portfolio_id': '56', 'housing_type': 'Social', 'goal': 'Increase EPC', 'goal_value': 'A', 'trigger_file_path': '8/56/windows_portfolio_inputs.csv'}
|
||||
body = PlanTriggerRequest(**Body)
|
||||
epc_client = EpcClient(auth_token=get_settings().EPC_AUTH_TOKEN)
|
||||
plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
|
||||
uprn_filenames = read_dataframe_from_s3_parquet(
|
||||
bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
|
||||
)
|
||||
cleaning_data = read_parquet_from_s3(
|
||||
bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
|
||||
)
|
||||
|
||||
input_properties = []
|
||||
|
||||
for config in plan_input:
|
||||
# We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
|
||||
# TODO: implement validation. We should also standardise postcode and address in some fashion as
|
||||
# a postcode of abcdef would be considered different to ABCDEF
|
||||
|
||||
epc_searcher = SearchEpc(
|
||||
address1=config["address"],
|
||||
postcode=config["postcode"],
|
||||
auth_token=get_settings().EPC_AUTH_TOKEN,
|
||||
os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY
|
||||
)
|
||||
epc_searcher.find_property()
|
||||
# Create a record in db
|
||||
property_id, is_new = create_property(
|
||||
session, portfolio_id=body.portfolio_id, address=config['address'], postcode=config['postcode']
|
||||
session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
|
||||
)
|
||||
# If a new record was not created, we don't produce recommendations
|
||||
if not is_new:
|
||||
continue
|
||||
# TODO: Need to add heat demand target
|
||||
|
||||
create_property_targets(
|
||||
session,
|
||||
|
|
@ -143,29 +95,21 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
heat_demand_target=None
|
||||
)
|
||||
|
||||
epc_downloader = DummyDownloader(id=0, epc_client=epc_client, postcode=config['postcode'], address1=config['address'])
|
||||
epc_downloader.search_address_epc()
|
||||
|
||||
epc_records ={
|
||||
'original_epc': epc_downloader.data.copy(),
|
||||
'full_sap_epc': epc_downloader.full_sap_epc.copy() if epc_downloader.full_sap_epc else [],
|
||||
'old_data': epc_downloader.old_data.copy() if epc_downloader.old_data else []
|
||||
'original_epc': epc_searcher.newest_epc,
|
||||
'full_sap_epc': epc_searcher.full_sap_epc,
|
||||
'old_data': epc_searcher.old_data,
|
||||
}
|
||||
|
||||
prepared_epc = EPCRecord(epc_records=epc_records, run_mode="newdata", cleaning_data=cleaning_data) # This uses all the epc records to clean the data
|
||||
|
||||
p = Property(
|
||||
|
||||
input_properties.append(
|
||||
Property(
|
||||
id=property_id,
|
||||
address1=config['address'],
|
||||
postcode=config['postcode'],
|
||||
epc_record=prepared_epc,
|
||||
)
|
||||
|
||||
logger.info("Getting spatial data")
|
||||
|
||||
p.get_spatial_data(uprn_filenames)
|
||||
input_properties.append(
|
||||
p
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -180,10 +124,19 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
materials = get_materials(session)
|
||||
cleaned = get_cleaned()
|
||||
|
||||
logger.info("Getting components and epc recommendations")
|
||||
uprn_filenames = read_dataframe_from_s3_parquet(
|
||||
bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
|
||||
)
|
||||
cleaning_data = read_dataframe_from_s3_parquet(
|
||||
bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
|
||||
)
|
||||
photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET)
|
||||
|
||||
# TODO: Move this to a class. We probably want a Recommender class which takes the injects the optimisers
|
||||
# in as a dependency and then the optimisers can take the input measures in as part of the setup() method
|
||||
logger.info("Getting spatial data")
|
||||
for p in input_properties:
|
||||
p.get_spatial_data(uprn_filenames)
|
||||
|
||||
logger.info("Getting components and epc recommendations")
|
||||
|
||||
recommendations = {}
|
||||
recommendations_scoring_data = []
|
||||
|
|
@ -192,7 +145,7 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
for p in input_properties:
|
||||
|
||||
# Property recommendations
|
||||
p.get_components(cleaned)
|
||||
p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
|
||||
|
||||
recommender = Recommendations(property_instance=p, materials=materials)
|
||||
property_recommendations = recommender.recommend()
|
||||
|
|
|
|||
|
|
@ -175,11 +175,34 @@ def create_recommendation_scoring_data(
|
|||
scoring_dict["LOW_ENERGY_LIGHTING_ENDING"] = 100
|
||||
scoring_dict["LIGHTING_ENERGY_EFF_STARTING"] = "Very Good"
|
||||
|
||||
if recommendation["type"] == "windows_glazing":
|
||||
scoring_dict["MULTI_GLAZE_PROPORTION_ENDING"] = 100
|
||||
scoring_dict["WINDOWS_ENERGY_EFF_ENDING"] = "Average"
|
||||
|
||||
is_secondary_glazing = recommendation["is_secondary_glazing"]
|
||||
|
||||
if scoring_dict["glazing_type_ENDING"] == "multiple":
|
||||
pass
|
||||
elif scoring_dict["glazing_type_ENDING"] == "single":
|
||||
scoring_dict["glazing_type_ENDING"] = "secondary" if is_secondary_glazing else "double"
|
||||
elif scoring_dict["glazing_type_ENDING"] == "double":
|
||||
scoring_dict["glazing_type_ENDING"] = "multiple" if is_secondary_glazing else "double"
|
||||
elif scoring_dict["glazing_type_ENDING"] == "secondary":
|
||||
scoring_dict["glazing_type_ENDING"] = "secondary" if is_secondary_glazing else "multiple"
|
||||
elif scoring_dict["glazing_type_ENDING"] in ["triple", "high performance"]:
|
||||
scoring_dict["glazing_type_ENDING"] = "multiple"
|
||||
else:
|
||||
raise ValueError("Invalid glazing type - implement me")
|
||||
|
||||
if recommendation["type"] == "solar_pv":
|
||||
scoring_dict["PHOTO_SUPPLY_ENDING"] = recommendation["photo_supply"]
|
||||
|
||||
if recommendation["type"] not in [
|
||||
"mechanical_ventilation", "sealing_open_fireplace", "low_energy_lighting",
|
||||
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
|
||||
"loft_insulation", "room_roof_insulation", "flat_roof_insulation",
|
||||
"solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation"
|
||||
"solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation",
|
||||
"windows_glazing", "solar_pv"
|
||||
]:
|
||||
raise NotImplementedError("Implement me")
|
||||
|
||||
|
|
|
|||
|
|
@ -121,19 +121,6 @@ def epc_to_sap_lower_bound(epc: str):
|
|||
raise ValueError("EPC rating should be between A and G")
|
||||
|
||||
|
||||
def read_parquet_from_s3(bucket_name, file_key):
    """
    Download a Parquet object from S3 and load it into a pandas DataFrame.

    :param bucket_name: name of the S3 bucket holding the object
    :param file_key: key of the Parquet object within the bucket
    :return: the object's contents as a pandas DataFrame
    """
    s3_client = boto3.client('s3')

    # Pull the whole object into memory, then parse the bytes as Parquet
    parquet_bytes = s3_client.get_object(Bucket=bucket_name, Key=file_key)["Body"].read()
    return pd.read_parquet(BytesIO(parquet_bytes))
|
||||
|
||||
|
||||
def save_dataframe_to_s3_parquet(df, bucket_name, file_key):
|
||||
"""
|
||||
Save a pandas DataFrame to S3 as a Parquet file.
|
||||
|
|
|
|||
|
|
@ -19,7 +19,9 @@ class PropertyValuation:
|
|||
100070505235: 344000, # Based on Zoopla's estimation of 131 School road, which is also semi-detached
|
||||
100070513306: 182000, # Based on Zoopla's estimation of 61 Simmons Drive
|
||||
100071306896: 77000, # Based on Flat 2 of 44 Wedgewood Road on Zoopla
|
||||
100021192109: 650000 # Based on Zoopla
|
||||
100021192109: 650000, # Based on Zoopla
|
||||
766249482: 358000, # Based on Zoopla estimate for 19 Spring Lane, 3 bedroom semi-detached
|
||||
100120703802: 277000, # Based on Zoopla
|
||||
}
|
||||
|
||||
# We base our valuation uplifts on a number of sources
|
||||
|
|
@ -93,7 +95,13 @@ class PropertyValuation:
|
|||
value = cls.UPRN_VALUE_LOOKUP.get(property_instance.uprn)
|
||||
|
||||
if not value:
|
||||
raise ValueError("Have not implemented valuation for this property")
|
||||
return {
|
||||
"current_value": None,
|
||||
"lower_bound_increased_value": None,
|
||||
"upper_bound_increased_value": None,
|
||||
"average_increased_value": None,
|
||||
"average_increase": None
|
||||
}
|
||||
|
||||
current_epc = property_instance.data["current-energy-rating"]
|
||||
# We get the spectrum of ratings between the current and target EPC
|
||||
|
|
@ -119,4 +127,5 @@ class PropertyValuation:
|
|||
"lower_bound_increased_value": value * (1 + min_increase),
|
||||
"upper_bound_increased_value": value * (1 + max_increase),
|
||||
"average_increased_value": value * (1 + avg_increase),
|
||||
"average_increase": value * (1 + avg_increase) - value
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,8 +2,7 @@ import pandas as pd
|
|||
import requests
|
||||
from requests.exceptions import RequestException
|
||||
from utils.logger import setup_logger
|
||||
from utils.s3 import save_dataframe_to_s3_parquet
|
||||
from backend.app.utils import read_parquet_from_s3
|
||||
from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
|
@ -125,7 +124,7 @@ class ModelApi:
|
|||
|
||||
# Retrieve the predictions
|
||||
predictions_df = pd.DataFrame(
|
||||
read_parquet_from_s3(
|
||||
read_dataframe_from_s3_parquet(
|
||||
bucket_name=predictions_bucket,
|
||||
file_key=response["storage_filepath"].split(predictions_bucket + "/")[1]
|
||||
)
|
||||
|
|
|
|||
|
|
@ -35,4 +35,5 @@ mip==1.15.0
|
|||
boto3==1.28.3
|
||||
pandas==1.5.3
|
||||
pyarrow==12.0.1
|
||||
textblob
|
||||
textblob
|
||||
usaddress==0.5.10
|
||||
|
|
@ -9,6 +9,7 @@ from etl.epc_clean.EpcClean import EpcClean
|
|||
mock_epc_response = {
|
||||
"rows": [
|
||||
{
|
||||
"tenure": "rental (social)",
|
||||
"lmk-key": 1,
|
||||
"uprn": 1,
|
||||
"number-habitable-rooms": 5,
|
||||
|
|
@ -17,7 +18,7 @@ mock_epc_response = {
|
|||
"inspection-date": "2023-06-01",
|
||||
'lodgement-datetime': '2023-06-01 20:29:01',
|
||||
"some-other-key": "some-value",
|
||||
"roof-description": "Roof Description",
|
||||
"roof-description": "pitched, no insulation",
|
||||
"walls-description": "Walls Description",
|
||||
"windows-description": "Windows Description",
|
||||
"mainheat-description": "Main Heating Description",
|
||||
|
|
@ -37,7 +38,8 @@ mock_epc_response = {
|
|||
"floor-height": 2.5,
|
||||
"total-floor-area": 100,
|
||||
"construction-age-band": "England and Wales: 1967-1975",
|
||||
"floor-description": "Floor Description"
|
||||
"floor-description": "Floor Description",
|
||||
"floor-level": "Ground"
|
||||
},
|
||||
{
|
||||
"lmk-key": 2,
|
||||
|
|
@ -68,7 +70,8 @@ mock_epc_response = {
|
|||
"floor-height": 2.5,
|
||||
"total-floor-area": 100,
|
||||
"construction-age-band": "England and Wales: 1967-1975",
|
||||
"floor-description": "Floor Description"
|
||||
"floor-description": "Floor Description",
|
||||
"floor-level": "Ground"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -100,7 +103,8 @@ mock_epc_response_dupe = {
|
|||
"floor-height": 2.5,
|
||||
"total-floor-area": 100,
|
||||
"construction-age-band": "England and Wales: 1967-1975",
|
||||
"floor-description": "Floor Description"
|
||||
"floor-description": "Floor Description",
|
||||
"floor-level": "Ground"
|
||||
},
|
||||
{
|
||||
"lmk-key": 2,
|
||||
|
|
@ -128,7 +132,8 @@ mock_epc_response_dupe = {
|
|||
"floor-height": 2.5,
|
||||
"total-floor-area": 100,
|
||||
"construction-age-band": "England and Wales: 1967-1975",
|
||||
"floor-description": "Floor Description"
|
||||
"floor-description": "Floor Description",
|
||||
"floor-level": "Ground"
|
||||
},
|
||||
{
|
||||
"lmk-key": 3,
|
||||
|
|
@ -156,36 +161,62 @@ mock_epc_response_dupe = {
|
|||
"floor-height": 2.5,
|
||||
"total-floor-area": 100,
|
||||
"construction-age-band": "England and Wales: 1967-1975",
|
||||
"floor-description": "Floor Description"
|
||||
"floor-description": "Floor Description",
|
||||
"floor-level": "Ground"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
class TestProperty:
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def property_instance(self, mock_epc_client, mock_cleaner):
|
||||
property_instance = Property(1, "AB12CD", "Test Address", epc_client=mock_epc_client)
|
||||
def mock_photo_supply_lookup(self):
|
||||
return pd.DataFrame(
|
||||
[
|
||||
dict(
|
||||
tenure="rental (social)",
|
||||
built_form="Detached",
|
||||
property_type="House",
|
||||
construction_age_band="England and Wales: 1967-1975",
|
||||
is_flat=False,
|
||||
is_pitched=True,
|
||||
is_roof_room=False,
|
||||
floor_area_decile=2,
|
||||
photo_supply_median=40
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def mock_floor_area_decile_thresholds(self):
|
||||
return pd.DataFrame(
|
||||
{"floor_area_decile_thresholds": [0, 10, 30, 50]}
|
||||
)
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def property_instance(self, mock_cleaner):
|
||||
property_instance = Property(id=1, postcode="AB12CD", address="Test Address", data=mock_epc_response["rows"][0])
|
||||
return property_instance
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def property_instance_dupe_data(self, mock_epc_client_dupe_data):
|
||||
property_instance_dupe_data = Property(2, "AB12CD", "Test Address", epc_client=mock_epc_client_dupe_data)
|
||||
def property_instance_dupe_data(self):
|
||||
property_instance_dupe_data = Property(id=2, postcode="AB12CD", address="Test Address")
|
||||
return property_instance_dupe_data
|
||||
|
||||
@pytest.fixture
|
||||
def mock_epc_client(self):
|
||||
mock_epc_client = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
|
||||
mock_epc_client.domestic.search.return_value = mock_epc_response.copy()
|
||||
mock_epc_client.auth_token = "mocked_auth_token"
|
||||
return mock_epc_client
|
||||
|
||||
@pytest.fixture
|
||||
def mock_epc_client_dupe_data(self):
|
||||
mock_epc_client_dupe_data = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
|
||||
mock_epc_client_dupe_data.domestic.search.return_value = mock_epc_response_dupe.copy()
|
||||
mock_epc_client_dupe_data.auth_token = "mocked_auth_token"
|
||||
return mock_epc_client_dupe_data
|
||||
# @pytest.fixture
|
||||
# def mock_epc_client(self):
|
||||
# mock_epc_client = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
|
||||
# mock_epc_client.domestic.search.return_value = mock_epc_response.copy()
|
||||
# mock_epc_client.auth_token = "mocked_auth_token"
|
||||
# return mock_epc_client
|
||||
#
|
||||
# @pytest.fixture
|
||||
# def mock_epc_client_dupe_data(self):
|
||||
# mock_epc_client_dupe_data = Mock(spec=EpcClient(auth_token="mocked_auth_token"))
|
||||
# mock_epc_client_dupe_data.domestic.search.return_value = mock_epc_response_dupe.copy()
|
||||
# mock_epc_client_dupe_data.auth_token = "mocked_auth_token"
|
||||
# return mock_epc_client_dupe_data
|
||||
|
||||
@pytest.fixture
|
||||
def mock_cleaner(self):
|
||||
|
|
@ -224,7 +255,11 @@ class TestProperty:
|
|||
}
|
||||
|
||||
mock_cleaner.cleaned = {
|
||||
"roof-description": [{"original_description": "Roof Description"}],
|
||||
"roof-description": [
|
||||
{"original_description": "Roof Description"},
|
||||
{"original_description": "pitched, no insulation", "is_pitched": True, "is_flat": False,
|
||||
"is_roof_room": False}
|
||||
],
|
||||
"walls-description": [walls_data],
|
||||
"windows-description": [{"original_description": "Windows Description"}],
|
||||
"mainheat-description": [{"original_description": "Main Heating Description"}],
|
||||
|
|
@ -235,37 +270,32 @@ class TestProperty:
|
|||
}
|
||||
return mock_cleaner
|
||||
|
||||
def test_init(self, mock_epc_client):
|
||||
inst1 = Property(0, "AB12CD", "Test Address", epc_client=mock_epc_client)
|
||||
# Should be mocked auth token
|
||||
assert inst1.epc_client.auth_token == "mocked_auth_token"
|
||||
def test_init(self):
|
||||
inst1 = Property(0, postcode="AB12CD", address="Test Address")
|
||||
|
||||
inst2 = Property(3, "AB12CD", "Test Address", epc_client=mock_epc_client)
|
||||
assert inst2.epc_client.auth_token
|
||||
assert inst1.data is None
|
||||
|
||||
inst3 = Property(4, "AB12CD", "Test Address", data={"some": "data"}, epc_client=mock_epc_client)
|
||||
assert inst3.data == {"some": "data"}
|
||||
inst2 = Property(3, "AB12CD", "Test Address")
|
||||
assert inst2.id == 3
|
||||
|
||||
data = inst3.search_address_epc()
|
||||
assert data is None
|
||||
inst3 = Property(4, "AB12CD", "Test Address", data={"some": "data", "uprn": 123})
|
||||
assert inst3.data == {"some": "data", "uprn": 123}
|
||||
|
||||
def test_search_address_epc(self, property_instance):
|
||||
# Call the method to test
|
||||
property_instance.search_address_epc()
|
||||
|
||||
# Verify that the correct data is being returned
|
||||
assert property_instance.data == mock_epc_response["rows"][0]
|
||||
|
||||
def test_search_address_epc_multiple_results(self, property_instance_dupe_data, mock_epc_client_dupe_data):
|
||||
with pytest.raises(Exception, match="More than one result found for this address - investigate me"):
|
||||
property_instance_dupe_data.search_address_epc()
|
||||
|
||||
def test_get_components(self, property_instance, mock_cleaner, mock_epc_client):
|
||||
property_instance.search_address_epc()
|
||||
property_instance.get_components(mock_cleaner.cleaned)
|
||||
def test_get_components(
|
||||
self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
|
||||
):
|
||||
property_instance.get_components(
|
||||
mock_cleaner.cleaned,
|
||||
photo_supply_lookup=mock_photo_supply_lookup,
|
||||
floor_area_decile_thresholds=mock_floor_area_decile_thresholds
|
||||
)
|
||||
|
||||
# Verify that the components are set correctly
|
||||
assert property_instance.roof == {"original_description": "Roof Description"}
|
||||
assert property_instance.roof == {
|
||||
'original_description': 'pitched, no insulation', 'is_pitched': True,
|
||||
'is_flat': False, 'is_roof_room': False
|
||||
}
|
||||
|
||||
assert property_instance.walls == {
|
||||
"original_description": "Walls Description",
|
||||
"is_cavity_wall": True,
|
||||
|
|
@ -289,24 +319,15 @@ class TestProperty:
|
|||
|
||||
# Verify that ValueError is raised when EpcClean doesn't contain cleaned data
|
||||
with pytest.raises(ValueError, match="Cleaner does not contain cleaned data"):
|
||||
property_instance.get_components(mock_cleaner.cleaned)
|
||||
property_instance.get_components(mock_cleaner.cleaned, pd.DataFrame(), pd.DataFrame())
|
||||
|
||||
def test_get_components_no_data(self, property_instance, mock_cleaner):
|
||||
def test_get_components_no_attributes(
|
||||
self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
|
||||
):
|
||||
# Modify the mock cleaner to have no attributes for a specific description
|
||||
mock_cleaner.cleaned = {
|
||||
"roof-description": []
|
||||
}
|
||||
|
||||
# Verify that ValueError is raised when no attributes are found
|
||||
with pytest.raises(ValueError, match="Property does not contain data"):
|
||||
property_instance.get_components(mock_cleaner.cleaned)
|
||||
|
||||
def test_get_components_no_attributes(self, property_instance, mock_cleaner):
|
||||
# Modify the mock cleaner to have no attributes for a specific description
|
||||
mock_cleaner.cleaned = {
|
||||
"roof-description": []
|
||||
}
|
||||
property_instance.search_address_epc()
|
||||
property_instance.data["roof-description"] = "Pitched, no insulation"
|
||||
property_instance.walls = {
|
||||
"original_description": "Walls Description",
|
||||
|
|
@ -327,14 +348,17 @@ class TestProperty:
|
|||
}
|
||||
|
||||
# Assert backup cleaning has been applied
|
||||
property_instance.get_components(mock_cleaner.cleaned)
|
||||
property_instance.get_components(
|
||||
mock_cleaner.cleaned, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
|
||||
)
|
||||
|
||||
assert property_instance.roof["clean_description"] == "Pitched, no insulation"
|
||||
assert property_instance.roof["is_pitched"]
|
||||
|
||||
def test_get_components_multiple_attributes(self, property_instance, mock_cleaner):
|
||||
def test_get_components_multiple_attributes(
|
||||
self, property_instance, mock_cleaner, mock_photo_supply_lookup, mock_floor_area_decile_thresholds
|
||||
):
|
||||
# This shouldn't happen - it would mean a cleaning error
|
||||
property_instance.search_address_epc()
|
||||
property_instance.data["roof-description"] = "Roof Description"
|
||||
cleaned = {
|
||||
"roof-description": [
|
||||
|
|
@ -345,10 +369,10 @@ class TestProperty:
|
|||
|
||||
# Verify that ValueError is raised when multiple attributes are found
|
||||
with pytest.raises(ValueError, match="Either No attributes or multiple found for roof-description"):
|
||||
property_instance.get_components(cleaned)
|
||||
property_instance.get_components(cleaned, mock_photo_supply_lookup, mock_floor_area_decile_thresholds)
|
||||
|
||||
def test_set_spatial(self, mock_epc_client):
|
||||
prop = Property(1, "AB12CD", "Test Address", mock_epc_client)
|
||||
def test_set_spatial(self):
|
||||
prop = Property(1, postcode="AB12CD", address="Test Address")
|
||||
|
||||
spatial1 = pd.DataFrame([{
|
||||
'X_COORDINATE': 411143.0, 'Y_COORDINATE': 281701.0, 'LATITUDE': 52.4331896, 'LONGITUDE': -1.8375238,
|
||||
|
|
@ -362,7 +386,7 @@ class TestProperty:
|
|||
assert prop.is_heritage
|
||||
assert prop.restricted_measures
|
||||
|
||||
prop2 = Property(1, "AB12CD", "Test Address", mock_epc_client)
|
||||
prop2 = Property(1, "AB12CD", "Test Address")
|
||||
|
||||
spatial2 = pd.DataFrame([{
|
||||
'X_COORDINATE': 411143.0, 'Y_COORDINATE': 281701.0, 'LATITUDE': 52.4331896, 'LONGITUDE': -1.8375238,
|
||||
|
|
@ -376,10 +400,10 @@ class TestProperty:
|
|||
assert not prop2.is_heritage
|
||||
assert not prop2.restricted_measures
|
||||
|
||||
def test_set_floor_level(self, mock_epc_client):
|
||||
def test_set_floor_level(self):
|
||||
# In this case, we have a flat which looks like it's on the first floor, but it's actually on the ground
|
||||
# floor, so we should set floor_level to 0
|
||||
prop = Property(1, "AB12CD", "Test Address", mock_epc_client)
|
||||
prop = Property(1, postcode="AB12CD", address="Test Address")
|
||||
prop.data = {'floor-level': '01', 'property-type': 'Flat'}
|
||||
prop.floor = {
|
||||
'original_description': 'Solid, no insulation (assumed)', 'clean_description': 'Solid, no insulation',
|
||||
|
|
@ -395,7 +419,7 @@ class TestProperty:
|
|||
|
||||
# This property is labelled as being on the ground floor but actually has another property below
|
||||
# so we set floor level to 1
|
||||
prop2 = Property(1, "AB12CD", "Test Address", mock_epc_client)
|
||||
prop2 = Property(1, postcode="AB12CD", address="Test Address")
|
||||
prop2.data = {'floor-level': 'Ground', 'property-type': 'Flat'}
|
||||
prop2.floor = {
|
||||
'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation',
|
||||
|
|
@ -410,7 +434,7 @@ class TestProperty:
|
|||
assert prop2.floor_level == 1
|
||||
|
||||
# this property is correctly labelled as being on the 2nd floor
|
||||
prop3 = Property(1, "AB12CD", "Test Address", mock_epc_client)
|
||||
prop3 = Property(1, postcode="AB12CD", address="Test Address")
|
||||
prop3.data = {'floor-level': '02', 'property-type': 'Flat'}
|
||||
prop3.floor = {
|
||||
'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation',
|
||||
|
|
@ -425,7 +449,7 @@ class TestProperty:
|
|||
assert prop3.floor_level == 2
|
||||
|
||||
# Example of a house
|
||||
prop4 = Property(1, "AB12CD", "Test Address", mock_epc_client)
|
||||
prop4 = Property(1, postcode="AB12CD", address="Test Address")
|
||||
prop4.data = {'floor-level': '', 'property-type': 'House'}
|
||||
prop4.floor = {
|
||||
'original_description': '(Another dwelling below)', 'clean_description': 'Solid, no insulation',
|
||||
|
|
|
|||
|
|
@ -2,13 +2,11 @@ from backend.Property import Property
|
|||
from etl.epc.DataProcessor import DataProcessor
|
||||
from backend.app.plan.utils import create_recommendation_scoring_data, get_cleaned
|
||||
from etl.epc.settings import COLUMNS_TO_MERGE_ON
|
||||
from epc_api.client import EpcClient
|
||||
import pandas as pd
|
||||
import pytest
|
||||
import msgpack
|
||||
|
||||
from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
# Handy code for selecting testing data
|
||||
|
|
@ -122,7 +120,21 @@ class TestSapModelPrep:
|
|||
cleaned = msgpack.unpackb(cleaned, raw=False)
|
||||
return cleaned
|
||||
|
||||
def test_fill_cavity_wall(self, cleaned, cleaning_data):
|
||||
@pytest.fixture
|
||||
def photo_supply_lookup(self):
|
||||
photo_supply_lookup = read_dataframe_from_s3_parquet(
|
||||
bucket_name="retrofit-data-dev", file_key="solar_pv_supply/photo_supply_lookup.parquet",
|
||||
)
|
||||
return photo_supply_lookup
|
||||
|
||||
@pytest.fixture
|
||||
def floor_area_decile_thresholds(self):
|
||||
floor_area_decile_thresholds = read_dataframe_from_s3_parquet(
|
||||
bucket_name="retrofit-data-dev", file_key="solar_pv_supply/floor_area_decile_thresholds.parquet",
|
||||
)
|
||||
return floor_area_decile_thresholds
|
||||
|
||||
def test_fill_cavity_wall(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds):
|
||||
"""
|
||||
We ensure that the process that prepares the data in the engine code results in the same data as
|
||||
the model is trained on
|
||||
|
|
@ -288,11 +300,10 @@ class TestSapModelPrep:
|
|||
home = Property(
|
||||
id=0,
|
||||
postcode=starting_epc["postcode"],
|
||||
address1=starting_epc["address1"],
|
||||
epc_client=EpcClient(auth_token="notoken"),
|
||||
address=starting_epc["address1"],
|
||||
data=starting_epc
|
||||
)
|
||||
home.get_components(cleaned)
|
||||
home.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
|
||||
|
||||
data_processor = DataProcessor(None, newdata=True)
|
||||
data_processor.insert_data(pd.DataFrame([home.get_model_data()]))
|
||||
|
|
@ -356,7 +367,7 @@ class TestSapModelPrep:
|
|||
|
||||
assert test_record[c].values[0] == row[c]
|
||||
|
||||
def test_internal_wall_insulation(self, cleaned, cleaning_data):
|
||||
def test_internal_wall_insulation(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds):
|
||||
|
||||
starting_epc2 = {
|
||||
'low-energy-fixed-light-count': '2', 'address': 'FLAT 12, WAREHOUSE W, 3 WESTERN GATEWAY',
|
||||
|
|
@ -508,11 +519,10 @@ class TestSapModelPrep:
|
|||
home2 = Property(
|
||||
id=0,
|
||||
postcode=starting_epc2["postcode"],
|
||||
address1=starting_epc2["address1"],
|
||||
epc_client=EpcClient(auth_token="notoken"),
|
||||
address=starting_epc2["address1"],
|
||||
data=starting_epc2
|
||||
)
|
||||
home2.get_components(cleaned)
|
||||
home2.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
|
||||
home2.set_number_lighting_outlets(None)
|
||||
|
||||
data_processor2 = DataProcessor(None, newdata=True)
|
||||
|
|
@ -578,7 +588,7 @@ class TestSapModelPrep:
|
|||
|
||||
assert test_record2[c].values[0] == row2[c]
|
||||
|
||||
def test_ventilation(self, cleaned, cleaning_data):
|
||||
def test_ventilation(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds):
|
||||
|
||||
starting_epc3 = {
|
||||
'low-energy-fixed-light-count': '', 'address': '45 Shepperson Road', 'uprn-source': 'Energy Assessor',
|
||||
|
|
@ -728,11 +738,10 @@ class TestSapModelPrep:
|
|||
home3 = Property(
|
||||
id=0,
|
||||
postcode=starting_epc3["postcode"],
|
||||
address1=starting_epc3["address1"],
|
||||
epc_client=EpcClient(auth_token="notoken"),
|
||||
address=starting_epc3["address1"],
|
||||
data=starting_epc3
|
||||
)
|
||||
home3.get_components(cleaned)
|
||||
home3.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
|
||||
home3.set_number_lighting_outlets(None)
|
||||
|
||||
data_processor3 = DataProcessor(None, newdata=True)
|
||||
|
|
@ -782,7 +791,7 @@ class TestSapModelPrep:
|
|||
|
||||
assert test_record3[c].values[0] == row3[c]
|
||||
|
||||
def test_fireplaces(self, cleaned, cleaning_data):
|
||||
def test_fireplaces(self, cleaned, cleaning_data, photo_supply_lookup, floor_area_decile_thresholds):
|
||||
|
||||
starting_epc4 = {
|
||||
'low-energy-fixed-light-count': '', 'address': '9 Glebe Road, Asfordby Hill',
|
||||
|
|
@ -937,11 +946,10 @@ class TestSapModelPrep:
|
|||
home4 = Property(
|
||||
id=0,
|
||||
postcode=starting_epc4["postcode"],
|
||||
address1=starting_epc4["address1"],
|
||||
epc_client=EpcClient(auth_token="notoken"),
|
||||
address=starting_epc4["address1"],
|
||||
data=starting_epc4
|
||||
)
|
||||
home4.get_components(cleaned)
|
||||
home4.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
|
||||
home4.set_number_lighting_outlets(None)
|
||||
|
||||
data_processor4 = DataProcessor(None, newdata=True)
|
||||
|
|
|
|||
|
|
@ -75,6 +75,7 @@ def app():
|
|||
ewi_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="external_wall_insulation", header=0)
|
||||
lel_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="low_energy_lighting", header=0)
|
||||
flat_roof_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="flat_roof_insulation", header=0)
|
||||
window_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="window_glazing", header=0)
|
||||
|
||||
# Form a single table to be uploaded
|
||||
costs = pd.concat(
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ class Eligibility:
|
|||
|
||||
# If the loft has less than 100mm of insulation, we classify the home as needing loft insulation
|
||||
LOFT_INSULATION_THRESHOLD = 100
|
||||
HIGH_LOFT_INSULATION_THRESHOLD = 269
|
||||
|
||||
# Because EPCS have different values for tenure, we need to remap them to a common set of values
|
||||
tenure_remap = {
|
||||
|
|
@ -104,6 +105,8 @@ class Eligibility:
|
|||
self.LOFT_INSULATION_THRESHOLD if loft_thickness_threshold is None else loft_thickness_threshold
|
||||
)
|
||||
|
||||
high_loft_thickness_threshold = self.HIGH_LOFT_INSULATION_THRESHOLD
|
||||
|
||||
# We firstly check if the roof is a loft
|
||||
is_loft = self.roof["is_pitched"] and (not self.roof["is_roof_room"])
|
||||
|
||||
|
|
@ -122,7 +125,22 @@ class Eligibility:
|
|||
is_flat=self.roof["is_flat"]
|
||||
)
|
||||
|
||||
if insulation_thickness > loft_thickness_threshold:
|
||||
if insulation_thickness <= loft_thickness_threshold:
|
||||
self.loft = {
|
||||
"suitability": True,
|
||||
"thickness": insulation_thickness,
|
||||
"reason": None
|
||||
}
|
||||
|
||||
if insulation_thickness <= high_loft_thickness_threshold:
|
||||
self.loft = {
|
||||
"suitability": True,
|
||||
"thickness": insulation_thickness,
|
||||
"reason": "high loft thickness but below regulation"
|
||||
}
|
||||
return
|
||||
|
||||
if insulation_thickness > high_loft_thickness_threshold:
|
||||
# Insulation is already thick enough
|
||||
self.loft = {
|
||||
"suitability": False,
|
||||
|
|
@ -131,12 +149,6 @@ class Eligibility:
|
|||
}
|
||||
return
|
||||
|
||||
self.loft = {
|
||||
"suitability": True,
|
||||
"thickness": insulation_thickness,
|
||||
"reason": None
|
||||
}
|
||||
|
||||
def cavity_insulation(self):
|
||||
|
||||
"""
|
||||
|
|
@ -152,9 +164,25 @@ class Eligibility:
|
|||
is_partial_filled = (
|
||||
self.walls["is_as_built"] and self.walls["insulation_thickness"] not in ["below average"]
|
||||
)
|
||||
# We look for potentially under performing cavities - anything that is assumed, as built and insulated
|
||||
is_underperforming = (
|
||||
self.walls["is_as_built"] and self.walls["insulation_thickness"] in ["average"] and self.walls["is_assumed"]
|
||||
)
|
||||
|
||||
is_unfilled_cavity = is_cavity and is_empty
|
||||
is_partial_filled_cavity = is_cavity and is_partial_filled
|
||||
is_underperforming_cavity = is_cavity and is_underperforming
|
||||
|
||||
# Check if it has internal or external wall insulation
|
||||
has_internal_wall_insulation = self.walls["internal_insulation"]
|
||||
has_external_wall_insulation = self.walls["external_insulation"]
|
||||
|
||||
if has_internal_wall_insulation or has_external_wall_insulation:
|
||||
self.cavity = {
|
||||
"suitability": False,
|
||||
"type": "internal or external wall insulation"
|
||||
}
|
||||
return
|
||||
|
||||
if is_unfilled_cavity:
|
||||
self.cavity = {
|
||||
|
|
@ -170,6 +198,13 @@ class Eligibility:
|
|||
}
|
||||
return
|
||||
|
||||
if is_underperforming_cavity:
|
||||
self.cavity = {
|
||||
"suitability": True,
|
||||
"type": "underperforming"
|
||||
}
|
||||
return
|
||||
|
||||
self.cavity = {
|
||||
"suitability": False,
|
||||
"type": "full"
|
||||
|
|
@ -223,6 +258,14 @@ class Eligibility:
|
|||
}
|
||||
|
||||
def suspended_floor_insulation(self):
|
||||
|
||||
if "no_data" in self.floor.keys():
|
||||
if self.floor["no_data"]:
|
||||
self.suspended_floor = {
|
||||
"suitability": False,
|
||||
}
|
||||
return
|
||||
|
||||
is_suspended = self.floor["is_suspended"]
|
||||
is_insulated = self.floor["insulation_thickness"] in ["average", "above average"]
|
||||
|
||||
|
|
@ -232,6 +275,14 @@ class Eligibility:
|
|||
return
|
||||
|
||||
def solid_floor_insulation(self):
|
||||
|
||||
if "no_data" in self.floor.keys():
|
||||
if self.floor["no_data"]:
|
||||
self.solid_floor = {
|
||||
"suitability": False,
|
||||
}
|
||||
return
|
||||
|
||||
is_solid = self.floor["is_solid"]
|
||||
is_insulated = self.floor["insulation_thickness"] in ["average", "above average"]
|
||||
|
||||
|
|
@ -305,7 +356,8 @@ class Eligibility:
|
|||
"""
|
||||
|
||||
current_sap = int(self.epc["current-energy-efficiency"])
|
||||
if current_sap > 54:
|
||||
|
||||
if current_sap >= 69:
|
||||
self.eco4_warmfront = {
|
||||
"eligible": False,
|
||||
"message": "sap too high"
|
||||
|
|
@ -319,9 +371,22 @@ class Eligibility:
|
|||
is_eligible = self.cavity["suitability"] & self.loft["suitability"]
|
||||
|
||||
if post_retrofit_sap is None:
|
||||
|
||||
if current_sap >= 55:
|
||||
message = "Possibly eligible but property currently EPC D"
|
||||
else:
|
||||
message = "subject to post retrofit sap" if is_eligible else "not eligible"
|
||||
|
||||
# Update the message to flag properties that failed just because of a full cavity.
|
||||
# We need to double check that the wall is a cavity, that the loft is suitable and that the
|
||||
# sap is within reason
|
||||
# We can then estimate the age of the cavity fill
|
||||
if not is_eligible and (current_sap < 69) and self.loft["suitability"] and self.walls["is_cavity_wall"]:
|
||||
message = "Failed due to full cavity - check cavity age"
|
||||
|
||||
self.eco4_warmfront = {
|
||||
"eligible": is_eligible,
|
||||
"message": "subject to post retrofit sap"
|
||||
"message": message
|
||||
}
|
||||
return
|
||||
|
||||
|
|
|
|||
|
|
@ -11,13 +11,12 @@ import numpy as np
|
|||
import msgpack
|
||||
from datetime import datetime, timedelta
|
||||
from utils.logger import setup_logger
|
||||
from utils.s3 import read_from_s3
|
||||
from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet
|
||||
from dotenv import load_dotenv
|
||||
from backend.SearchEpc import SearchEpc
|
||||
from backend.Property import Property
|
||||
from etl.eligibility.Eligibility import Eligibility
|
||||
from etl.epc.DataProcessor import DataProcessor
|
||||
from backend.app.utils import read_parquet_from_s3
|
||||
from backend.app.plan.utils import create_recommendation_scoring_data
|
||||
from etl.epc.settings import COLUMNS_TO_MERGE_ON
|
||||
from backend.ml_models.api import ModelApi
|
||||
|
|
@ -247,6 +246,8 @@ def merge_ha_15(asset_list, identified_addresses):
|
|||
|
||||
identified_addresses = identified_addresses.drop_duplicates("merge_key")
|
||||
|
||||
# We pull out raw counts for the survey lists
|
||||
|
||||
# Check asset list for dupes
|
||||
asset_list_dupes = asset_list["merge_key"].duplicated()
|
||||
if asset_list_dupes.sum():
|
||||
|
|
@ -336,7 +337,10 @@ def merge_ha_15(asset_list, identified_addresses):
|
|||
return merged_data, dropped_identified_merge_keys
|
||||
|
||||
|
||||
def prepare_model_data_row(property_id, modelling_epc, cleaned, cleaning_data, created_at):
|
||||
def prepare_model_data_row(
|
||||
property_id, modelling_epc, cleaned, cleaning_data, created_at,
|
||||
photo_supply_lookup, floor_area_decile_thresholds, old_data=None, full_sap_epc=None,
|
||||
):
|
||||
"""
|
||||
This function prepares the data for modelling, in the same fashion as the recommendation engine
|
||||
With up-coming refactoring, this will change
|
||||
|
|
@ -346,15 +350,24 @@ def prepare_model_data_row(property_id, modelling_epc, cleaned, cleaning_data, c
|
|||
p = Property(
|
||||
id=property_id,
|
||||
postcode=modelling_epc["postcode"],
|
||||
address1=modelling_epc["address1"],
|
||||
epc_client=None,
|
||||
data=modelling_epc
|
||||
address=modelling_epc["address1"],
|
||||
data=modelling_epc,
|
||||
old_data=old_data,
|
||||
full_sap_epc=full_sap_epc
|
||||
)
|
||||
|
||||
p.get_components(cleaned)
|
||||
p.get_components(cleaned, photo_supply_lookup=photo_supply_lookup,
|
||||
floor_area_decile_thresholds=floor_area_decile_thresholds)
|
||||
|
||||
# THIS IS TEMP AND SHOULDN'T BE HERE
|
||||
data_to_clean = p.get_model_data()
|
||||
if data_to_clean["NUMBER_HEATED_ROOMS"] in ['', None]:
|
||||
data_to_clean["NUMBER_HEATED_ROOMS"] = data_to_clean["NUMBER_HABITABLE_ROOMS"]
|
||||
p.data["number-heated-rooms"] = data_to_clean["NUMBER_HABITABLE_ROOMS"]
|
||||
|
||||
# This is temp - this should happen after scoring
|
||||
cleaned_property_data = DataProcessor.apply_averages_cleaning(
|
||||
data_to_clean=pd.DataFrame([dict(**p.get_model_data(), LOCAL_AUTHORITY=p.data["local-authority"])]),
|
||||
data_to_clean=pd.DataFrame([dict(**data_to_clean, LOCAL_AUTHORITY=p.data["local-authority"])]),
|
||||
cleaning_data=cleaning_data,
|
||||
cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
|
||||
)
|
||||
|
|
@ -829,6 +842,18 @@ def analyse_ha_32_results(results, ha32, no_house_numbers):
|
|||
results_df["warmfront_identified"]
|
||||
]
|
||||
|
||||
# Aggregate number of eco and gbis jobs identified
|
||||
n_eco = results_df["eco4_eligible"].sum()
|
||||
# Gbis is rows where eco4 is not eligible
|
||||
n_gbis = results_df[
|
||||
(results_df["gbis_eligible"] == True) & (results_df["eco4_eligible"] == False)
|
||||
]["gbis_eligible"].sum()
|
||||
|
||||
pipeline_potential = results_df[
|
||||
(results_df["warmfront_identified"] == True) | (results_df["eco4_eligible"] == True) | (
|
||||
results_df["gbis_eligible"] == True)
|
||||
]
|
||||
|
||||
success_rate = warmfront_identified["gbis_eligible"].sum() / warmfront_identified.shape[0]
|
||||
# For HA32, this is 89%
|
||||
|
||||
|
|
@ -886,8 +911,16 @@ def analyse_ha_32_results(results, ha32, no_house_numbers):
|
|||
|
||||
new_possibilities = results_df[
|
||||
(~results_df["warmfront_identified"]) &
|
||||
(results_df["gbis_eligible"] | results_df["eco4_eligible"]) &
|
||||
(results_df["tenure"] == "Rented (social)")
|
||||
(results_df["gbis_eligible"] | results_df["eco4_eligible"])
|
||||
].copy()
|
||||
|
||||
new_possibilities_eco = results_df[
|
||||
(~results_df["warmfront_identified"]) &
|
||||
(results_df["eco4_eligible"] == True)
|
||||
].copy()
|
||||
new_possibilities_gbis = results_df[
|
||||
(~results_df["warmfront_identified"]) &
|
||||
(results_df["eco4_eligible"] == False) & (results_df["gbis_eligible"] == True)
|
||||
].copy()
|
||||
|
||||
future_possibilities_eco = results_df[
|
||||
|
|
@ -947,6 +980,8 @@ def analyse_ha_15_results(results_df, ha15, no_house_numbers):
|
|||
results_df["warmfront_identified"]
|
||||
]
|
||||
|
||||
warmfront_identified = warmfront_identified
|
||||
|
||||
n_identified = (warmfront_identified["gbis_eligible"] | warmfront_identified["eco4_eligible"]).sum()
|
||||
|
||||
success_rate = n_identified / warmfront_identified.shape[0]
|
||||
|
|
@ -955,6 +990,11 @@ def analyse_ha_15_results(results_df, ha15, no_house_numbers):
|
|||
"eligibility_classification"].value_counts()
|
||||
# For HA15 this is 50.3%
|
||||
|
||||
pipeline_potential = results_df[
|
||||
(results_df["warmfront_identified"] == True) | (results_df["eco4_eligible"] == True) | (
|
||||
results_df["gbis_eligible"] == True)
|
||||
]
|
||||
|
||||
# Of the properties we identify, what is the mix of confidence
|
||||
|
||||
missed = results_df[
|
||||
|
|
@ -973,32 +1013,37 @@ def analyse_ha_15_results(results_df, ha15, no_house_numbers):
|
|||
missed["sap"] < 69
|
||||
]
|
||||
|
||||
sap_low_enough["walls"].value_counts()
|
||||
z = ha15[ha15["row_id"].isin(sap_too_high["row_id"].values)]
|
||||
|
||||
investigate_1 = ha15[ha15["row_id"].isin(sap_too_high["row_id"])][
|
||||
["row_id", "Postcode", "Address Line 1", "Address Line 2", "Address Line 3"]]
|
||||
|
||||
investigate_2 = ha15[ha15["row_id"].isin(sap_low_enough["row_id"])][
|
||||
["row_id", "Postcode", "Address Line 1", "Address Line 2", "Address Line 3"]]
|
||||
|
||||
missed["message"].value_counts()
|
||||
# Aggregate number of eco and gbis jobs identified
|
||||
n_eco = results_df["eco4_eligible"].sum()
|
||||
# Gbis is rows where eco4 is not eligible
|
||||
n_gbis = results_df[
|
||||
(results_df["gbis_eligible"] == True) & (results_df["eco4_eligible"] == False)
|
||||
]["gbis_eligible"].sum()
|
||||
|
||||
# We now look for properties that we identified, that were not identified by Warmfront
|
||||
|
||||
new_possibilities = results_df[
|
||||
(~results_df["warmfront_identified"]) &
|
||||
((results_df["gbis_eligible"] == True) | (results_df["eco4_eligible"] == True)) &
|
||||
(results_df["tenure"] == "Rented (social)")
|
||||
((results_df["gbis_eligible"] == True) | (results_df["eco4_eligible"] == True))
|
||||
].copy()
|
||||
|
||||
new_possibilities_eco = results_df[
|
||||
(~results_df["warmfront_identified"]) &
|
||||
(results_df["eco4_eligible"] == True)
|
||||
].copy()
|
||||
|
||||
new_possibilities_gbis = results_df[
|
||||
(~results_df["warmfront_identified"]) &
|
||||
(results_df["eco4_eligible"] == False) & (results_df["gbis_eligible"] == True)
|
||||
].copy()
|
||||
|
||||
# These are future possibilities
|
||||
new_possibilities_eco = results_df[
|
||||
future_possibilities_eco = results_df[
|
||||
(~results_df["warmfront_identified"]) &
|
||||
(results_df["eco4_eligible_future"] == True) & (~(results_df["gbis_eligible"] | results_df["eco4_eligible"]))
|
||||
].copy()
|
||||
|
||||
new_possibilities_gbis = results_df[
|
||||
future_possibilities_gbis = results_df[
|
||||
(~results_df["warmfront_identified"]) &
|
||||
(results_df["gbis_eligible_future"] == True) & (results_df["eco4_eligible_future"] == False) & (
|
||||
~(results_df["gbis_eligible"] | results_df["eco4_eligible"]))
|
||||
|
|
@ -1058,7 +1103,7 @@ def app():
|
|||
)
|
||||
cleaned = msgpack.unpackb(cleaned, raw=False)
|
||||
|
||||
cleaning_data = read_parquet_from_s3(
|
||||
cleaning_data = read_dataframe_from_s3_parquet(
|
||||
bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
|
||||
)
|
||||
|
||||
|
|
|
|||
647
etl/eligibility/ha_15_32/ha16_app.py
Normal file
647
etl/eligibility/ha_15_32/ha16_app.py
Normal file
|
|
@ -0,0 +1,647 @@
|
|||
import os
|
||||
import msgpack
|
||||
import openpyxl
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from utils.s3 import read_from_s3
|
||||
from utils.logger import setup_logger
|
||||
from dotenv import load_dotenv
|
||||
from utils.s3 import read_dataframe_from_s3_parquet
|
||||
from tqdm import tqdm
|
||||
from backend.SearchEpc import SearchEpc
|
||||
from etl.eligibility.Eligibility import Eligibility
|
||||
from etl.eligibility.ha_15_32.app import prepare_model_data_row
|
||||
from etl.epc.DataProcessor import DataProcessor
|
||||
from etl.epc.settings import COLUMNS_TO_MERGE_ON
|
||||
from backend.ml_models.api import ModelApi
|
||||
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
|
||||
from recommendations.recommendation_utils import calculate_cavity_age
|
||||
from recommendation_utils import convert_thickness_to_numeric
|
||||
|
||||
import re
|
||||
|
||||
# The .env file sits next to this script. Path(__file__).parent is already
# etl/eligibility/ha_15_32, so the directory components must not be appended again
# (load_dotenv silently ignores a path that does not exist).
ENV_FILE = Path(__file__).parent / ".env"

# Load the .env file BEFORE reading from the environment; otherwise a token that is
# only defined in the .env file is never picked up and EPC_AUTH_TOKEN ends up None.
load_dotenv(ENV_FILE)
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")

logger = setup_logger()
|
||||
|
||||
|
||||
# Ordered (wrong, right) corrections applied to the lower-cased survey street names so
# that they agree with the asset list. ORDER MATTERS: e.g. "cresent" must be fixed before
# the "... crescent" entries, and the specific "..., fitton hill" entries must run before
# the generic ", fitton hill" removal.
_HA16_STREET_CORRECTIONS = [
    ("eccels", "eccles"),
    ("chatley, road", "chatley road"),
    ("vaughen", "Vaughan"),
    ("cresent", "crescent"),
    ("plantation road", "plantation avenue"),
    ("how clough drive", "howclough drive"),
    ("brockhurst lane", "brookhurst lane"),
    ("biirch road", "birch road"),
    ("hadson road", "hodson road"),
    ("harbonne avennue", "narbonne avenue"),
    ("cumberland road, cadishead", "cumberland avenue, cadishead"),
    ("aston field drive", "ashton field drive"),
    ("wedgewood road", "wedgwood road"),
    ("hamilton close", "hamilton avenue"),
    ("lichens crescent, fitton hill", "lichens crescent"),
    ("south croft, fitton hill", "south croft"),
    (", fitton hill", ""),
    ("firtree dr", "fir tree avenue"),
    ("hawthorne road", "hawthorn crescent"),
    ("rein lee avenue", "reins lee avenue"),
    ("westerhill road", "wester hill road"),
    ("st martins road", "saint martins road"),
    ("timperley avenue", "timperley close"),
    ("eastwood road", "eastwood avenue"),
    ("new road", "new street"),
    ("grassmere road", "grasmere road"),
    ("hulton road", "hulton avenue"),
    ("beechfield avenue", "beechfield road"),
    ("princess avenue", "princes avenue"),
    ("edge ford crecent", "edge fold crescent"),
    ("conniston avenue", "coniston avenue"),
    ("blackthorne crescent", "blackthorn crescent"),
    ("wellstock road", "wellstock lane"),
    ("brackley avenue", "brackley street"),
    ("brook avenue swinton", "brook avenue, swinton"),
    ("green avenue swinton", "green avenue, swinton"),
    ("grasmere avenue wardley", "grasmere avenue, wardley"),
    ("mardale avenue wardle", "mardale avenue, wardle"),
    ("carleach grove", "cartleach Grove"),
    ("arbour grove", "arbor Grove"),
]


def _read_sheet_with_colours(path):
    """Return (header, rows, colours) for the active sheet of the workbook at *path*."""
    sheet = openpyxl.load_workbook(path).active
    header = [cell.value for cell in sheet[1]]

    rows = []
    colours = []
    # The first row is assumed to be the header, so data starts on row 2
    for row in sheet.iter_rows(min_row=2, values_only=False):
        rows.append([cell.value for cell in row])
        # The fill of the first cell stands in for the colour of the whole row;
        # '00000000' is reported for cells without a fill.
        colour = row[0].fill.start_color.index
        colours.append(colour if colour != '00000000' else None)

    return header, rows, colours


def load_data():
    """
    Load the HA 16 asset list and ECO4 survey list and match each survey row to an asset.

    The asset list is spread over two workbooks which are stacked together. The survey
    list's street names and postcodes are normalised and corrected so that each survey row
    can be matched to exactly one asset-list address.

    :return: tuple (data, survey_list). `data` is the asset list left-joined with the
        matched survey information, with a boolean "warmfront_identified" column;
        `survey_list` is the cleaned survey dataframe.
    :raises ValueError: if a survey row cannot be matched to exactly one asset-list row
    """
    # This asset list is spread across two sheets, which we need to combine
    asset_list_filenames = [
        "HESTIA - HA 16 ASSET LIST PART 1 OF 2.xlsx",
        "HESTIA - HA 16 ASSET LIST PART 2 OF 2.xlsx",
    ]

    # Collect the row values and row colours across both workbooks
    rows_data = []
    rows_colors = []
    colnames = []
    for asset_list_filename in asset_list_filenames:
        sheet_colnames, sheet_rows, sheet_colors = _read_sheet_with_colours(
            f'etl/eligibility/ha_15_32/{asset_list_filename}'
        )
        colnames.append(sheet_colnames)
        rows_data.extend(sheet_rows)
        rows_colors.extend(sheet_colors)

    # The header of the first workbook is used for the combined data
    asset_list = pd.DataFrame(rows_data, columns=colnames[0])
    # Remove None columns
    asset_list = asset_list.iloc[:, 0:12]
    asset_list['row_color'] = rows_colors

    asset_list["row_colour_name"] = np.where(
        asset_list["row_color"] == "FFFF0000", "red",
        np.where(asset_list["row_color"] == "FF92D050", "green", "yellow")
    )

    # Split up the address on commas, which is useful for matching later
    split_addresses = asset_list['Address'].str.split(',', expand=True)
    split_addresses.columns = ['temp', 'address2', 'address3', 'address4', 'address5']

    asset_list = pd.concat([asset_list, split_addresses], axis=1)
    # There are no commas separating house number and address 1
    split_addresses2 = asset_list['temp'].str.split(' ', expand=True)
    split_addresses2.columns = ['HouseNo', 'part1', 'part2', "part3", "part4"]
    # We could re-concatenate but we only care about HouseNo for the moment
    asset_list = pd.concat([asset_list, split_addresses2[["HouseNo"]]], axis=1)

    # We now read in the survey list
    survey_colnames, survey_rows, survey_colors = _read_sheet_with_colours(
        'etl/eligibility/ha_15_32/HESTIA- HA 16 ECO4 SURVEY LIST.xlsx'
    )
    survey_list = pd.DataFrame(survey_rows, columns=survey_colnames)

    # For the survey list, we don't need the colours, since there is a column called "INSTALLED OR CANCELLED"
    # which describes the status of the property
    survey_list["row_colour"] = survey_colors
    survey_list["survey_key"] = ["survey_" + str(i) for i in range(0, len(survey_list))]

    # Tidy up the street/block name a bit. Everything is lower-cased first, so all of the
    # corrections below are written against lower-case text.
    street = survey_list["Street / Block Name"].str.replace("/", ", ", regex=False).str.lower()
    # Replace the standalone word "rd" with "road" (this also covers "reeds rd")
    street = street.str.replace(r'\brd\b', 'road', regex=True)
    # Replace " , " with ", "
    street = street.str.replace(" , ", ', ', regex=False)
    # Fix "{place} ,{place}" with "{place}, {place}"
    street = street.str.replace(r'\s*,\s*', ', ', regex=True)
    # Strip whitespace
    survey_list["Street / Block Name"] = street.str.strip()

    # Correct errors
    survey_list["Post Code"] = np.where(
        survey_list["Post Code"] == "M38 0SA",
        "M38 9SA",
        survey_list["Post Code"]
    )

    survey_list["Post Code"] = np.where(
        (survey_list["Street / Block Name"] == "nelson drive") & (survey_list["Post Code"] == "M44 5JE"),
        "M44 5JF",
        survey_list["Post Code"]
    )

    street = survey_list["Street / Block Name"]
    for wrong, right in _HA16_STREET_CORRECTIONS:
        street = street.str.replace(wrong, right, regex=False)
    survey_list["Street / Block Name"] = street

    # Replacement for clively avenue 66-68
    survey_list["NO."] = np.where(
        survey_list["NO."] == "66-68",
        "66",
        survey_list["NO."]
    )

    # We now need to merge the survey list onto the asset list
    # Could be easier just to do a search on each row, even though it's much slower
    address_lower = asset_list["Address"].str.lower()  # hoisted: does not change per survey row

    matched = []
    for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):

        house_number = row["NO."]
        if isinstance(house_number, str):
            house_number = house_number.lower()
        house_number = str(house_number)

        street_name = row["Street / Block Name"].lower()

        # Filter on the street name and the house number appearing in the address. The
        # match is literal (regex=False) so punctuation in an address is not interpreted
        # as a pattern, and rows without an address never match (na=False).
        df = asset_list[
            address_lower.str.contains(street_name, regex=False, na=False) &
            address_lower.str.contains(house_number, regex=False, na=False)
        ]
        if df.shape[0] != 1:
            # The survey number was lower-cased above, so lower-case the asset side too
            df = df[df["HouseNo"].str.lower() == house_number]
        if df.shape[0] != 1:
            df = df[df["Postcode"].str.lower().str.contains(row["Post Code"].lower(), regex=False, na=False)]
        if df.shape[0] != 1:
            raise ValueError(
                f"Expected exactly one asset-list match for {row['survey_key']} "
                f"({row['NO.']!r}, {row['Street / Block Name']!r}, {row['Post Code']!r}) "
                f"but found {df.shape[0]} - investigate"
            )

        matched.append(
            {
                "survey_key": row["survey_key"],
                "matched_address": df["Address"].values[0],
                "survey_house_no": row["NO."],
                "survey_street_name": row["Street / Block Name"],
                "survey_postcode": row["Post Code"],
                "survey_status": row["INSTALLED OR CANCELLED"]
            }
        )

    matched = pd.DataFrame(matched)
    matched["warmfront_identified"] = True

    # Combine asset list and surveys
    data = asset_list.merge(
        matched, how="left", left_on="Address", right_on="matched_address",
    )
    # Assets without a matched survey row were not identified
    data["warmfront_identified"] = data["warmfront_identified"].fillna(False)

    return data, survey_list
|
||||
|
||||
|
||||
def _classify_eco4_confidence(sap, post_install_sap):
    """Grade how comfortably the predicted post-install SAP clears its target."""
    # Properties with a current SAP of 38 or below (bands F/G) are graded against a
    # target of 55, all others against 69 - presumably the ECO4 minimum-improvement
    # rule (F/G must reach band D, D/E must reach band C); TODO confirm.
    target = 55 if sap <= 38 else 69

    if post_install_sap >= target + 2:
        return "highest confidence"
    if post_install_sap >= target:
        return "high confidence"
    if post_install_sap >= target - 2:
        return "medium confidence"
    # Also reached when the prediction is missing (NaN compares False everywhere)
    return "unlikely"


def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
    """
    Find an EPC for every property, run the eligibility checks and score the ECO4 candidates.

    :param data: asset dataframe; the columns "row_id", "HouseNo", "Postcode", "Address" and
        "Type" are read for each row
    :param cleaned: cleaned EPC lookup, passed through to Eligibility, calculate_cavity_age
        and prepare_model_data_row
    :param cleaning_data: cleaning dataset, passed through to prepare_model_data_row and DataProcessor
    :param created_at: timestamp passed to prepare_model_data_row and ModelApi
    :param photo_supply_lookup: passed through to prepare_model_data_row
    :param floor_area_decile_thresholds: passed through to prepare_model_data_row
    :return: tuple (results_df, scoring_data, nodata): one result row per property with an
        EPC (plus SAP uplift and eligibility classification), the raw model scoring rows,
        and the asset rows for which no EPC was found
    """
    scoring_data = []
    results = []
    nodata = []

    # Maps the asset list's "Type" onto the EPC property-type / built-form vocabulary
    property_type_lookup = {
        'Semi Detached Bungalow': {"property-type": "Bungalow", "built-form": "Semi-Detached"},
        'Mid Terraced House': {"property-type": "House", "built-form": "Mid-Terrace"},
        'End Terraced House': {"property-type": "House", "built-form": "End-Terrace"},
        'Low Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"},
        'Semi-Detached House': {"property-type": "House", "built-form": "Semi-Detached"},
        'Detached Bungalow': {"property-type": "Bungalow", "built-form": "Detached"},
        'End Terraced Bungalow': {"property-type": "Bungalow", "built-form": "End-Terrace"},
        'Mid Terraced Bungalow': {"property-type": "Bungalow", "built-form": "Mid-Terrace"},
        'Medium Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"},
        'Detached House': {"property-type": "House", "built-form": "Detached"},
        'Cottage Flat': {"property-type": "Flat", "built-form": "Semi-Detached"},
        'Maisonette Medium Rise': {"property-type": "Flat", "built-form": "Mid-Terrace"},
        'Maisonette Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
        'End Terraced Town House': {"property-type": "House", "built-form": "End-Terrace"},
        'Flat Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
        'Mid Terraced Town House': {"property-type": "House", "built-form": "Mid-Terrace"},
    }

    for _, property_meta in tqdm(data.iterrows(), total=len(data)):

        searcher = SearchEpc(
            address1=property_meta["HouseNo"],
            postcode=property_meta["Postcode"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            full_address=property_meta["Address"]
        )
        # The Ordnance Survey lookup is skipped, so seed its outputs from the asset list
        searcher.ordnance_survey_client.property_type = property_type_lookup[property_meta["Type"]]["property-type"]
        searcher.ordnance_survey_client.built_form = property_type_lookup[property_meta["Type"]]["built-form"]
        searcher.find_property(skip_os=True)

        if searcher.newest_epc is None:
            nodata.append(property_meta)
            continue

        if searcher.newest_epc.get("estimated"):
            # We insert the row ID as our proxy for UPRN
            proxy_uprn = int(property_meta["row_id"].split("_")[1])
            searcher.newest_epc["uprn"] = proxy_uprn

        newest_epc = searcher.newest_epc
        older_epcs = searcher.older_epcs
        full_sap_epc = searcher.full_sap_epc
        # We also want to get the penultimate epc
        penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
        if not penultimate_epc:
            penultimate_epc = newest_epc

        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
        eligibility.check_gbis_warmfront()
        eligibility.check_eco4_warmfront()

        if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
            # Neither scheme applies on the newest EPC, so fall back to the penultimate one
            eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
            eligibility.check_gbis_warmfront()
            eligibility.check_eco4_warmfront()
            # If this is the case, we need to update the older epcs
            # We don't update just to make data cleaning easier
            if penultimate_epc.get("estimated") is None:
                older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]

        # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity

        # Loft MUST be suitable
        cavity_age = None
        if (
            eligibility.walls["is_cavity_wall"] and
            eligibility.walls["is_filled_cavity"] and
            eligibility.loft["suitability"] and
            eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
        ):
            # We check the age of the cavity and if it's particularly old, we flag it
            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)

        # Full checks
        eligibility.check_gbis()
        eligibility.check_eco4()

        # Only the ECO4 candidates are scored by the model
        if eligibility.eco4_warmfront["eligible"]:
            if eligibility.epc["uprn"] == "":
                # Scoring needs an integer UPRN, so fall back to the row number
                eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[1])

            scoring_dictionary = prepare_model_data_row(
                property_id=property_meta["row_id"],
                modelling_epc=eligibility.epc,
                cleaned=cleaned,
                cleaning_data=cleaning_data,
                created_at=created_at,
                old_data=older_epcs,
                full_sap_epc=full_sap_epc,
                photo_supply_lookup=photo_supply_lookup,
                floor_area_decile_thresholds=floor_area_decile_thresholds
            )
            scoring_data.extend(scoring_dictionary)

        results.append(
            {
                "row_id": property_meta["row_id"],
                "uprn": eligibility.epc["uprn"],
                "Address": property_meta["Address"],
                "Postcode": property_meta["Postcode"],
                "property_type": eligibility.epc["property-type"],
                "gbis_eligible": eligibility.gbis_warmfront,
                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                "eco4_message": eligibility.eco4_warmfront["message"],
                "sap": float(eligibility.epc["current-energy-efficiency"]),
                "gbis_eligible_future": eligibility.gbis["eligible"],
                "gbis_eligible_future_message": eligibility.gbis["message"],
                "eco4_eligible_future": eligibility.eco4["eligible"],
                "eco4_eligible_future_message": eligibility.eco4["message"],
                # Property components
                "roof": eligibility.roof["clean_description"],
                "walls": eligibility.walls["clean_description"],
                "cavity_type": eligibility.cavity["type"],
                "heating": eligibility.epc["mainheat-description"],
                "tenure": eligibility.tenure,
                "date_epc": eligibility.epc["lodgement-date"],
                "loft_thickness": eligibility.roof["insulation_thickness"],
                "cavity_age": cavity_age,
                **eligibility.walls,
                **eligibility.roof,
            }
        )

    results_df = pd.DataFrame(results)

    if not scoring_data:
        # Nothing was ECO4 eligible, so there is nothing to score. Return the results with
        # empty prediction columns instead of failing on an empty scoring dataframe.
        logger.warning("No ECO4 eligible properties found - skipping model predictions")
        results_df["sap_uplift"] = np.nan
        results_df["post_install_sap"] = np.nan
        results_df["eligibility_classification"] = None
        return results_df, scoring_data, nodata

    scoring_df = pd.DataFrame(scoring_data)

    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )

    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])

    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )

    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)

    # NOTE(review): the portfolio id says "ha33" although this script processes HA16 -
    # looks like a copy-paste from another script; left unchanged because it may
    # determine where predictions are stored. Confirm.
    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )

    predictions = all_predictions["sap_change_predictions"].copy()

    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
        results_df[["row_id", "sap"]], how="left", on="row_id"
    )
    # Each prediction row contributes its uplift over the current SAP; the uplifts are
    # summed per property
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()

    results_df = results_df.merge(
        predictions[["sap_uplift", "row_id"]],
        how="left",
        on="row_id"
    )
    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]

    # The upgrade requirements are dependent on the current SAP
    eco4_rows = results_df[results_df["eco4_eligible"] == True]
    # Built from a dict of columns so the frame has both columns even when it is empty
    eligibility_assessment = pd.DataFrame(
        {
            "row_id": eco4_rows["row_id"].tolist(),
            "eligibility_classification": [
                _classify_eco4_confidence(sap, post_install_sap)
                for sap, post_install_sap in zip(eco4_rows["sap"], eco4_rows["post_install_sap"])
            ],
        }
    )

    results_df = results_df.merge(
        eligibility_assessment, how="left", on="row_id"
    )
    return results_df, scoring_data, nodata
|
||||
|
||||
|
||||
def analyse_results(results_df, data, survey_list):
    """
    Exploratory breakdown of our eligibility results against the surveyed properties.

    Nothing is returned: every frame and statistic below is a local, so this function is
    only useful when stepped through interactively.

    :param results_df: per-property results, joined to `data` on "row_id"
    :param data: asset data carrying "row_id", "survey_key", "warmfront_identified" and
        "row_colour_name"
    :param survey_list: survey data; its first column is treated as the funding scheme
    """
    eco_schemes = ["ECO4 A/W", "AFFORDABLE WARMTH"]
    gbis_schemes = ["ECO4 GBIS (ECO+)"]
    full_cavity_message = "Failed due to full cavity - check cavity age"

    asset_columns = data[["row_id", "survey_key", "warmfront_identified", "row_colour_name"]]
    with_results = asset_columns.merge(results_df, how="left", on="row_id")

    scheme_column = survey_list.columns[0]
    funding = survey_list[["survey_key", scheme_column]].rename(columns={scheme_column: "funding_scheme"})
    analysis_data = with_results.merge(funding, how="left", on="survey_key")

    # Normalise missing thicknesses to None before converting them to numbers
    raw_thickness = analysis_data["roof_insulation_thickness"]
    analysis_data["roof_insulation_thickness"] = np.where(pd.isnull(raw_thickness), None, raw_thickness)

    def pitched_thickness(value):
        return convert_thickness_to_numeric(value, is_flat=False, is_pitched=True)

    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
        pitched_thickness
    )

    # Explicit comparisons against True / False are deliberate: the left merges leave
    # missing values in these columns, and those must fall into neither group.
    sold = analysis_data["warmfront_identified"] == True
    not_sold = analysis_data["warmfront_identified"] == False
    loft_up_to_100 = analysis_data["roof_insulation_thickness_numeric"] <= 100
    loft_over_100 = analysis_data["roof_insulation_thickness_numeric"] > 100
    eco4_yes = analysis_data["eco4_eligible"] == True
    eco4_no = analysis_data["eco4_eligible"] == False
    gbis_yes = analysis_data["gbis_eligible"] == True

    warmfront_sold_eco4 = analysis_data[sold & analysis_data["funding_scheme"].isin(eco_schemes)]  # 1407
    warmfront_sold_gbis = analysis_data[sold & analysis_data["funding_scheme"].isin(gbis_schemes)]

    ideal_eco4_warmfront_not_sold = analysis_data[eco4_yes & not_sold & loft_up_to_100]
    secondary_eco4_warmfront_not_sold = analysis_data[eco4_yes & not_sold & loft_over_100]

    # underperforming cavities
    full_cavity = analysis_data["eco4_message"] == full_cavity_message
    older_than_ten_years = analysis_data["cavity_age"] > 10 * 365
    underperforming_cavities = analysis_data[full_cavity & older_than_ten_years & loft_up_to_100]

    identified_gbis_not_sold = analysis_data[gbis_yes & not_sold & eco4_no]

    eco_eligible = analysis_data[eco4_yes]
    eco_ineligible = analysis_data[eco4_no]

    eco_ineligible["eco4_message"].value_counts()

    # SAP too high:
    sap_too_high = eco_ineligible[eco_ineligible["eco4_message"] == "sap too high"].copy()
    cavity_wall_descriptions = [
        "Cavity wall, as built, insulated",
        "Cavity wall, as built, no insulation",
        "Cavity wall, as built, partial insulation",
        "Cavity wall, no insulation",
        "Cavity wall, partial insulation"
    ]
    further_possibilities = sap_too_high[sap_too_high["walls"].isin(cavity_wall_descriptions)]

    # Same filter as sap_too_high (without the copy)
    filled_cavities = eco_ineligible[eco_ineligible["eco4_message"] == "sap too high"]

    warmfront_identified = analysis_data[analysis_data["warmfront_identified"]]
    warmfront_identified["walls"].value_counts()

    sold_as_gbis = analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin(gbis_schemes)
    ours_gbis_only = analysis_data["gbis_eligible"] & analysis_data["eco4_eligible"].isin([False, None])
    all_identified_gbis = analysis_data[sold_as_gbis | ours_gbis_only]

    empty_cavity_desriptions = [
        "Cavity wall, as built, no insulation", "Cavity wall, as built, partial insulation",
        "Cavity wall, no insulation", "Cavity wall, partial insulation"
    ]

    empty_cavities = analysis_data[analysis_data["walls"].isin(empty_cavity_desriptions)]
    remaining_empty = empty_cavities[~empty_cavities["warmfront_identified"]]

    # Of the ECO jobs, what proportion do we get right
    warmfront_identified_eco = warmfront_identified[warmfront_identified["funding_scheme"].isin(eco_schemes)]
    eco_success_rate = warmfront_identified_eco["eco4_eligible"].sum() / warmfront_identified_eco.shape[0]

    warmfront_identified_gbis = warmfront_identified[warmfront_identified["funding_scheme"].isin(gbis_schemes)]
    gbis_success_rate = warmfront_identified_gbis["gbis_eligible"].sum() / warmfront_identified_gbis.shape[0]

    # Additional identified
    additional_identified_eco = analysis_data[eco4_yes & not_sold]
    additional_identified_eco["eligibility_classification"].value_counts()

    additional_identified_gbis = analysis_data[gbis_yes & eco4_no & not_sold].shape[0]

    # Future
    eco4_future_yes = analysis_data["eco4_eligible_future"] == True
    eco4_future_no = analysis_data["eco4_eligible_future"] == False
    gbis_future_yes = analysis_data["gbis_eligible_future"] == True

    additional_identified_eco_future = analysis_data[eco4_future_yes & not_sold].shape[0]
    additional_identified_gbis_future = analysis_data[gbis_future_yes & eco4_future_no & not_sold].shape[0]
|
||||
|
||||
|
||||
def app():
    """
    Run the HA16 eligibility pipeline: load the asset and survey lists, fetch the shared
    lookups from S3 and compute the EPC based eligibility results.

    The results are not persisted; the commented-out snippets at the bottom show how they
    were stored to / restored from a local pickle during development.
    """
    data_bucket = "retrofit-data-dev"

    data, survey_list = load_data()

    # Every asset gets a stable synthetic identifier of the form "ha16_<row number>"
    data["row_id"] = [f"ha16_{position}" for position in range(len(data))]

    # The cleaned EPC lookup is stored as a msgpack payload
    raw_cleaned = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name=data_bucket
    )
    cleaned = msgpack.unpackb(raw_cleaned, raw=False)

    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name=data_bucket, file_key="sap_change_model/cleaning_dataset.parquet",
    )

    created_at = datetime.now().isoformat()

    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=data_bucket)

    results_df, scoring_data, nodata = get_epc_data(
        data=data,
        cleaned=cleaned,
        cleaning_data=cleaning_data,
        created_at=created_at,
        photo_supply_lookup=photo_supply_lookup,
        floor_area_decile_thresholds=floor_area_decile_thresholds,
    )

    # Store
    # Old file was ha16.pickle
    # import pickle
    # with open("ha16_10_jan.pickle", "wb") as f:
    #     pickle.dump(
    #         {
    #             "scoring_data": scoring_data,
    #             "results": results_df,
    #             "nodata": nodata
    #         }, f
    #     )

    # Read pickle
    # import pickle
    # with open("ha16_10_jan.pickle", "rb") as f:
    #     saved = pickle.load(f)
    # scoring_data = saved["scoring_data"]
    # results_df = saved["results"]
    # nodata = saved["nodata"]
|
||||
524
etl/eligibility/ha_15_32/ha24_app.py
Normal file
524
etl/eligibility/ha_15_32/ha24_app.py
Normal file
|
|
@ -0,0 +1,524 @@
|
|||
import os
|
||||
import msgpack
|
||||
import openpyxl
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet
|
||||
from utils.logger import setup_logger
|
||||
from dotenv import load_dotenv
|
||||
from tqdm import tqdm
|
||||
from backend.SearchEpc import SearchEpc
|
||||
from etl.eligibility.Eligibility import Eligibility
|
||||
from etl.eligibility.ha_15_32.app import prepare_model_data_row
|
||||
from etl.epc.DataProcessor import DataProcessor
|
||||
from etl.epc.settings import COLUMNS_TO_MERGE_ON
|
||||
from backend.ml_models.api import ModelApi
|
||||
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
|
||||
from recommendations.recommendation_utils import calculate_cavity_age
|
||||
from recommendation_utils import convert_thickness_to_numeric
|
||||
|
||||
# The .env file sits next to this script. Path(__file__).parent is already
# etl/eligibility/ha_15_32, so the directory components must not be appended again
# (load_dotenv silently ignores a path that does not exist).
ENV_FILE = Path(__file__).parent / ".env"

# Load the .env file BEFORE reading from the environment; otherwise a token that is
# only defined in the .env file is never picked up and EPC_AUTH_TOKEN ends up None.
load_dotenv(ENV_FILE)
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")

logger = setup_logger()
|
||||
|
||||
|
||||
def _read_sheet_with_row_colours(sheet):
    """
    Read an openpyxl worksheet whose first row holds the column headers.
    :param sheet: The openpyxl worksheet to read
    :return: A tuple of (dataframe of the cell values from row 2 onwards, list of row colours). A row colour is
        the fill colour index of the first cell of the row, or None when that index is '00000000'
    """
    header = [cell.value for cell in sheet[1]]
    values = []
    colours = []
    for row in sheet.iter_rows(min_row=2, values_only=False):
        values.append([cell.value for cell in row])
        # The colour of the first cell is taken as the colour of the whole row
        fill_index = row[0].fill.start_color.index
        colours.append(fill_index if fill_index != '00000000' else None)
    return pd.DataFrame(values, columns=header), colours


def load_data():
    """
    Load the HA 24 asset list and ECO4 survey list, and match every surveyed property to its asset list row.
    :return: A tuple of (asset list left-joined onto the matched surveys with a boolean-like
        "warmfront_identified" column, the tidied survey list)
    :raises ValueError: If a survey row cannot be narrowed down to exactly one row of the asset list
    """
    workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 24 ASSET LIST.xlsx')
    asset_list, rows_colors = _read_sheet_with_row_colours(workbook.active)

    # Remove None columns. We copy so that the column assignments below act on an independent frame
    asset_list = asset_list.iloc[:, 0:10].copy()
    asset_list['row_color'] = rows_colors

    asset_list["row_colour_name"] = np.where(
        asset_list["row_color"] == "FFFF0000", "red",
        np.where(asset_list["row_color"] == "FF92D050", "green", "yellow")
    )

    asset_list["row_colour_code"] = np.where(
        asset_list["row_colour_name"] == "red", "does not meet criteria",
        np.where(asset_list["row_colour_name"] == "green", "identified potential eco", "maybe in the future")
    )

    # The third column is listed as "Address" but it's actually the postcode. We have two Address columns so we
    # change just the third. The column labels are rebuilt rather than mutating the index values in place
    column_names = list(asset_list.columns)
    column_names[2] = "Postcode"
    asset_list.columns = column_names

    # Split up the address on commas, which is useful for matching later. The columns are named from however many
    # parts the split produced (temp, address2, address3, ...) instead of assuming exactly six
    split_addresses = asset_list['Address'].str.split(',', expand=True)
    split_addresses.columns = ['temp'] + [f'address{i}' for i in range(2, split_addresses.shape[1] + 1)]
    asset_list = pd.concat([asset_list, split_addresses], axis=1)

    # There are no commas separating house number and address 1, so the house number is the first
    # space-separated token. We only care about HouseNo for the moment
    asset_list["HouseNo"] = asset_list['temp'].str.split(' ').str[0]

    # Read in surveys
    survey_workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 24 ECO4 SURVEY LIST.xlsx')
    survey_list, survey_colors = _read_sheet_with_row_colours(survey_workbook.active)
    survey_list["row_colour"] = survey_colors

    # Tidy up the street/block name a bit. The corrections are applied in order and as literal text (not regular
    # expressions), so the "." in names such as "st. ives crescent" only ever matches a full stop
    street_corrections = [
        ("council house, nidds lane", "nidds lane"),
        ("wirral avenue", "wirrall avenue"),
        ("st ives road", "st. ives crescent"),
        ("sundringham road", "sandringham road"),
        ("milton avenue", "milton road"),
        ("st ives crescent", "st. ives crescent"),
        ("council house, waterbelly lane", "waterbelly lane"),
        # Generally remove "council house, " from the start of the street name
        ("council house, ", ""),
        ("st. leodegars close", "st leodegars close"),
    ]
    street_names = survey_list["Street / Block Name"].str.replace("/", ", ", regex=False).str.lower().str.strip()
    for wrong, right in street_corrections:
        street_names = street_names.str.replace(wrong, right, regex=False)
    survey_list["Street / Block Name"] = street_names

    # Drop all None rows, then key the remaining surveys
    survey_list = survey_list[~pd.isnull(survey_list["Street / Block Name"])].copy()
    survey_list["survey_key"] = ["survey_" + str(i) for i in range(0, len(survey_list))]

    # Loop invariant: the lower-cased asset addresses that every survey row is matched against
    asset_addresses = asset_list["Address"].str.lower()

    matched = []
    for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):
        house_number = row["NO."]
        if isinstance(house_number, str):
            house_number = house_number.lower()
        street_name = row["Street / Block Name"].lower()
        postcode = row["Post Code"]

        # Filter on the first line of the address, then on the house number appearing anywhere in the address
        df = asset_list[asset_addresses.str.contains(street_name, regex=False, na=False)]
        df = df[df["Address"].str.lower().str.contains(str(house_number), regex=False, na=False)]
        # Narrow progressively until exactly one candidate is left
        if df.shape[0] != 1:
            df = df[df["HouseNo"] == str(house_number)]
        if df.shape[0] != 1 and isinstance(postcode, str):
            df = df[df["Postcode"].str.lower().str.contains(postcode.lower(), regex=False, na=False)]
        if df.shape[0] != 1:
            raise ValueError(
                f"Investigate: expected exactly one asset list match for street {street_name!r}, "
                f"house number {house_number!r}, postcode {postcode!r} but found {df.shape[0]}"
            )

        matched.append(
            {
                "survey_key": row["survey_key"],
                "matched_address": df["Address"].values[0],
                "survey_house_no": row["NO."],
                "survey_street_name": row["Street / Block Name"],
                "survey_postcode": row["Post Code"],
                "survey_status": row["INSTALLED OR CANCELLED"]
            }
        )

    # Naming the columns keeps the merge below working even when there is nothing to match
    matched = pd.DataFrame(
        matched,
        columns=[
            "survey_key", "matched_address", "survey_house_no", "survey_street_name", "survey_postcode",
            "survey_status"
        ]
    )
    matched["warmfront_identified"] = True

    # Combine asset list and surveys
    data = asset_list.merge(
        matched, how="left", left_on="Address", right_on="matched_address",
    )
    data["warmfront_identified"] = data["warmfront_identified"].fillna(False)

    return data, survey_list
|
||||
|
||||
|
||||
def _classify_eco4_confidence(current_sap, post_install_sap):
    """
    Classify how confident we are that a property reaches its required SAP score after the install.
    :param current_sap: The current SAP score of the property
    :param post_install_sap: The predicted SAP score after the install
    :return: One of "highest confidence", "high confidence", "medium confidence" or "unlikely"
    """
    # The upgrade requirements are dependent on the current SAP: an F or G property (SAP <= 38) is measured
    # against a score of 55, anything else against a score of 69. Two points either side of the target separate
    # the confidence levels (57/55/53 and 71/69/67)
    target_sap = 55 if current_sap <= 38 else 69
    if post_install_sap >= target_sap + 2:
        return "highest confidence"
    if post_install_sap >= target_sap:
        return "high confidence"
    if post_install_sap >= target_sap - 2:
        return "medium confidence"
    return "unlikely"


def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
    """
    Look up the EPC for every property, run the eligibility checks and score the ECO4 eligible properties.
    :param data: The asset list. Each row is read for "HouseNo", "Postcode", "Address", "Property Type", "row_id"
    :param cleaned: The cleaned EPC data, passed through to Eligibility, calculate_cavity_age and
        prepare_model_data_row
    :param cleaning_data: The cleaning dataset, passed through to prepare_model_data_row and DataProcessor
    :param created_at: Timestamp passed through to prepare_model_data_row and ModelApi
    :param photo_supply_lookup: Passed through to prepare_model_data_row
    :param floor_area_decile_thresholds: Passed through to prepare_model_data_row
    :return: A tuple of (results dataframe with one row per property that has an EPC, the list of scoring rows,
        the list of asset list rows for which no EPC was found)
    """
    scoring_data = []
    results = []
    nodata = []

    property_type_lookup = {
        "01 HOUSE": "House",
        "02 FLAT": "Flat",
        "03 BUNGALOW": "Bungalow",
        "05 BEDSIT": "Flat",
        "04 MAISONETTE": "Maisonette",
        "01 HOUSE MID": "House",
        "10 PBUNGALOW": "Bungalow",
        "14 SFLAT": "Flat",
        "12 SBEDSIT": "Flat",
        "11 PFLAT": "Flat",
        "13 SBUNGALOW": "Bungalow",
        " 01 HOUSE MID": "House",
        "09 PBEDSIT": "Flat"
    }

    for _, property_meta in tqdm(data.iterrows(), total=len(data)):

        searcher = SearchEpc(
            address1=property_meta["HouseNo"],
            postcode=property_meta["Postcode"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            full_address=property_meta["Address"]
        )
        # The ordnance survey lookup is skipped, so the property type is supplied from the asset list instead
        searcher.ordnance_survey_client.property_type = property_type_lookup[property_meta["Property Type"]]
        searcher.find_property(skip_os=True)

        if searcher.newest_epc is None:
            nodata.append(property_meta)
            continue

        newest_epc = searcher.newest_epc
        older_epcs = searcher.older_epcs
        full_sap_epc = searcher.full_sap_epc
        # We also want to get the penultimate epc
        penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
        if not penultimate_epc:
            penultimate_epc = newest_epc

        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
        eligibility.check_gbis_warmfront()
        eligibility.check_eco4_warmfront()

        if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
            # The newest EPC is not eligible for either scheme, so we fall back to the penultimate EPC
            eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
            eligibility.check_gbis_warmfront()
            eligibility.check_eco4_warmfront()
            # If this is the case, we need to update the older epcs. Estimated EPCs are left alone just to make
            # data cleaning easier
            if penultimate_epc.get("estimated") is None:
                older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]

        # Loft MUST be suitable
        cavity_age = None
        if (
            eligibility.walls["is_cavity_wall"] and
            eligibility.walls["is_filled_cavity"] and
            eligibility.loft["suitability"] and
            eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
        ):
            # We check the age of the cavity and if it's particularly old, we flag it
            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)

        # Full checks
        eligibility.check_gbis()
        eligibility.check_eco4()

        if eligibility.eco4_warmfront["eligible"]:
            if eligibility.epc["uprn"] in ["", None]:
                # No UPRN on the EPC, so the numeric part of the row id stands in for it
                eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[1])

            scoring_dictionary = prepare_model_data_row(
                property_id=property_meta["row_id"],
                modelling_epc=eligibility.epc,
                cleaned=cleaned,
                cleaning_data=cleaning_data,
                created_at=created_at,
                old_data=older_epcs,
                full_sap_epc=full_sap_epc,
                photo_supply_lookup=photo_supply_lookup,
                floor_area_decile_thresholds=floor_area_decile_thresholds
            )
            scoring_data.extend(scoring_dictionary)

        results.append(
            {
                "row_id": property_meta["row_id"],
                "uprn": eligibility.epc["uprn"],
                "Address": property_meta["Address"],
                "Postcode": property_meta["Postcode"],
                "property_type": eligibility.epc["property-type"],
                "gbis_eligible": eligibility.gbis_warmfront,
                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                "eco4_message": eligibility.eco4_warmfront["message"],
                "sap": float(eligibility.epc["current-energy-efficiency"]),
                "gbis_eligible_future": eligibility.gbis["eligible"],
                "gbis_eligible_future_message": eligibility.gbis["message"],
                "eco4_eligible_future": eligibility.eco4["eligible"],
                "eco4_eligible_future_message": eligibility.eco4["message"],
                # Property components
                "roof": eligibility.roof["clean_description"],
                "walls": eligibility.walls["clean_description"],
                "cavity_type": eligibility.cavity["type"],
                "heating": eligibility.epc["mainheat-description"],
                "tenure": eligibility.tenure,
                "date_epc": eligibility.epc["lodgement-date"],
                "cavity_age": cavity_age,
                **eligibility.walls,
                **eligibility.roof,
            }
        )

    results_df = pd.DataFrame(results)

    if not scoring_data:
        # Nothing was ECO4 eligible, so there is nothing to score. We return the results with empty prediction
        # columns rather than failing inside the cleaning and modelling steps on an empty dataframe
        results_df["sap_uplift"] = np.nan
        results_df["post_install_sap"] = np.nan
        results_df["eligibility_classification"] = None
        return results_df, scoring_data, nodata

    scoring_df = pd.DataFrame(scoring_data)

    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )

    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])

    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )

    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)

    model_api = ModelApi(portfolio_id="ha24-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )

    predictions = all_predictions["sap_change_predictions"].copy()

    # The uplift is the predicted SAP minus the current SAP, summed over the prediction rows of each property
    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
        results_df[["row_id", "sap"]], how="left", on="row_id"
    )
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()

    results_df = results_df.merge(
        predictions[["sap_uplift", "row_id"]],
        how="left",
        on="row_id"
    )
    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]

    eco4_eligible_rows = results_df[results_df["eco4_eligible"] == True]
    # Naming the columns keeps the merge below working even when no property is classified
    eligibility_assessment = pd.DataFrame(
        [
            {
                "row_id": row["row_id"],
                "eligibility_classification": _classify_eco4_confidence(row["sap"], row["post_install_sap"])
            }
            for _, row in eco4_eligible_rows.iterrows()
        ],
        columns=["row_id", "eligibility_classification"]
    )

    results_df = results_df.merge(
        eligibility_assessment, how="left", on="row_id"
    )
    return results_df, scoring_data, nodata
|
||||
|
||||
|
||||
def analyse_results(results_df, data, survey_list):
    """
    Compare the properties we flag as eligible against the properties Warmfront identified in their surveys.
    :param results_df: The results dataframe produced by get_epc_data
    :param data: The asset list, with "row_id", "survey_key" and "warmfront_identified" columns
    :param survey_list: The survey list. Its first column is used as the funding scheme of each survey
    :return: A dictionary holding the combined analysis dataframe, the subsets of interest and the headline
        counts/rates, keyed by name
    """
    analysis_data = data[["row_id", "survey_key", "warmfront_identified"]].merge(
        results_df, how="left", on="row_id"
    ).merge(
        survey_list[["survey_key", survey_list.columns[0]]].rename(columns={survey_list.columns[0]: "funding_scheme"}),
        how="left", on="survey_key"
    )

    analysis_data["roof_insulation_thickness"] = np.where(
        pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"]
    )
    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
        lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True)
    )

    # Properties without an EPC have missing values in the eligibility columns after the left merge. Explicit
    # comparisons turn these into strictly boolean masks (missing compares as False) that are safe to combine
    # and to index with
    identified = analysis_data["warmfront_identified"] == True
    not_identified = analysis_data["warmfront_identified"] == False
    eco4_eligible = analysis_data["eco4_eligible"] == True
    eco4_ineligible = analysis_data["eco4_eligible"] == False
    gbis_eligible = analysis_data["gbis_eligible"] == True
    eco4_eligible_future = analysis_data["eco4_eligible_future"] == True
    eco4_ineligible_future = analysis_data["eco4_eligible_future"] == False
    gbis_eligible_future = analysis_data["gbis_eligible_future"] == True

    is_eco4_scheme = analysis_data["funding_scheme"].isin(["ECO4 A/W", "AFFORDABLE WARMTH"])
    is_gbis_scheme = analysis_data["funding_scheme"].isin(["ECO4 GBIS (ECO+)"])

    warmfront_sold_eco4 = analysis_data[identified & is_eco4_scheme]
    warmfront_sold_gbis = analysis_data[identified & is_gbis_scheme]

    additional_eco4_warmfront_not_sold = analysis_data[
        eco4_eligible & not_identified & (analysis_data["roof_insulation_thickness_numeric"] <= 100)
    ]
    additional_gbis_warmfront_not_sold = analysis_data[
        gbis_eligible & not_identified & (
            ~analysis_data["row_id"].isin(additional_eco4_warmfront_not_sold["row_id"].values)
        )
    ]

    # NOTE(review): only "ECO4 A/W" is counted here, whereas the ECO checks above also accept
    # "AFFORDABLE WARMTH" - confirm whether that difference is intended
    all_identified_eco = analysis_data[
        (identified & analysis_data["funding_scheme"].isin(["ECO4 A/W"])) | eco4_eligible
    ]
    all_identified_gbis = analysis_data[
        (identified & is_gbis_scheme) | (gbis_eligible & ~eco4_eligible)
    ]

    warmfront_identified = analysis_data[identified]

    # Of the ECO jobs, what proportion do we get right
    warmfront_identified_eco = analysis_data[identified & is_eco4_scheme]
    eco_job_count = warmfront_identified_eco.shape[0]
    # There is no rate to report when no ECO jobs were identified
    eco_success_rate = (identified & is_eco4_scheme & eco4_eligible).sum() / eco_job_count if eco_job_count else None

    # No gbis success rate is calculated for this HA
    warmfront_identified_gbis = analysis_data[identified & is_gbis_scheme]

    # Additional identified
    additional_identified_eco = analysis_data[eco4_eligible & not_identified]
    additional_identified_gbis = int((gbis_eligible & eco4_ineligible & not_identified).sum())
    # Future
    additional_identified_eco_future = int((eco4_eligible_future & not_identified).sum())
    additional_identified_gbis_future = int(
        (gbis_eligible_future & eco4_ineligible_future & not_identified).sum()
    )

    return {
        "analysis_data": analysis_data,
        "warmfront_sold_eco4": warmfront_sold_eco4,
        "warmfront_sold_gbis": warmfront_sold_gbis,
        "additional_eco4_warmfront_not_sold": additional_eco4_warmfront_not_sold,
        "additional_gbis_warmfront_not_sold": additional_gbis_warmfront_not_sold,
        "additional_gbis_warmfront_not_sold_walls": additional_gbis_warmfront_not_sold["walls"].value_counts(),
        "walls": analysis_data["walls"].value_counts(),
        "all_identified_eco": all_identified_eco,
        "all_identified_gbis": all_identified_gbis,
        "warmfront_identified": warmfront_identified,
        "warmfront_identified_eco": warmfront_identified_eco,
        "warmfront_identified_gbis": warmfront_identified_gbis,
        "eco_success_rate": eco_success_rate,
        "additional_identified_eco": additional_identified_eco,
        "additional_identified_eco_classification": (
            additional_identified_eco["eligibility_classification"].value_counts()
        ),
        "additional_identified_gbis": additional_identified_gbis,
        "additional_identified_eco_future": additional_identified_eco_future,
        "additional_identified_gbis_future": additional_identified_gbis_future,
    }
|
||||
|
||||
|
||||
def app():
    """
    Run the HA 24 eligibility pipeline end to end: load the asset and survey lists, fetch the reference datasets
    from S3 and produce the EPC based eligibility results.
    """
    bucket = "retrofit-data-dev"

    data, survey_list = load_data()
    # Every asset list row gets a stable identifier of the form ha24_<position>
    data["row_id"] = [f"ha24_{position}" for position in range(len(data))]

    raw_cleaned = read_from_s3(s3_file_name="cleaned_epc_data/cleaned.bson", bucket_name=bucket)
    cleaned = msgpack.unpackb(raw_cleaned, raw=False)

    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name=bucket, file_key="sap_change_model/cleaning_dataset.parquet",
    )

    created_at = datetime.now().isoformat()

    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=bucket)

    results_df, scoring_data, nodata = get_epc_data(
        data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds
    )

    # The results were previously pickled just in case (and read back in) using the file "ha24_10_jan.pickle",
    # holding a dictionary with the keys "scoring_data", "results" and "nodata"
|
||||
883
etl/eligibility/ha_15_32/ha25_app.py
Normal file
883
etl/eligibility/ha_15_32/ha25_app.py
Normal file
|
|
@ -0,0 +1,883 @@
|
|||
import os
|
||||
import msgpack
|
||||
import openpyxl
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from utils.s3 import read_from_s3
|
||||
from utils.logger import setup_logger
|
||||
from dotenv import load_dotenv
|
||||
from utils.s3 import read_dataframe_from_s3_parquet
|
||||
from tqdm import tqdm
|
||||
from backend.SearchEpc import SearchEpc
|
||||
from etl.eligibility.Eligibility import Eligibility
|
||||
from etl.eligibility.ha_15_32.app import prepare_model_data_row
|
||||
from etl.epc.DataProcessor import DataProcessor
|
||||
from etl.epc.settings import COLUMNS_TO_MERGE_ON
|
||||
from backend.ml_models.api import ModelApi
|
||||
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
|
||||
from recommendations.recommendation_utils import calculate_cavity_age
|
||||
from recommendation_utils import convert_thickness_to_numeric
|
||||
|
||||
import re
|
||||
|
||||
# Path(__file__).parent is already the etl/eligibility/ha_15_32 directory, so the .env file is resolved directly
# against it. Re-appending "etl/eligibility/ha_15_32" pointed at a location that does not exist, and load_dotenv
# silently ignores missing files.
ENV_FILE = Path(__file__).parent / ".env"

logger = setup_logger()

# The .env file must be loaded *before* the environment is read, otherwise a token that only lives in the .env
# file is never picked up.
load_dotenv(ENV_FILE)

EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
||||
|
||||
|
||||
def _read_rows_with_colours(sheet, min_row):
    """
    Read the rows of an openpyxl worksheet together with the colour of each row.
    :param sheet: The openpyxl worksheet to read
    :param min_row: The first (1-indexed) row to read
    :return: A tuple of (list of row values, list of row colours). A row colour is the fill colour index of the
        first cell of the row, or None when that index is '00000000'
    """
    values = []
    colours = []
    for row in sheet.iter_rows(min_row=min_row, values_only=False):
        values.append([cell.value for cell in row])
        # Assume first cell color is indicative of entire row
        fill_index = row[0].fill.start_color.index
        colours.append(fill_index if fill_index != '00000000' else None)
    return values, colours


def _load_eco3_survey_list():
    """
    Load the historical ECO3 survey list (the "CAVITY" sheet) and decode the meaning of its row colours.
    This is not needed to build the matched asset list, so load_data does not call it. It is kept for adhoc
    analysis of the historical completion and cancellation rates.
    :return: The ECO3 survey list with "row_colour", "row_colour_name" and "row_colour_code" columns
    """
    eco3_survey_workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 25 ECO3 SURVEY LIST.xlsx')
    eco3_survey_sheet = eco3_survey_workbook["CAVITY"]
    # The first row is the header row
    rows, colours = _read_rows_with_colours(eco3_survey_sheet, min_row=2)

    eco3_survey_list = pd.DataFrame(rows, columns=[cell.value for cell in eco3_survey_sheet[1]])
    eco3_survey_list["row_colour"] = colours
    # Remove rows where street name is missing
    eco3_survey_list = eco3_survey_list[~pd.isnull(eco3_survey_list["Street / Block Name"])].copy()

    colour_names = {
        "FF7030A0": "purple",
        "FF92D050": "green",
        "FFFF0000": "red",
        "FFFFFF00": "yellow",
        "FF38FD23": "green",
    }
    colour_meanings = {
        "red": "cancelled",
        "green": "installed advised install complete",
        "purple": "installer advised install complete + post works EPC",
        "yellow": "filler row - drop",
    }
    eco3_survey_list["row_colour_name"] = eco3_survey_list["row_colour"].map(
        lambda colour: colour_names.get(colour, "unknown")
    )
    eco3_survey_list["row_colour_code"] = eco3_survey_list["row_colour_name"].map(
        lambda name: colour_meanings.get(name, "unknown")
    )
    return eco3_survey_list


def load_data():
    """
    Load the HA 25 asset list and the list of properties identified as ECO4 prospects, and match every prospect
    to its asset list row.
    :return: A tuple of (asset list left-joined onto the matched prospects with a boolean-like
        "warmfront_identified" column, the corrected prospect survey list, the prospect survey rows that could
        not be matched to the asset list)
    """
    workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 25 ASSET LIST.xlsx', data_only=True)
    rows_data, rows_colors = _read_rows_with_colours(workbook.active, min_row=1)

    # Headers are on the final row. Pop them off, along with the colour of the header row
    headers = rows_data.pop()
    rows_colors.pop()
    # The postcode header is None, so we replace it with "postcode"
    headers[-1] = "postcode"

    asset_list = pd.DataFrame(rows_data, columns=headers)
    asset_list['row_color'] = rows_colors

    asset_list["row_colour_name"] = np.where(
        asset_list["row_color"] == "FFFF0000", "red",
        np.where(asset_list["row_color"] == "FF00B050", "green", "yellow")
    )
    asset_list["row_colour_code"] = np.where(
        asset_list["row_colour_name"] == "red", "does not meet criteria",
        np.where(asset_list["row_colour_name"] == "green", "identified potential eco", "maybe in the future")
    )

    asset_list["address"] = asset_list["T1_Address"].str.lower().str.replace("flat", "", regex=False).str.strip()

    # We only care about the house number for the moment, which is the first space-separated token of the
    # address. Taking it directly avoids assuming a fixed number of tokens in every address
    asset_list["HouseNo"] = asset_list["address"].str.split(" ").str[0].str.replace(";", "", regex=False)
    asset_list["postcode"] = asset_list["postcode"].str.strip()

    # We now read in the indicative survey list which identified prospects for ECO4 works
    eco4_survey_workbook = openpyxl.load_workbook(
        'etl/eligibility/ha_15_32/HESTIA - HA 25 ADHOC ISOLATED IDENTIFIED PROPERTIES FOR CWI.xlsx'
    )
    eco4_prospect_survey_sheet = eco4_survey_workbook["LiveWest"]
    # The first row is the header row
    prospect_rows, prospect_colours = _read_rows_with_colours(eco4_prospect_survey_sheet, min_row=2)

    eco4_prospects_survey_list = pd.DataFrame(
        prospect_rows, columns=[cell.value for cell in eco4_prospect_survey_sheet[1]]
    )
    eco4_prospects_survey_list["row_colour"] = prospect_colours
    eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.lower().str.strip()

    # We copy so that the corrections below act on an independent frame. The original row labels are kept
    eco4_prospects_survey_list = eco4_prospects_survey_list[
        ~pd.isnull(eco4_prospects_survey_list["ADDRESS 1"])
    ].copy()
    eco4_prospects_survey_list["survey_key"] = [
        "survey_" + str(i) for i in range(0, len(eco4_prospects_survey_list))
    ]

    # Correct some errors in the survey list
    eco4_prospects_survey_list.loc[
        (eco4_prospects_survey_list["ADDRESS 1"] == "berry park") &
        (eco4_prospects_survey_list["POSTCODE"] == "PL12 6HP"),
        "POSTCODE"
    ] = "PL12 6EN"

    # Remove semi colons from address in asset and survey list
    asset_list["T1_Address"] = asset_list["T1_Address"].str.replace(";", "", regex=False)
    eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace(
        ";", "", regex=False
    )

    # In the prospects survey list, we have 6 WALKHAM MEADOWS listed twice, which should be 6a and 6b
    # NOTE(review): this relies on those two rows carrying the labels 838 and 839 - confirm whenever the
    # workbook changes
    eco4_prospects_survey_list.loc[838, "NO"] = "6a"
    eco4_prospects_survey_list.loc[839, "NO"] = "6b"

    # 3, 7, 9 BOLDVENTURE ROAD should be BOLDVENTURE CLOSE
    eco4_prospects_survey_list.loc[
        (eco4_prospects_survey_list["ADDRESS 1"] == "boldventure road") &
        (eco4_prospects_survey_list["NO"].isin([3, 7, 9])),
        "ADDRESS 1"
    ] = "boldventure close"

    eco4_prospects_survey_list.loc[
        (eco4_prospects_survey_list["ADDRESS 1"] == "old farm road") &
        (eco4_prospects_survey_list["POSTCODE"] == "PL5 1EP"),
        "ADDRESS 1"
    ] = "old school road"

    eco4_prospects_survey_list.loc[
        (eco4_prospects_survey_list["ADDRESS 1"] == "croft orchard") &
        (eco4_prospects_survey_list["POSTCODE"] == "TQ12 6RP") &
        (eco4_prospects_survey_list["NO"] == 52),
        "ADDRESS 1"
    ] = "drum way"

    # String replace
    for wrong, right in [
        ("the gulls, collaton road", "the gulls collaton road"),
        ("crows-an-eglose", "crows-an-eglos"),
    ]:
        eco4_prospects_survey_list["ADDRESS 1"] = eco4_prospects_survey_list["ADDRESS 1"].str.replace(
            wrong, right, regex=False
        )

    # Streets which are known not to be in the asset list
    streets_not_in_asset_list = [
        "kaynton mead", "broadmoor lane", "hoopers barton", "ecos court", "selwood road",
        "castle street"
    ]
    # Loop invariant: the lower-cased asset addresses that every survey row is matched against
    asset_addresses = asset_list["T1_Address"].str.lower()

    # We have a high volume of rows that do not match
    matched = []
    for _, row in tqdm(eco4_prospects_survey_list.iterrows(), total=len(eco4_prospects_survey_list)):

        # Not in the asset list
        if (row["ADDRESS 1"] == "berry park") and row["NO"] in [40, 42] and row["POSTCODE"] == "PL12 6EN":
            continue
        if (row["ADDRESS 1"] == "roberts road") and row["NO"] == 23 and row["POSTCODE"] == "PL5 1DP":
            continue
        if row["ADDRESS 1"] in streets_not_in_asset_list:
            continue

        house_number = row["NO"]
        if isinstance(house_number, str):
            house_number = house_number.lower()
            # Only text house numbers can carry a "flat" prefix
            if "flat" in house_number:
                house_number = house_number.split("flat")[1].strip()

        # Filter on the first line of the address. The text is matched literally rather than as a regular
        # expression, and missing addresses never match
        df = asset_list[asset_addresses.str.contains(row["ADDRESS 1"].lower(), regex=False, na=False)]
        # Narrow progressively until exactly one candidate is left
        if house_number is not None:
            if df.shape[0] != 1:
                df = df[df["T1_Address"].str.lower().str.contains(str(house_number), regex=False, na=False)]
            if df.shape[0] != 1:
                df = df[df["HouseNo"] == str(house_number)]
        if df.shape[0] != 1 and isinstance(row["POSTCODE"], str):
            df = df[df["postcode"].str.lower().str.contains(row["POSTCODE"].lower(), regex=False, na=False)]
        if df.shape[0] != 1:
            # No unique match - the row ends up in the lost identified properties below
            continue

        matched.append(
            {
                "survey_key": row["survey_key"],
                "matched_address": df["T1_Address"].values[0],
                "survey_house_no": row["NO"],
                "survey_street_name": row["ADDRESS 1"],
                "survey_postcode": row["POSTCODE"],
            }
        )

    # Naming the columns keeps the merge below working even when nothing could be matched
    matched = pd.DataFrame(
        matched,
        columns=["survey_key", "matched_address", "survey_house_no", "survey_street_name", "survey_postcode"]
    )
    matched["warmfront_identified"] = True

    # Combine asset list and surveys
    data = asset_list.merge(
        matched, how="left", left_on="T1_Address", right_on="matched_address",
    )
    data["warmfront_identified"] = data["warmfront_identified"].fillna(False)

    # Every prospect that was skipped or could not be matched uniquely
    lost_identified_properties = eco4_prospects_survey_list[
        ~eco4_prospects_survey_list["survey_key"].isin(matched["survey_key"])
    ]

    return data, eco4_prospects_survey_list, lost_identified_properties
|
||||
|
||||
|
||||
# Inclusive upper bound of each construction age band, in ascending order. Years above the last bound
# fall into the open-ended "2012 onwards" band.
_AGE_BAND_UPPER_BOUNDS = (
    (1899, "England and Wales: before 1900"),
    (1929, "England and Wales: 1900-1929"),
    (1949, "England and Wales: 1930-1949"),
    (1966, "England and Wales: 1950-1966"),
    (1975, "England and Wales: 1967-1975"),
    (1982, "England and Wales: 1976-1982"),
    (1990, "England and Wales: 1983-1990"),
    (1995, "England and Wales: 1991-1995"),
    (2002, "England and Wales: 1996-2002"),
    (2006, "England and Wales: 2003-2006"),
    (2011, "England and Wales: 2007-2011"),
)


def map_year_to_age_band(year):
    """
    Map a build year onto the EPC construction age band label that contains it.

    :param year: The build year. Anything accepted by int() works (int, float, numeric string)
    :return: The age band label, e.g. "England and Wales: 1950-1966", or "Invalid Year" if the
        value cannot be converted to an integer
    """
    try:
        year = int(year)
    except (TypeError, ValueError, OverflowError):
        # TypeError covers None, ValueError covers non-numeric strings and NaN, OverflowError covers
        # infinities. All of them mean we have no usable build year.
        return "Invalid Year"

    for upper_bound, age_band in _AGE_BAND_UPPER_BOUNDS:
        if year <= upper_bound:
            return age_band

    # Assuming all remaining years are 2012 onwards
    return "England and Wales: 2012 onwards"
|
||||
|
||||
|
||||
def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
    """
    Find an EPC for every property in the asset list, check its GBIS / ECO4 eligibility and, for the
    ECO4-eligible properties, score it with the SAP change model to classify how likely the
    property is to reach the required post-install SAP.

    :param data: Asset list DataFrame with one row per property. The columns read here are "HouseNo",
        "postcode", "address", "T1_Address", "T1_AssetType", "Build Yr" and "row_id"
    :param cleaned: Passed through to Eligibility, calculate_cavity_age and prepare_model_data_row
    :param cleaning_data: Passed to prepare_model_data_row and to the DataProcessor averages cleaning
    :param created_at: Timestamp of this run, passed to prepare_model_data_row and ModelApi
    :param photo_supply_lookup: Passed through to prepare_model_data_row
    :param floor_area_decile_thresholds: Passed through to prepare_model_data_row
    :return: Tuple of (results_df, scoring_data, nodata). results_df has one row per property for which
        an EPC was found, scoring_data is the list of model input rows for the ECO4-eligible properties
        and nodata is the list of asset list rows for which no EPC was found
    """
    scoring_data = []
    results = []
    nodata = []

    # Maps the asset list's "T1_AssetType" onto the property type / built form used for the EPC search
    property_type_lookup = {
        "Flat": {"property-type": "Flat", "built-form": None},
        "Mid Terrace House": {"property-type": "House", "built-form": "Mid-Terrace"},
        "End Terrace House": {"property-type": "House", "built-form": "End-Terrace"},
        "Maisonnette": {"property-type": "Flat", "built-form": None},
        "Semi Detached House": {"property-type": "House", "built-form": "Semi-Detached"},
        "Detached House": {"property-type": "House", "built-form": "Detached"},
        "Coach House": {"property-type": "House", "built-form": "Detached"},
        "Bungalow": {"property-type": "Bungalow", "built-form": None},
        "Detached Bungalow": {"property-type": "Bungalow", "built-form": "Detached"},
        "House": {"property-type": "House", "built-form": None},
        "Semi Detached Bung": {"property-type": "Bungalow", "built-form": "Semi-Detached"},
        "Bedspace": {"property-type": None, "built-form": None},
        "Office Buildings": {"property-type": None, "built-form": None},
        "End Terrace Bungalow": {"property-type": "Bungalow", "built-form": "End-Terrace"},
        "Mid Terrace Bungalow": {"property-type": "Bungalow", "built-form": "Mid-Terrace"},
        "Bedsit": {"property-type": "Flat", "built-form": None},
        "Mid Terrace Housekeeping": {"property-type": "House", "built-form": "Mid-Terrace"},
        "Mid Terrace Housekeeping ": {"property-type": "House", "built-form": "Mid-Terrace"},
        "End Terrace Housex": {"property-type": "House", "built-form": "End-Terrace"},
        "Guest Room": {"property-type": None, "built-form": None}
    }

    # Iterating a DataFrame directly yields its column labels, so we must go through iterrows() to get
    # (index, row) pairs
    for _, property_meta in tqdm(data.iterrows(), total=len(data)):

        searcher = SearchEpc(
            address1=property_meta["HouseNo"],
            postcode=property_meta["postcode"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            full_address=property_meta["address"]
        )
        # We skip the ordnance survey lookup, so we set the property type and built form ourselves from
        # the asset list. An asset type missing from the lookup raises a KeyError
        asset_type = property_type_lookup[property_meta["T1_AssetType"]]
        searcher.ordnance_survey_client.property_type = asset_type["property-type"]
        searcher.ordnance_survey_client.built_form = asset_type["built-form"]
        searcher.find_property(skip_os=True)

        if searcher.newest_epc is None:
            nodata.append(property_meta)
            continue

        if searcher.newest_epc.get("estimated"):
            # We insert the row ID as our proxy for UPRN
            proxy_uprn = int(property_meta["row_id"].split("_")[1])
            searcher.newest_epc["uprn"] = proxy_uprn

        newest_epc = searcher.newest_epc
        older_epcs = searcher.older_epcs
        full_sap_epc = searcher.full_sap_epc

        # NOTE: unlike get_epc_data_for_lost_surveys, we do not fall back to the penultimate EPC here when
        # the newest EPC is not eligible

        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
        eligibility.check_gbis_warmfront()
        eligibility.check_eco4_warmfront()

        # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
        # Loft MUST be suitable
        cavity_age = None
        if (
            eligibility.walls["is_cavity_wall"] and
            eligibility.walls["is_filled_cavity"] and
            eligibility.loft["suitability"] and
            eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
        ):
            # We check the age of the cavity and if it's particularly old, we flag it
            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)

        # Full checks
        eligibility.check_gbis()
        eligibility.check_eco4()

        if eligibility.eco4_warmfront["eligible"]:
            # The model needs these fields populated, so we fill in any gaps before scoring
            if eligibility.epc["uprn"] in ["", None]:
                eligibility.epc["uprn"] = int(property_meta["row_id"].split("_")[1])

            if eligibility.epc["construction-age-band"] in ["", None]:
                eligibility.epc["construction-age-band"] = map_year_to_age_band(property_meta["Build Yr"])

            # This is not the right place to do this but this is temp
            if eligibility.epc["extension-count"] in ["", None]:
                eligibility.epc["extension-count"] = 0

            # Not in the right place but temp
            if eligibility.epc["built-form"] in ["", None]:
                if not older_epcs:
                    eligibility.epc["built-form"] = "Mid-Terrace"

            scoring_dictionary = prepare_model_data_row(
                property_id=property_meta["row_id"],
                modelling_epc=eligibility.epc,
                cleaned=cleaned,
                cleaning_data=cleaning_data,
                created_at=created_at,
                old_data=older_epcs,
                full_sap_epc=full_sap_epc,
                photo_supply_lookup=photo_supply_lookup,
                floor_area_decile_thresholds=floor_area_decile_thresholds,
            )
            scoring_data.extend(scoring_dictionary)

        # Every property with an EPC is recorded, whether or not it is eligible
        results.append(
            {
                "row_id": property_meta["row_id"],
                "uprn": eligibility.epc["uprn"],
                "Address": property_meta["T1_Address"],
                "Postcode": property_meta["postcode"],
                "property_type": eligibility.epc["property-type"],
                "gbis_eligible": eligibility.gbis_warmfront,
                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                "eco4_message": eligibility.eco4_warmfront["message"],
                "sap": float(eligibility.epc["current-energy-efficiency"]),
                "gbis_eligible_future": eligibility.gbis["eligible"],
                "gbis_eligible_future_message": eligibility.gbis["message"],
                "eco4_eligible_future": eligibility.eco4["eligible"],
                "eco4_eligible_future_message": eligibility.eco4["message"],
                # Property components
                "roof": eligibility.roof["clean_description"],
                "walls": eligibility.walls["clean_description"],
                "cavity_type": eligibility.cavity["type"],
                "heating": eligibility.epc["mainheat-description"],
                "tenure": eligibility.tenure,
                "date_epc": eligibility.epc["lodgement-date"],
                "cavity_age": cavity_age,
                **eligibility.walls,
                **eligibility.roof,
            }
        )

    scoring_df = pd.DataFrame(scoring_data)

    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )

    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])

    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )

    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)

    # NOTE(review): the portfolio id says "ha33" although this function is used by the ha25 script -
    # confirm this is intentional
    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )

    predictions = all_predictions["sap_change_predictions"].copy()

    results_df = pd.DataFrame(results)

    # The uplift of each prediction row is measured against the current SAP and then summed per property
    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
        results_df[["row_id", "sap"]], how="left", on="row_id"
    )
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()

    results_df = results_df.merge(
        predictions[["sap_uplift", "row_id"]],
        how="left",
        on="row_id"
    )
    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]

    eligibility_assessment = []
    for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
        # The upgrade requirements are dependent on the current SAP. A property that is an F or G
        # (SAP of 38 or lower) has a lower target than the rest.
        # NOTE(review): the thresholds sit two points either side of 55 and 69, presumably the band D and
        # band C boundaries - confirm
        if row["sap"] <= 38:
            highest, high, medium = 57, 55, 53
        else:
            highest, high, medium = 71, 69, 67

        post_install_sap = row["post_install_sap"]
        if post_install_sap >= highest:
            eligibility_classification = "highest confidence"
        elif post_install_sap >= high:
            eligibility_classification = "high confidence"
        elif post_install_sap >= medium:
            eligibility_classification = "medium confidence"
        else:
            eligibility_classification = "unlikely"

        eligibility_assessment.append(
            {
                "row_id": row["row_id"],
                "eligibility_classification": eligibility_classification
            }
        )

    eligibility_assessment = pd.DataFrame(eligibility_assessment)

    results_df = results_df.merge(
        eligibility_assessment, how="left", on="row_id"
    )
    return results_df, scoring_data, nodata
|
||||
|
||||
|
||||
def get_epc_data_for_lost_surveys(
    lost_identified_properties, cleaned, cleaning_data, created_at, photo_supply_lookup,
    floor_area_decile_thresholds
):
    """
    Run the EPC search, eligibility checks and SAP change model for the surveyed properties that could
    not be matched to the asset list.

    If the newest EPC is eligible for neither ECO4 nor GBIS, the checks are repeated against the
    penultimate EPC.

    :param lost_identified_properties: DataFrame of the unmatched surveys. The columns read here are
        "NO", "ADDRESS 1", "ADDRESS 2", "ADDRESS 3", "POSTCODE" and "PROPERTY TYPE". A "row_id" column
        is added to this DataFrame in place
    :param cleaned: Passed through to Eligibility and prepare_model_data_row
    :param cleaning_data: Passed to prepare_model_data_row and to the DataProcessor averages cleaning
    :param created_at: Timestamp of this run, passed to prepare_model_data_row and ModelApi
    :param photo_supply_lookup: Passed through to prepare_model_data_row
    :param floor_area_decile_thresholds: Passed through to prepare_model_data_row
    :return: Tuple of (results_df, scoring_data, nodata). results_df has one row per survey for which an
        EPC was found, scoring_data is the list of model input rows and nodata is the list of survey rows
        for which no EPC was found. Surveys without a postcode are skipped and appear in none of them
    """
    lost_identified_properties["row_id"] = [
        "lost_surveys_ha25_" + str(i) for i in range(0, len(lost_identified_properties))
    ]

    scoring_data = []
    results = []
    nodata = []

    # Maps the survey's "PROPERTY TYPE" onto the property type / built form used for the EPC search
    # NOTE(review): every entry, including the flat ones, maps to property-type "House" - confirm this is
    # intended
    property_type_lookup = {
        "MID-TERRACE": {"property-type": "House", "built-form": "Mid-Terrace"},
        "N/A": {"property-type": "House", "built-form": None},
        "END-TERRACE": {"property-type": "House", "built-form": "End-Terrace"},
        "GROUND-FLOOR": {"property-type": "House", "built-form": None},
        "TOP-FLOOR": {"property-type": "House", "built-form": None},
        "SEMI-DETACHED": {"property-type": "House", "built-form": "Semi-Detached"},
        "MID-FLOOR": {"property-type": "House", "built-form": None},
        "TOP-FLOOR FLAT": {"property-type": "House", "built-form": None},
        "DETACHED": {"property-type": "House", "built-form": "Detached"},
        "MID-FLOOR FLAT": {"property-type": "House", "built-form": None},
        "SEMI- DETACHED": {"property-type": "House", "built-form": "Semi-Detached"},
        "NO EPC ON GOV": {"property-type": "House", "built-form": None},
        "Top-floor flat": {"property-type": "House", "built-form": None},
        "GROUND-FLOOR FLAT": {"property-type": "House", "built-form": None},
        "NOT ON GOV SITE": {"property-type": "House", "built-form": None}
    }

    for _, property_meta in tqdm(lost_identified_properties.iterrows(), total=len(lost_identified_properties)):

        if property_meta["POSTCODE"] is None:
            continue

        full_address = ", ".join(
            [str(x) for x in [
                property_meta["NO"], property_meta["ADDRESS 1"], property_meta["ADDRESS 2"], property_meta["ADDRESS 3"]
            ] if x is not None]
        )

        searcher = SearchEpc(
            address1=str(property_meta["NO"]),
            postcode=property_meta["POSTCODE"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            full_address=full_address
        )

        # We skip the ordnance survey lookup, so we set the property type and built form ourselves from
        # the survey when we have it. A property type missing from the lookup raises a KeyError
        property_type_key = property_meta["PROPERTY TYPE"]
        if property_type_key is not None:
            survey_type = property_type_lookup[property_type_key.strip()]
            searcher.ordnance_survey_client.property_type = survey_type["property-type"]
            searcher.ordnance_survey_client.built_form = survey_type["built-form"]
        searcher.find_property(skip_os=True)

        if searcher.newest_epc is None:
            nodata.append(property_meta)
            continue

        # The row id looks like "lost_surveys_ha25_<n>", so the numeric part is the LAST element
        proxy_uprn = int(property_meta["row_id"].split("_")[-1])

        if searcher.newest_epc.get("estimated"):
            # We insert the row ID as our proxy for UPRN
            searcher.newest_epc["uprn"] = proxy_uprn

        newest_epc = searcher.newest_epc
        older_epcs = searcher.older_epcs
        full_sap_epc = searcher.full_sap_epc
        # We also want to get the penultimate epc
        penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
        if not penultimate_epc:
            penultimate_epc = newest_epc

        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
        eligibility.check_gbis_warmfront()
        eligibility.check_eco4_warmfront()

        if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
            eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
            eligibility.check_gbis_warmfront()
            eligibility.check_eco4_warmfront()
            # If this is the case, we need to update the older epcs
            # We don't update just to make data cleaning easier
            if penultimate_epc.get("estimated") is None:
                older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]

        # Full checks
        eligibility.check_gbis()
        eligibility.check_eco4()

        # We can only score properties which have a construction age band
        if eligibility.eco4_warmfront["eligible"] and (eligibility.epc["construction-age-band"] not in ["", None]):
            if eligibility.epc["uprn"] in ["", None]:
                # Previously this took element [1] of the split row id, which is "surveys" and cannot be
                # converted to an int
                eligibility.epc["uprn"] = proxy_uprn

            scoring_dictionary = prepare_model_data_row(
                property_id=property_meta["row_id"],
                modelling_epc=eligibility.epc,
                cleaned=cleaned,
                cleaning_data=cleaning_data,
                created_at=created_at,
                old_data=older_epcs,
                full_sap_epc=full_sap_epc,
                photo_supply_lookup=photo_supply_lookup,
                floor_area_decile_thresholds=floor_area_decile_thresholds,
            )
            scoring_data.extend(scoring_dictionary)

        # Every survey with an EPC is recorded, whether or not it is eligible
        results.append(
            {
                "row_id": property_meta["row_id"],
                "uprn": eligibility.epc["uprn"],
                "Address": property_meta["ADDRESS 1"],
                "Postcode": property_meta["POSTCODE"],
                "property_type": eligibility.epc["property-type"],
                "gbis_eligible": eligibility.gbis_warmfront,
                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                "eco4_message": eligibility.eco4_warmfront["message"],
                "sap": float(eligibility.epc["current-energy-efficiency"]),
                "gbis_eligible_future": eligibility.gbis["eligible"],
                "gbis_eligible_future_message": eligibility.gbis["message"],
                "eco4_eligible_future": eligibility.eco4["eligible"],
                "eco4_eligible_future_message": eligibility.eco4["message"],
                # Property components
                "roof": eligibility.roof["clean_description"],
                "walls": eligibility.walls["clean_description"],
                "cavity_type": eligibility.cavity["type"],
                "heating": eligibility.epc["mainheat-description"],
                "tenure": eligibility.tenure,
                "date_epc": eligibility.epc["lodgement-date"],
                **eligibility.walls,
                **eligibility.roof,
            }
        )

    scoring_df = pd.DataFrame(scoring_data)

    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )

    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])

    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )

    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)
    scoring_df["UPRN"] = scoring_df["UPRN"].astype(int)

    # NOTE(review): the portfolio id says "ha33" although the row ids here are for ha25 - confirm this is
    # intentional
    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )

    predictions = all_predictions["sap_change_predictions"].copy()

    results_df = pd.DataFrame(results)

    # The uplift of each prediction row is measured against the current SAP and then summed per property
    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
        results_df[["row_id", "sap"]], how="left", on="row_id"
    )
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()

    results_df = results_df.merge(
        predictions[["sap_uplift", "row_id"]],
        how="left",
        on="row_id"
    )
    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]

    eligibility_assessment = []
    for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
        # The upgrade requirements are dependent on the current SAP. A property that is an F or G
        # (SAP of 38 or lower) has a lower target than the rest.
        # NOTE(review): an eligible property that was not scored (no construction age band) has a missing
        # post install SAP and therefore falls through to "unlikely" - confirm this is the wanted label
        if row["sap"] <= 38:
            highest, high, medium = 57, 55, 53
        else:
            highest, high, medium = 71, 69, 67

        post_install_sap = row["post_install_sap"]
        if post_install_sap >= highest:
            eligibility_classification = "highest confidence"
        elif post_install_sap >= high:
            eligibility_classification = "high confidence"
        elif post_install_sap >= medium:
            eligibility_classification = "medium confidence"
        else:
            eligibility_classification = "unlikely"

        eligibility_assessment.append(
            {
                "row_id": row["row_id"],
                "eligibility_classification": eligibility_classification
            }
        )

    eligibility_assessment = pd.DataFrame(eligibility_assessment)

    results_df = results_df.merge(
        eligibility_assessment, how="left", on="row_id"
    )
    return results_df, scoring_data, nodata
|
||||
|
||||
|
||||
def analyse_results(results_df, data, eco4_prospects_survey_list):
    """
    Combine the eligibility results with the asset list and the survey list, and split out the
    properties of interest.

    :param results_df: Eligibility results, one row per property, keyed on "row_id"
    :param data: Asset list with "row_id", "survey_key" and "warmfront_identified" columns
    :param eco4_prospects_survey_list: Survey list with "survey_key", "ADDRESS 1", "NO" and "POSTCODE"
        columns
    :return: Tuple of (warmfront_identified, ideal_eco4, gbis) DataFrames. warmfront_identified holds the
        properties flagged as identified in the asset list, ideal_eco4 the ECO4-eligible properties with at
        most 100 of numeric roof insulation thickness and a SAP of at most 54, and gbis the GBIS-eligible
        properties that are not in ideal_eco4
    """
    analysis_data = data[["row_id", "survey_key", "warmfront_identified"]].merge(
        results_df, how="left", on="row_id"
    )

    analysis_data = analysis_data.merge(
        eco4_prospects_survey_list[["survey_key", "ADDRESS 1", "NO", "POSTCODE"]],
        how="left", on="survey_key"
    )

    # Missing thicknesses become None before they are passed to convert_thickness_to_numeric
    analysis_data["roof_insulation_thickness"] = np.where(
        pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"]
    )
    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
        lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True)
    )

    warmfront_identified = analysis_data[
        (analysis_data["warmfront_identified"] == True)
    ]

    # Because we don't know which property is for which scheme, we'll just look at what we found
    ideal_eco4 = analysis_data[
        (analysis_data["eco4_eligible"] == True) &
        (analysis_data["roof_insulation_thickness_numeric"] <= 100) &
        (analysis_data["sap"] <= 54)
    ]

    gbis = analysis_data[
        (analysis_data["gbis_eligible"] == True) &
        ~analysis_data["row_id"].isin(ideal_eco4["row_id"].values)
    ]

    # Previously the subsets were computed and then thrown away - we hand them back to the caller
    return warmfront_identified, ideal_eco4, gbis
|
||||
|
||||
|
||||
def analyse_lost_surveys(results_df):
    """
    Split the eligibility results for the unmatched surveys into the properties of interest.

    The "roof_insulation_thickness" column of results_df is rewritten and a
    "roof_insulation_thickness_numeric" column is added to it in place.

    :param results_df: Eligibility results for the unmatched surveys, one row per property
    :return: Tuple of (ideal_eco4, gbis) DataFrames. ideal_eco4 holds the ECO4-eligible properties with at
        most 100 of numeric roof insulation thickness and a SAP of at most 54, and gbis the GBIS-eligible
        properties that are not in ideal_eco4
    """
    # Missing thicknesses become None before they are passed to convert_thickness_to_numeric
    results_df["roof_insulation_thickness"] = np.where(
        pd.isnull(results_df["roof_insulation_thickness"]), None, results_df["roof_insulation_thickness"]
    )
    results_df["roof_insulation_thickness_numeric"] = results_df["roof_insulation_thickness"].apply(
        lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True)
    )

    ideal_eco4 = results_df[
        (results_df["eco4_eligible"] == True) &
        (results_df["roof_insulation_thickness_numeric"] <= 100) &
        (results_df["sap"] <= 54)
    ]

    gbis = results_df[
        (results_df["gbis_eligible"] == True) &
        ~results_df["row_id"].isin(ideal_eco4["row_id"].values)
    ]

    # Previously the subsets were computed and then thrown away - we hand them back to the caller
    return ideal_eco4, gbis
|
||||
|
||||
|
||||
def app():
    """
    Entry point of the ha25 eligibility run.

    Loads the asset list and the modelling inputs from S3, runs get_epc_data and then replaces the
    outputs with the snapshot stored in "ha25_10_jan.pickle".
    """
    import pickle

    dev_bucket = "retrofit-data-dev"

    data, eco4_prospects_survey_list, lost_identified_properties = load_data()

    # Each property gets an id of the form "ha25_<position in the asset list>"
    data["row_id"] = [f"ha25_{position}" for position in range(len(data))]

    raw_cleaned = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name=dev_bucket
    )
    cleaned = msgpack.unpackb(raw_cleaned, raw=False)

    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name=dev_bucket, file_key="sap_change_model/cleaning_dataset.parquet",
    )

    created_at = datetime.now().isoformat()

    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=dev_bucket)

    results_df, scoring_data, nodata = get_epc_data(
        data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds
    )

    # To refresh the snapshot, pickle.dump a dictionary with the keys "results_df", "scoring_data" and
    # "nodata" to "ha25_10_jan.pickle". The previous snapshot was ha25.pickle

    # NOTE(review): the outputs computed above are overwritten by the snapshot below - confirm whether the
    # run or the snapshot is the one that is meant to be used
    # NOTE: pickle must only ever be used on files we produced ourselves
    with open("ha25_10_jan.pickle", "rb") as snapshot_file:
        snapshot = pickle.load(snapshot_file)

    results_df = snapshot["results_df"]
    scoring_data = snapshot["scoring_data"]
    nodata = snapshot["nodata"]
|
||||
|
|
@ -264,21 +264,21 @@ def get_ha_33data(data, cleaned, cleaning_data, created_at):
|
|||
|
||||
|
||||
def analyse_ha_33(results_df, data):
|
||||
results_df_social = results_df[results_df["tenure"] == "Rented (social)"]
|
||||
# results_df_social = results_df[results_df["tenure"] == "Rented (social)"]
|
||||
#
|
||||
# results_df_social["tenure"].value_counts()
|
||||
|
||||
results_df_social["tenure"].value_counts()
|
||||
data[data["row_id"].isin(results_df["row_id"].values)]["PROPERTY TYPE"].value_counts()
|
||||
|
||||
data[data["row_id"].isin(results_df_social["row_id"].values)]["PROPERTY TYPE"].value_counts()
|
||||
n_identified = (results_df["gbis_eligible"] | results_df["eco4_eligible"]).sum()
|
||||
n_eco4 = results_df["eco4_eligible"].sum()
|
||||
n_gbis = results_df[~results_df["eco4_eligible"]]["gbis_eligible"].sum()
|
||||
|
||||
n_identified = (results_df_social["gbis_eligible"] | results_df_social["eco4_eligible"]).sum()
|
||||
n_eco4 = results_df_social["eco4_eligible"].sum()
|
||||
n_gbis = results_df_social[~results_df_social["eco4_eligible"]]["gbis_eligible"].sum()
|
||||
|
||||
eco_eligibile = results_df_social[results_df_social["eco4_eligible"]]
|
||||
eco_eligibile = results_df[results_df["eco4_eligible"]]
|
||||
eco_eligibile["walls"].value_counts()
|
||||
eco_eligibile["roof"].value_counts()
|
||||
|
||||
results_df_social[results_df_social["gbis_eligible"] | results_df_social["eco4_eligible"]]["tenure"].value_counts()
|
||||
results_df[results_df["gbis_eligible"] | results_df["eco4_eligible"]]["tenure"].value_counts()
|
||||
|
||||
results_df_social["eligibility_classification"].value_counts()
|
||||
|
||||
|
|
@ -316,3 +316,11 @@ def app():
|
|||
created_at = datetime.now().isoformat()
|
||||
|
||||
results_df, _, _ = get_ha_33data(data, cleaned, cleaning_data, created_at)
|
||||
|
||||
# Read in
|
||||
import pickle
|
||||
with open("ha33_results.pickle", "rb") as f:
|
||||
data = pickle.load(f)
|
||||
results_df = pd.DataFrame(data["results"])
|
||||
scoring_data = data["scoring_data"]
|
||||
nodata = data["nodata"]
|
||||
|
|
|
|||
328
etl/eligibility/ha_15_32/ha4_app.py
Normal file
328
etl/eligibility/ha_15_32/ha4_app.py
Normal file
|
|
@ -0,0 +1,328 @@
|
|||
import os
|
||||
import msgpack
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from utils.s3 import read_from_s3
|
||||
from utils.logger import setup_logger
|
||||
from dotenv import load_dotenv
|
||||
from utils.s3 import read_dataframe_from_s3_parquet
|
||||
from tqdm import tqdm
|
||||
from backend.SearchEpc import SearchEpc
|
||||
from etl.eligibility.Eligibility import Eligibility
|
||||
from etl.eligibility.ha_15_32.app import prepare_model_data_row
|
||||
from etl.epc.DataProcessor import DataProcessor
|
||||
from etl.epc.settings import COLUMNS_TO_MERGE_ON
|
||||
from backend.ml_models.api import ModelApi
|
||||
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
|
||||
from recommendations.recommendation_utils import calculate_cavity_age
|
||||
from recommendation_utils import convert_thickness_to_numeric
|
||||
|
||||
import re
|
||||
|
||||
# NOTE(review): this file already lives in etl/eligibility/ha_15_32, so this path points at
# etl/eligibility/ha_15_32/etl/eligibility/ha_15_32/.env - confirm where the .env file actually is
ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"

logger = setup_logger()

# The .env file must be loaded BEFORE we read from the environment, otherwise a token that is only
# defined in the .env file is never picked up
load_dotenv(ENV_FILE)
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
||||
|
||||
|
||||
def load_ha_4():
    """
    Load the HA 4 asset list.

    This also widens the pandas display options, which apply to the whole session.

    :return: DataFrame with the contents of "HA 4 Asset List.csv". The path is relative to the working
        directory
    """
    pd.set_option('display.max_rows', 500)
    pd.set_option('display.max_columns', 500)
    pd.set_option('display.width', 1000)

    data = pd.read_csv("etl/eligibility/ha_15_32/HA 4 Asset List.csv", low_memory=False)
    return data
|
||||
|
||||
|
||||
def standardise_ha_4(data):
    """
    Clean the location names of the HA 4 asset list and drop the rows with an unusable postcode.

    The DataFrame passed in is left untouched.

    :param data: HA 4 asset list with "Location Name" and "Post Code" columns
    :return: A cleaned copy of data
    """
    # Remove any unusable postcodes. We copy here so that the caller's DataFrame is not modified by the
    # cleaning below
    data = data[data["Post Code"] != '\\\\'].copy()

    # Location name contains some strings like {0664} which we remove. The pattern is a raw string so
    # that the backslashes reach the regex engine as they are
    data['Location Name'] = data['Location Name'].str.replace(r'\{.*?\}', '', regex=True)

    # Trim whitespace from either end of location name
    data["Location Name"] = data["Location Name"].str.strip()

    # Some specific replacements
    data["Location Name"] = np.where(
        data["Location Name"] == "Calderbrook Pl & Cog La",
        "Calderbrook Place",
        data["Location Name"]
    )

    return data
|
||||
|
||||
|
||||
def _classify_eco4_confidence(sap, post_install_sap):
    """
    Grade how close the predicted post install SAP is to the upgrade target of a property

    :param sap: Current SAP score of the property
    :param post_install_sap: Predicted SAP score once the measures are installed
    :return: One of "highest confidence", "high confidence", "medium confidence" or "unlikely"
    """
    # The upgrade requirements are dependent on the current SAP: properties with a SAP of 38 or below are graded
    # against 57, every other property against 71
    target = 57 if sap <= 38 else 71

    if post_install_sap >= target:
        return "highest confidence"
    if post_install_sap >= target - 2:
        return "high confidence"
    if post_install_sap >= target - 4:
        return "medium confidence"
    # Also reached when post_install_sap is NaN, because every comparison above is then False
    return "unlikely"


def get_ha_4_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
    """
    Search the EPCs for every entry of the HA 4 asset list, check their eligibility and predict the SAP uplift

    :param data: Asset list with the "Address Line 1", "Location Name", "Post Code" and "row_id" columns
    :param cleaned: Passed through to Eligibility and prepare_model_data_row
    :param cleaning_data: Passed through to prepare_model_data_row and the DataProcessor cleaning steps
    :param created_at: Timestamp of the run, passed to prepare_model_data_row and ModelApi
    :param photo_supply_lookup: Passed through to prepare_model_data_row
    :param floor_area_decile_thresholds: Passed through to prepare_model_data_row
    :return: Tuple of (results_df, scoring_data, nodata): one row per uprn with the eligibility results, the rows
        that were sent to the models, and the entries for which no EPC data was found
    """
    scoring_data = []
    results = []
    nodata = []
    for _, property_meta in tqdm(data.iterrows(), total=len(data)):
        # For many of the entries in this dataset, we're actually given an entire building, so we fetch EPCs for
        # every building
        # The asset list columns used here carry no archetype, so no property type is passed to the search
        searcher = SearchEpc(
            address1=property_meta["Address Line 1"],
            postcode=property_meta["Post Code"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            property_type=None,
        )

        searcher.find_property(skip_os=True)

        if searcher.newest_epc is None:
            # Second attempt, this time with the location name as the address
            searcher = SearchEpc(
                address1=property_meta["Location Name"],
                postcode=property_meta["Post Code"],
                auth_token=EPC_AUTH_TOKEN,
                os_api_key=None,
                property_type=None,
            )
            searcher.search()

        # NOTE(review): this gate is kept from the original flow - confirm that search() populates newest_epc,
        # otherwise every entry that needs the second attempt ends up in nodata
        if searcher.newest_epc is None:
            nodata.append(property_meta["row_id"])
            continue

        searcher.search()

        # NOTE(review): nodata holds row ids for the case above and full row dictionaries for this one
        if searcher.data is None:
            nodata.append(property_meta.to_dict())
            continue

        epcs = pd.DataFrame(searcher.data["rows"])

        # Take the newest EPC by UPRN
        epcs = epcs.sort_values(by=["lodgement-date"], ascending=False)
        newest_epcs = epcs.drop_duplicates(subset=["uprn"], keep="first")

        # For each EPC, we now check eligibility
        for _, epc in newest_epcs.iterrows():
            eligibility = Eligibility(epc=epc.to_dict(), cleaned=cleaned)
            eligibility.check_gbis_warmfront()
            eligibility.check_eco4_warmfront()

            # The full gbis and eco4 checks are run for every property as well
            eligibility.check_gbis()
            eligibility.check_eco4()

            if eligibility.eco4_warmfront["eligible"]:
                same_property = epcs["uprn"] == epc["uprn"]

                # Every other EPC that was lodged for the same property
                old_data = epcs[same_property & (epcs["lmk-key"] != epc["lmk-key"])].to_dict("records")

                # EPCs of the property that were lodged for a new dwelling
                full_sap_epc = epcs[
                    same_property & (epcs["transaction-type"] == "new dwelling")
                ].to_dict("records")

                scoring_dictionary = prepare_model_data_row(
                    property_id=eligibility.epc["uprn"],
                    modelling_epc=eligibility.epc,
                    cleaned=cleaned,
                    cleaning_data=cleaning_data,
                    created_at=created_at,
                    old_data=old_data,
                    full_sap_epc=full_sap_epc,
                    # Forwarded in the same way as the HA 7 version of this function does
                    photo_supply_lookup=photo_supply_lookup,
                    floor_area_decile_thresholds=floor_area_decile_thresholds
                )
                scoring_data.extend(scoring_dictionary)

            results.append(
                {
                    "uprn": epc["uprn"],
                    "Location Name": property_meta["Location Name"],
                    "Post Code": property_meta["Post Code"],
                    "property_type": eligibility.epc["property-type"],
                    "gbis_eligible": eligibility.gbis_warmfront,
                    "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                    "eco4_message": eligibility.eco4_warmfront["message"],
                    "sap": float(eligibility.epc["current-energy-efficiency"]),
                    "gbis_eligible_future": eligibility.gbis["eligible"],
                    "gbis_eligible_future_message": eligibility.gbis["message"],
                    "eco4_eligible_future": eligibility.eco4["eligible"],
                    "eco4_eligible_future_message": eligibility.eco4["message"],
                    # Property components
                    "roof": eligibility.roof["clean_description"],
                    "walls": eligibility.walls["clean_description"],
                    "cavity_type": eligibility.cavity["type"],
                    "heating": eligibility.epc["mainheat-description"],
                    "tenure": eligibility.tenure,
                    "date_epc": eligibility.epc["lodgement-date"],
                }
            )

    scoring_df = pd.DataFrame(scoring_data)

    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )

    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])

    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )

    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)

    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )

    predictions = all_predictions["sap_change_predictions"].copy()

    results_df = pd.DataFrame(results)

    # The uplift of a prediction is its distance to the current SAP; the uplifts are then summed per property
    predictions = predictions.rename(columns={"property_id": "uprn"}).merge(
        results_df[["uprn", "sap"]], how="left", on="uprn"
    )
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("uprn")["sap_uplift"].sum().reset_index()

    results_df = results_df.merge(
        predictions[["sap_uplift", "uprn"]],
        how="left",
        on="uprn"
    )
    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]

    results_df = results_df[~pd.isnull(results_df["uprn"])]

    # The columns are named explicitly so that the merge below still works when nothing is eco4 eligible
    eligibility_assessment = pd.DataFrame(
        [
            {
                "uprn": row["uprn"],
                "eligibility_classification": _classify_eco4_confidence(row["sap"], row["post_install_sap"])
            }
            for _, row in results_df[results_df["eco4_eligible"] == True].iterrows()
        ],
        columns=["uprn", "eligibility_classification"]
    )

    results_df = results_df.merge(
        eligibility_assessment, how="left", on="uprn"
    )
    # We have some properties that are duplicated so we take just one instance
    results_df = results_df.drop_duplicates(subset=["uprn"])

    return results_df, scoring_data, nodata
|
||||
|
||||
|
||||
def analyse_ha_4(results_df, data):
    """
    Compute the headline eligibility counts of the HA 4 results

    Every figure only lives in a local variable and nothing is returned, so this is only of use when it is
    stepped through interactively.
    :param results_df: Results with the gbis_eligible, eco4_eligible, gbis_eligible_future, eco4_eligible_future
        and eligibility_classification columns
    :param data: Not used
    """
    already_identified = results_df["gbis_eligible"] | results_df["eco4_eligible"]

    # Properties identified today: eco4 takes precedence, gbis is only counted for the remainder
    n_identified = already_identified.sum()
    n_eco4 = results_df["eco4_eligible"].sum()
    n_gbis = results_df.loc[~results_df["eco4_eligible"], "gbis_eligible"].sum()

    # Breakdown of the confidence classification for the eco4 properties
    eco_eligible = results_df.loc[results_df["eco4_eligible"]]
    eco_eligible["eligibility_classification"].value_counts()

    # Properties that are not identified today, but pass one of the full checks
    eco4_in_future = results_df["eco4_eligible_future"] == True
    future_possibilities_eco = results_df.loc[eco4_in_future & ~already_identified].copy()

    gbis_only_in_future = (results_df["gbis_eligible_future"] == True) & (
        results_df["eco4_eligible_future"] == False
    )
    future_possibilities_gbis = results_df.loc[gbis_only_in_future & ~already_identified].copy()

    total_future_possibilities = len(future_possibilities_eco) + len(future_possibilities_gbis)
|
||||
|
||||
|
||||
def app():
    """
    Run the HA 4 eligibility assessment: load and standardise the asset list, fetch the supporting datasets from
    s3 and hand everything to get_ha_4_data
    """
    bucket = "retrofit-data-dev"

    data = standardise_ha_4(load_ha_4())

    # Identifier for each row of the asset list: "h4" followed by its position
    data["row_id"] = [f"h4{position}" for position in range(len(data))]

    # The cleaned EPC data is stored in s3 as a msgpack payload
    cleaned = msgpack.unpackb(
        read_from_s3(s3_file_name="cleaned_epc_data/cleaned.bson", bucket_name=bucket),
        raw=False
    )

    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name=bucket, file_key="sap_change_model/cleaning_dataset.parquet",
    )

    created_at = datetime.now().isoformat()

    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=bucket)

    results_df, scoring_data, nodata = get_ha_4_data(
        data=data,
        cleaned=cleaned,
        cleaning_data=cleaning_data,
        created_at=created_at,
        photo_supply_lookup=photo_supply_lookup,
        floor_area_decile_thresholds=floor_area_decile_thresholds
    )
|
||||
|
||||
# Store the data locally as a pickle
|
||||
# import pickle
|
||||
# with open("ha_4.pickle", "wb") as f:
|
||||
# pickle.dump(
|
||||
# {
|
||||
# "results_df": results_df,
|
||||
# "scoring_data": scoring_data,
|
||||
# "nodata": nodata
|
||||
# }, f)
|
||||
|
||||
# Read in
|
||||
# import pickle
|
||||
# with open("ha_4.pickle", "rb") as f:
|
||||
# data = pickle.load(f)
|
||||
# results_df = data["results_df"]
|
||||
# scoring_data = data["scoring_data"]
|
||||
# nodata = data["nodata"]
|
||||
383
etl/eligibility/ha_15_32/ha7_app.py
Normal file
383
etl/eligibility/ha_15_32/ha7_app.py
Normal file
|
|
@ -0,0 +1,383 @@
|
|||
import os
|
||||
import msgpack
|
||||
import openpyxl
|
||||
from openpyxl.styles.colors import COLOR_INDEX
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet
|
||||
from utils.logger import setup_logger
|
||||
from dotenv import load_dotenv
|
||||
from tqdm import tqdm
|
||||
from backend.SearchEpc import SearchEpc
|
||||
from etl.eligibility.Eligibility import Eligibility
|
||||
from etl.eligibility.ha_15_32.app import prepare_model_data_row
|
||||
from etl.epc.DataProcessor import DataProcessor
|
||||
from etl.epc.settings import COLUMNS_TO_MERGE_ON
|
||||
from backend.ml_models.api import ModelApi
|
||||
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
|
||||
from recommendations.recommendation_utils import calculate_cavity_age
|
||||
from recommendation_utils import convert_thickness_to_numeric
|
||||
|
||||
# Location of the .env file that is handed to load_dotenv further down.
# NOTE(review): this module is etl/eligibility/ha_15_32/ha7_app.py, so Path(__file__).parent already is the
# ha_15_32 directory and this resolves to .../ha_15_32/etl/eligibility/ha_15_32/.env - confirm this is intended
ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
|
||||
|
||||
logger = setup_logger()
|
||||
load_dotenv(ENV_FILE)
|
||||
|
||||
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
||||
OS_API_KEY = os.getenv("ORDNANCE_SURVEY_API_KEY")
|
||||
|
||||
|
||||
def load_data():
    """
    Load the data from the excel, together with the fill colour of each row

    The fill colour of the first cell of a row is stored in "row_color", translated to a colour name in
    "row_colour_name" and to a status in "row_code".
    :return: DataFrame with one row per spreadsheet row (the header row excluded)
    """

    workbook = openpyxl.load_workbook('etl/eligibility/ha_15_32/HESTIA - HA 7 ASSET LIST.xlsx')
    sheet = workbook.active

    # Prepare lists to collect rows data and their colors
    rows_data = []
    rows_colors = []
    for row in sheet.iter_rows(min_row=2, values_only=False):  # Assuming the first row is headers
        rows_data.append([cell.value for cell in row])

        colour_index = row[0].fill.start_color.index
        if isinstance(colour_index, int):
            # A palette position, which is translated to its colour code
            row_color = COLOR_INDEX[colour_index]
        elif colour_index is None or colour_index == '00000000':
            # No fill: COLOR_INDEX cannot be indexed with None, so the row is recorded without a colour
            row_color = None
        else:
            # Presumably already a colour code rather than a palette position, so it is kept as is
            row_color = colour_index
        rows_colors.append(row_color)

    df = pd.DataFrame(rows_data, columns=[cell.value for cell in sheet[1]])

    # Add the row colors as a new column
    df['row_color'] = rows_colors

    # Name the ninth column by position. The labels are rebuilt as a list, rather than written into
    # df.columns.values, so that no internal array of the index is mutated
    column_names = list(df.columns)
    column_names[8] = "is_active"
    df.columns = column_names

    # Remove None columns
    df = df.dropna(axis=1, how='all')

    # We now parse the colours. Anything that is neither of the two known codes, including rows without a fill,
    # is treated as yellow
    df["row_colour_name"] = np.where(
        df["row_color"] == "0000FFFF", "red",
        np.where(df["row_color"] == "00FF00FF", "green", "yellow")
    )
    df["row_code"] = np.where(
        df["row_colour_name"] == "red", "invalid",
        np.where(df["row_colour_name"] == "green", "potential ECO4", "needs criteria change")
    )

    return df
|
||||
|
||||
|
||||
def _classify_eco4_confidence(sap, post_install_sap):
    """
    Grade how close the predicted post install SAP is to the upgrade target of a property

    :param sap: Current SAP score of the property
    :param post_install_sap: Predicted SAP score once the measures are installed
    :return: One of "highest confidence", "high confidence", "medium confidence" or "unlikely"
    """
    # The upgrade requirements are dependent on the current SAP: properties with a SAP of 38 or below are graded
    # against 57, every other property against 71
    target = 57 if sap <= 38 else 71

    if post_install_sap >= target:
        return "highest confidence"
    if post_install_sap >= target - 2:
        return "high confidence"
    if post_install_sap >= target - 4:
        return "medium confidence"
    # Also reached when post_install_sap is NaN, because every comparison above is then False
    return "unlikely"


def get_ha7_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
    """
    Find the newest EPC of every property in the HA 7 asset list, check its eligibility and predict the SAP uplift

    :param data: Asset list with the "Address", "Address2", "Postcode", "Archetype" and "row_id" columns
    :param cleaned: Passed through to Eligibility, calculate_cavity_age and prepare_model_data_row
    :param cleaning_data: Passed through to prepare_model_data_row and the DataProcessor cleaning steps
    :param created_at: Timestamp of the run, passed to prepare_model_data_row and ModelApi
    :param photo_supply_lookup: Passed through to prepare_model_data_row
    :param floor_area_decile_thresholds: Passed through to prepare_model_data_row
    :return: Tuple of (results_df, scoring_data, nodata): one row per property with an EPC, the rows that were
        sent to the models, and the row ids for which no EPC was found
    """
    property_type_lookup = {
        # "Mid Terrace": "Mid-Terrace",
        # "End Terrace": "End-Terrace",
        # "Semi Detached": "Semi-Detached",
        # "Detached": "Detached",
        "House": "House",
        "Flat": "Flat",
        "Bungalow": "Bungalow",
        "Maisonette": "Maisonette",
    }

    scoring_data = []
    results = []
    nodata = []
    for _, house in tqdm(data.iterrows(), total=len(data)):

        # Fall back to the second address field when the first one is empty. A missing cell can come through as
        # NaN, which is truthy, so it is checked explicitly
        address = house["Address"]
        if pd.isnull(address) or not address:
            address = house["Address2"]

        searcher = SearchEpc(
            address1=address,
            postcode=house["Postcode"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=None,
            property_type=property_type_lookup.get(house["Archetype"]),
        )

        searcher.find_property(skip_os=True)

        if searcher.newest_epc is None:
            nodata.append(house["row_id"])
            continue

        newest_epc = searcher.newest_epc
        older_epcs = searcher.older_epcs
        full_sap_epc = searcher.full_sap_epc

        eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
        eligibility.check_gbis_warmfront()
        eligibility.check_eco4_warmfront()

        # If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity

        # Loft MUST be suitable
        cavity_age = None
        if (
                eligibility.walls["is_cavity_wall"] and
                eligibility.walls["is_filled_cavity"] and
                eligibility.loft["suitability"] and
                eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
        ):
            # We check the age of the cavity and if it's particularly old, we flag it
            cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)

        # The full gbis and eco4 checks are run for every property as well
        eligibility.check_gbis()
        eligibility.check_eco4()

        if eligibility.eco4_warmfront["eligible"]:
            scoring_dictionary = prepare_model_data_row(
                property_id=house["row_id"],
                modelling_epc=eligibility.epc,
                cleaned=cleaned,
                cleaning_data=cleaning_data,
                created_at=created_at,
                old_data=older_epcs,
                full_sap_epc=full_sap_epc,
                photo_supply_lookup=photo_supply_lookup,
                floor_area_decile_thresholds=floor_area_decile_thresholds
            )
            scoring_data.extend(scoring_dictionary)

        # A result is recorded for every property with an EPC, whatever the outcome of the checks.
        # NOTE(review): the unpacked dictionaries come later in the literal, so any key they share with an
        # earlier entry (or with each other) overwrites it - confirm this is intended
        results.append(
            {
                "row_id": house["row_id"],
                "address": house["Address"],
                "postcode": house["Postcode"],
                "gbis_eligible": eligibility.gbis_warmfront,
                "eco4_eligible": eligibility.eco4_warmfront["eligible"],
                "eco4_message": eligibility.eco4_warmfront["message"],
                "sap": float(eligibility.epc["current-energy-efficiency"]),
                "gbis_eligible_future": eligibility.gbis["eligible"],
                "gbis_eligible_future_message": eligibility.gbis["message"],
                "eco4_eligible_future": eligibility.eco4["eligible"],
                "eco4_eligible_future_message": eligibility.eco4["message"],
                # Property components
                "roof": eligibility.roof["clean_description"],
                "walls": eligibility.walls["clean_description"],
                "heating": eligibility.epc["mainheat-description"],
                "tenure": eligibility.tenure,
                "date_epc": eligibility.epc["lodgement-date"],
                **newest_epc,
                "cavity_age": cavity_age,
                **eligibility.walls,
                **eligibility.roof,
            }
        )

    scoring_df = pd.DataFrame(scoring_data)
    # Implement the same process that is being used in the recommendation engine to clean scoring_df

    # Perform the same cleaning as in the model - first clean number of room variables though
    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
        colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
    )

    scoring_df = DataProcessor.apply_averages_cleaning(
        data_to_clean=scoring_df,
        cleaning_data=cleaning_data,
        cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"],
    ).drop(columns=["LOCAL_AUTHORITY"])

    scoring_df = DataProcessor.clean_missings_after_description_process(
        scoring_df,
        ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or (
            "insulation_thickness" in c) or ("ENERGY_EFF" in c)]
    )

    scoring_df = DataProcessor.clean_efficiency_variables(scoring_df)

    model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at)
    all_predictions = model_api.predict_all(
        df=scoring_df,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
            "heat_demand_predictions": "retrofit-heat-predictions-dev",
            "carbon_change_predictions": "retrofit-carbon-predictions-dev"
        }
    )

    predictions = all_predictions["sap_change_predictions"].copy()

    results_df = pd.DataFrame(results)

    # The uplift of a prediction is its distance to the current SAP; the uplifts are then summed per property
    predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
        results_df[["row_id", "sap"]], how="left", on="row_id"
    )
    predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
    predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()

    results_df = results_df.merge(
        predictions[["sap_uplift", "row_id"]],
        how="left",
        on="row_id"
    )

    results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]

    # The columns are named explicitly so that the merge below still works when nothing is eco4 eligible
    eligibility_assessment = pd.DataFrame(
        [
            {
                "row_id": row["row_id"],
                "eligibility_classification": _classify_eco4_confidence(row["sap"], row["post_install_sap"])
            }
            for _, row in results_df[results_df["eco4_eligible"] == True].iterrows()
        ],
        columns=["row_id", "eligibility_classification"]
    )

    results_df = results_df.merge(
        eligibility_assessment, how="left", on="row_id"
    )

    return results_df, scoring_data, nodata
|
||||
|
||||
|
||||
def analyse_ha_7(results_df, data):
    """
    Compare the eligibility results of HA 7 with the colour coding that came with the asset list

    Every figure only lives in a local variable and nothing is returned, so this is only of use when it is
    stepped through interactively.
    :param results_df: Results, as produced by get_ha7_data
    :param data: Asset list with the "row_id", "row_code", "Property Type" and "Construction Year Band" columns
    """
    asset_columns = ["row_id", "row_code", "Property Type", "Construction Year Band"]
    analysis_data = results_df.merge(data[asset_columns], how="left", on="row_id")

    analysis_data["row_code"].value_counts()

    # Missing roof insulation thicknesses are set to None before they are converted to a number
    thickness = analysis_data["roof_insulation_thickness"]
    analysis_data["roof_insulation_thickness"] = np.where(pd.isnull(thickness), None, thickness)
    analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
        lambda value: convert_thickness_to_numeric(value, is_flat=False, is_pitched=True)
    )

    # The eco4 properties are split on a roof insulation thickness of 100
    eco4_now = analysis_data["eco4_eligible"] == True
    thin_roof_insulation = analysis_data["roof_insulation_thickness_numeric"] <= 100
    ideal_eco4 = analysis_data.loc[eco4_now & thin_roof_insulation]

    thick_roof_insulation = analysis_data["roof_insulation_thickness_numeric"] > 100
    secondary_eco4_warmfront_not_sold = analysis_data.loc[eco4_now & thick_roof_insulation]

    # underperforming cavities: failed on a full cavity with a cavity age above 9 * 365
    failed_on_full_cavity = analysis_data["eco4_message"] == "Failed due to full cavity - check cavity age"
    old_cavity = analysis_data["cavity_age"] > 9 * 365
    underperforming_cavities = analysis_data.loc[failed_on_full_cavity & old_cavity & thin_roof_insulation]

    gbis_now = analysis_data["gbis_eligible"] == True
    eco4_rejected = analysis_data["eco4_eligible"] == False
    identified_gbis_not_sold = analysis_data.loc[gbis_now & eco4_rejected]

    flagged_in_asset_list = analysis_data["row_code"] == "potential ECO4"
    wf_identified = analysis_data.loc[flagged_in_asset_list]

    warmfront_identification = analysis_data["row_code"].value_counts()
    warmfront_identified = analysis_data.loc[analysis_data["row_code"] == "potential ECO4"]
    warmfront_identified["walls"].value_counts(normalize=True)

    analysis_data["Construction Year Band"].value_counts(normalize=True)

    # Number of days between the EPC date and today
    epc_dates = pd.to_datetime(warmfront_identified["date_epc"])
    days_to_today = (datetime.now() - epc_dates).dt.days
    days_to_today.mean()

    property_types = analysis_data["Property Type"].value_counts()

    already_identified = results_df["gbis_eligible"] | results_df["eco4_eligible"]
    n_identified = already_identified.sum()

    eco_identified = results_df.loc[results_df["eco4_eligible"]]
    n_eco4 = eco_identified["eco4_eligible"].sum()
    gbis_identified = results_df.loc[~results_df["eco4_eligible"] & results_df["gbis_eligible"]]
    n_gbis = results_df.loc[~results_df["eco4_eligible"], "gbis_eligible"].sum()

    # Breakdown of the confidence classification for the eco4 properties
    eco_eligible = results_df.loc[results_df["eco4_eligible"]]
    eco_eligible["eligibility_classification"].value_counts()

    # Properties that are not identified today, but pass one of the full checks
    eco4_in_future = results_df["eco4_eligible_future"] == True
    future_possibilities_eco = results_df.loc[eco4_in_future & ~already_identified].copy()

    gbis_only_in_future = (results_df["gbis_eligible_future"] == True) & (
        results_df["eco4_eligible_future"] == False
    )
    future_possibilities_gbis = results_df.loc[gbis_only_in_future & ~already_identified].copy()

    total_future_possibilities = len(future_possibilities_eco) + len(future_possibilities_gbis)
|
||||
|
||||
|
||||
def app():
    """
    Run the HA 7 eligibility assessment: load the asset list, fetch the supporting datasets from s3 and hand
    everything to get_ha7_data
    """
    bucket = "retrofit-data-dev"

    data = load_data()

    # Identifier for each row of the asset list: "ha7" followed by its position
    data["row_id"] = [f"ha7{position}" for position in range(len(data))]

    # The cleaned EPC data is stored in s3 as a msgpack payload
    cleaned = msgpack.unpackb(
        read_from_s3(s3_file_name="cleaned_epc_data/cleaned.bson", bucket_name=bucket),
        raw=False
    )

    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name=bucket, file_key="sap_change_model/cleaning_dataset.parquet",
    )

    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=bucket)

    created_at = datetime.now().isoformat()

    results_df, scoring_data, nodata = get_ha7_data(
        data=data,
        cleaned=cleaned,
        cleaning_data=cleaning_data,
        created_at=created_at,
        photo_supply_lookup=photo_supply_lookup,
        floor_area_decile_thresholds=floor_area_decile_thresholds
    )
|
||||
|
||||
# Pickle results
|
||||
# import pickle
|
||||
# with open("ha7_results_jan_10.pkl", "wb") as f:
|
||||
# pickle.dump({"results_df": results_df, "scoring_data": scoring_data, "nodata": nodata}, f)
|
||||
|
||||
# Read in the old data
|
||||
# import pickle
|
||||
# with open("ha7_results_jan_10.pkl", "rb") as f:
|
||||
# old_data = pickle.load(f)
|
||||
# results_df = old_data["results_df"]
|
||||
# scoring_data = old_data["scoring_data"]
|
||||
# nodata = old_data["nodata"]
|
||||
|
|
@ -766,12 +766,16 @@ class EPCDataProcessor:
|
|||
how='left'
|
||||
)
|
||||
|
||||
global_averages = cleaning_data[cols_to_clean].mean()
|
||||
|
||||
# Fill NaN values with averages
|
||||
for col in cols_to_clean:
|
||||
data_to_clean[col].fillna(data_to_clean[f"{col}_AVERAGE"], inplace=True)
|
||||
data_to_clean.drop(columns=[f"{col}_AVERAGE"], inplace=True)
|
||||
# If we still have missings
|
||||
data_to_clean[col].fillna(data_to_clean[col].mean(), inplace=True)
|
||||
# Final step if we still have missings - use global mean
|
||||
data_to_clean[col].fillna(global_averages[col], inplace=True)
|
||||
|
||||
return data_to_clean
|
||||
|
||||
|
|
|
|||
|
|
@ -23,6 +23,12 @@ def main():
|
|||
pd.DataFrame(epc_pipeline.compiled_all_equal_rows).to_parquet("refactor_datasets/all_equal_rows.parquet")
|
||||
pd.concat(epc_pipeline.compiled_cleaning_averages).to_parquet("refactor_datasets/cleaning_averages.parquet")
|
||||
|
||||
from utils.s3 import read_dataframe_from_s3_parquet
|
||||
dataset = read_dataframe_from_s3_parquet(
|
||||
bucket_name="retrofit-data-dev",
|
||||
file_key="sap_change_model/dataset_test.parquet",
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ class MainHeatAttributes(Definitions):
|
|||
"solar assisted heat pump",
|
||||
"exhaust source heat pump",
|
||||
"community heat pump",
|
||||
"portable electric heating"
|
||||
]
|
||||
FUEL_TYPES = ["electric", "mains gas", "wood logs", "coal", "oil", "wood pellets", "anthracite",
|
||||
"dual fuel mineral and wood", "smokeless fuel", "lpg", "b30k"]
|
||||
|
|
|
|||
|
|
@ -152,4 +152,7 @@ class WallAttributes(Definitions):
|
|||
else:
|
||||
result["insulation_thickness"] = "average"
|
||||
|
||||
if result["is_cavity_wall"] & result["is_as_built"] & (result["insulation_thickness"] == "average"):
|
||||
result["is_filled_cavity"] = True
|
||||
|
||||
return result
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ class WindowAttributes(Definitions):
|
|||
raise ValueError('Invalid description')
|
||||
|
||||
def process(self) -> Dict[str, Union[str, bool]]:
|
||||
result: Dict[str, Union[str, bool]] = {
|
||||
result: Dict[str, Union[str, bool, None]] = {
|
||||
"has_glazing": False,
|
||||
"glazing_coverage": None,
|
||||
"glazing_type": None,
|
||||
|
|
@ -80,7 +80,11 @@ class WindowAttributes(Definitions):
|
|||
break
|
||||
|
||||
# If we didn't find any coverage or type, we assume full coverage
|
||||
if not result["glazing_coverage"]:
|
||||
if (not result["glazing_coverage"]) & (result["glazing_type"] != "single"):
|
||||
result["glazing_coverage"] = "full"
|
||||
|
||||
# We reset some values if the glazing is single
|
||||
if result["glazing_type"] == "single":
|
||||
result["has_glazing"] = False
|
||||
|
||||
return result
|
||||
|
|
|
|||
|
|
@ -1652,4 +1652,17 @@ mainheat_cases = [
|
|||
'has_electricaire': False, 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False,
|
||||
"has_electric_heat_pumps": False,
|
||||
"has_micro-cogeneration": False},
|
||||
{'original_description': 'Portable electric heating assumed for most rooms', 'has_radiators': False,
|
||||
'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False,
|
||||
'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': False,
|
||||
'has_air_source_heat_pump': False, 'has_room_heaters': False, 'has_electric_storage_heaters': False,
|
||||
'has_warm_air': False, 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False,
|
||||
'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False,
|
||||
'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False, 'has_electric_heat_pump': False,
|
||||
'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False, 'has_exhaust_source_heat_pump': False,
|
||||
'has_community_heat_pump': False, 'has_portable_electric_heating': True, 'has_electric': True,
|
||||
'has_mains_gas': False, 'has_wood_logs': False, 'has_coal': False, 'has_oil': False, 'has_wood_pellets': False,
|
||||
'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False, 'has_lpg': False,
|
||||
'has_b30k': False, 'has_assumed': True, 'has_electricaire': False, 'has_assumed_for_most_rooms': True,
|
||||
'has_underfloor_heating': False}
|
||||
]
|
||||
|
|
|
|||
|
|
@ -550,7 +550,7 @@ wall_cases = [
|
|||
'is_as_built': False, 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False,
|
||||
'insulation_thickness': None, 'external_insulation': False, 'internal_insulation': False},
|
||||
{'original_description': 'Cavity wall, as built, insulated (assumed)', 'thermal_transmittance': None,
|
||||
'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': False, 'is_solid_brick': False,
|
||||
'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': True, 'is_solid_brick': False,
|
||||
'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True,
|
||||
'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average',
|
||||
'external_insulation': False, 'internal_insulation': False},
|
||||
|
|
@ -727,7 +727,7 @@ wall_cases = [
|
|||
'external_insulation': False, 'internal_insulation': False},
|
||||
{'original_description': 'Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)',
|
||||
'thermal_transmittance': None,
|
||||
'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': False, 'is_solid_brick': False,
|
||||
'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': True, 'is_solid_brick': False,
|
||||
'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True,
|
||||
'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average',
|
||||
'external_insulation': False, 'internal_insulation': False},
|
||||
|
|
|
|||
|
|
@ -30,7 +30,8 @@ windows_cases = [
|
|||
'glazing_type': 'triple', 'no_data': False},
|
||||
{'original_description': 'Gwydrau triphlyg rhannol', 'has_glazing': True, 'glazing_coverage': 'partial',
|
||||
'glazing_type': 'triple', 'no_data': False},
|
||||
{'original_description': 'Single glazed', 'has_glazing': True, 'glazing_coverage': 'full', 'glazing_type': 'single',
|
||||
{'original_description': 'Single glazed', 'has_glazing': False, 'glazing_coverage': None,
|
||||
'glazing_type': 'single',
|
||||
'no_data': False},
|
||||
{'original_description': 'Some double glazing', 'has_glazing': True, 'glazing_coverage': 'partial',
|
||||
'glazing_type': 'double', 'no_data': False},
|
||||
|
|
@ -46,7 +47,8 @@ windows_cases = [
|
|||
'glazing_type': 'double', 'no_data': False},
|
||||
{'original_description': 'Gwydrau dwbl gan mwyaf', 'has_glazing': True, 'glazing_coverage': 'most',
|
||||
'glazing_type': 'double', 'no_data': False},
|
||||
{'original_description': 'Gwydrau sengl', 'has_glazing': True, 'glazing_coverage': 'full', 'glazing_type': 'single',
|
||||
{'original_description': 'Gwydrau sengl', 'has_glazing': False, 'glazing_coverage': None,
|
||||
'glazing_type': 'single',
|
||||
'no_data': False},
|
||||
{'original_description': 'Ffenestri perfformiad uchel', 'has_glazing': True, 'glazing_coverage': 'full',
|
||||
'glazing_type': 'high performance', 'no_data': False},
|
||||
|
|
|
|||
|
|
@ -3,12 +3,13 @@ from pathlib import Path
|
|||
from etl.epc_clean.tests.test_data.test_roof_attributes_cases import clean_roof_test_cases
|
||||
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
|
||||
|
||||
|
||||
# For local testing
|
||||
if __file__ == "<input>":
|
||||
input_data_path = Path("./model_data/tests/test_data/EpcClean_inputs.obj")
|
||||
else:
|
||||
current_file_path = Path(__file__)
|
||||
input_data_path = current_file_path.parent / 'test_data' / 'EpcClean_inputs.obj'
|
||||
# if __file__ == "<input>":
|
||||
# input_data_path = Path("./model_data/tests/test_data/EpcClean_inputs.obj")
|
||||
# else:
|
||||
# current_file_path = Path(__file__)
|
||||
# input_data_path = current_file_path.parent / 'test_data' / 'EpcClean_inputs.obj'
|
||||
|
||||
|
||||
class TestRoofAttributes:
|
||||
|
|
@ -88,7 +89,12 @@ class TestRoofAttributes:
|
|||
|
||||
def test_clean_roof_no_description(self):
|
||||
roof = RoofAttributes('').process()
|
||||
assert roof == {}
|
||||
assert roof == {
|
||||
'thermal_transmittance': False, 'thermal_transmittance_unit': False, 'is_pitched': False,
|
||||
'is_roof_room': False, 'is_loft': False, 'is_flat': False, 'is_thatched': False,
|
||||
'is_at_rafters': False, 'is_assumed': False, 'has_dwelling_above': False, 'is_valid': False,
|
||||
'insulation_thickness': False
|
||||
}
|
||||
|
||||
def test_clean_roof_edge_cases(self):
|
||||
# Insulation thickness edge case
|
||||
|
|
|
|||
244
etl/solar/SolarPhotoSupply.py
Normal file
244
etl/solar/SolarPhotoSupply.py
Normal file
|
|
@ -0,0 +1,244 @@
|
|||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
|
||||
from utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class SolarPhotoSupply:
    """
    Build, store and query a lookup of typical PHOTO_SUPPLY values taken from EPC certificates.

    ``create_dataset`` aggregates the PHOTO_SUPPLY of certificates that report a non-zero value, grouped by
    property characteristics and floor area decile. ``save`` and ``load`` move the resulting tables to and from S3,
    and ``filter_photo_supply_lookup`` selects the rows of the lookup that best describe a given property.
    """

    DATASET_COLUMNS = [
        "UPRN", "PROPERTY_TYPE", "TENURE", "BUILT_FORM", "ROOF_DESCRIPTION", "PHOTO_SUPPLY", "TOTAL_FLOOR_AREA",
        "CONSTRUCTION_AGE_BAND", "SOLAR_WATER_HEATING_FLAG"
    ]

    # Certificates missing any of these columns are dropped before aggregation
    REQUIRED_COLUMNS = ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]

    # Where the outputs are stored in S3. ``save`` and ``load`` share the keys so they cannot drift apart
    DEFAULT_BUCKET = "retrofit-data-dev"
    PHOTO_SUPPLY_LOOKUP_KEY = "solar_pv_supply/photo_supply_lookup.parquet"
    FLOOR_AREA_DECILE_THRESHOLDS_KEY = "solar_pv_supply/floor_area_decile_thresholds.parquet"

    # The lower-cased "not defined" tenure is remapped to a truncated form of the same text before matching
    TENURE_REMAP = {
        "not defined - use in the case of a new dwelling for which the intended tenure in not known. it is not to "
        "be used for an existing dwelling":
            "not defined - use in the case of a new dwelling for which the intended tenure in not known. it is no"
    }

    def __init__(self, file_directories, cleaned_lookup):
        """
        Initialize the SolarPhotoSupply class with file directories and a cleaned lookup. Currently, this class
        just works with locally stored data, but this could be extended to work with data stored in S3.

        :param file_directories: A list of directories where files are stored. Each directory is expected to
            contain a "certificates.csv" file.
        :param cleaned_lookup: A dictionary containing cleaned lookup data. Only the "roof-description" entry is
            used; if it is absent the roof lookup is empty and ``create_dataset`` will refuse to run.
        """
        self.file_directories = file_directories

        self.results = []
        self.decile_thresholds = None

        self.roof_lookup = pd.DataFrame(cleaned_lookup.get("roof-description"))

        self.photo_supply_lookup = pd.DataFrame()
        self.floor_area_decile_thresholds = pd.DataFrame()

    def create_dataset(self):
        """
        Create a dataset from the provided file directories. This method processes the data files,
        applies transformations, and aggregates data into a useful format.

        On completion ``self.results`` holds the certificate level data, ``self.photo_supply_lookup`` the median
        and mean PHOTO_SUPPLY per group and ``self.floor_area_decile_thresholds`` the floor area decile boundaries.

        :raises ValueError: If there is no roof lookup data, or no directories were provided to read from.
        """

        if self.roof_lookup.empty:
            raise ValueError("No roof lookup data")

        results = []

        logger.info("Creating solar photo supply dataset")
        # NB: the loop variable is deliberately not called `dir`, which would shadow the builtin
        for directory in tqdm(self.file_directories):
            filepath = directory / "certificates.csv"
            df = pd.read_csv(filepath, low_memory=False)
            # Missing UPRNs must be removed before the integer conversion, which cannot represent them
            df = df[~pd.isnull(df["UPRN"])]
            df["UPRN"] = df["UPRN"].astype(int).astype(str)
            # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA
            for col in self.REQUIRED_COLUMNS:
                df = df[~pd.isnull(df[col])]
            # Take newest LODGEMENT_DATE per UPRN
            df = df.sort_values(by="LODGEMENT_DATE", ascending=False).drop_duplicates(subset=["UPRN"])

            # Only certificates reporting a non-zero photo supply are kept; a missing value is treated as zero
            data = df[self.DATASET_COLUMNS].copy()
            data["PHOTO_SUPPLY"] = data["PHOTO_SUPPLY"].fillna(0)
            data = data[data["PHOTO_SUPPLY"] != 0]
            results.append(data)

        if not results:
            # pd.concat would raise a less helpful "No objects to concatenate" ValueError here
            raise ValueError("No certificate data found in the provided file directories")

        self.results = pd.concat(results)

        # Convert total floor area to deciles
        self.decile_thresholds = self.results["TOTAL_FLOOR_AREA"].quantile(
            [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
        ).values

        # Bins are closed on the right, which is the same rule classify_floor_area applies to a single value
        self.results["floor_area_decile"] = pd.cut(
            self.results["TOTAL_FLOOR_AREA"],
            bins=[0] + list(self.decile_thresholds) + [float('inf')],
            labels=False,
            include_lowest=True
        )

        # Convert tenure to lower
        self.results["TENURE"] = self.results["TENURE"].str.lower()

        # Attach the cleaned roof attributes, dropping the lookup columns that are not used for grouping
        self.results = self.results.merge(
            self.roof_lookup.drop(
                columns=[
                    "clean_description", "thermal_transmittance", "thermal_transmittance_unit", "insulation_thickness",
                    "is_assumed"
                ]
            ),
            left_on="ROOF_DESCRIPTION",
            right_on="original_description",
            how="left"
        )

        self.photo_supply_lookup = self.results.groupby(
            [
                "PROPERTY_TYPE", "BUILT_FORM", "TENURE", "is_pitched", "is_roof_room", "is_flat",
                "CONSTRUCTION_AGE_BAND", "floor_area_decile"
            ],
            observed=True
        ).agg(
            {
                "PHOTO_SUPPLY": ["median", "mean"],
            }
        ).reset_index()

        # The aggregation produces two-level column names. Flatten them to lower-case names, removing the trailing
        # underscore left behind on the grouping columns (whose second level is empty)
        flattened_columns = []
        for col in self.photo_supply_lookup.columns.values:
            name = '_'.join(col).strip()
            if name.endswith("_"):
                name = name[:-1]
            flattened_columns.append(name.lower())
        self.photo_supply_lookup.columns = flattened_columns

        self.floor_area_decile_thresholds = pd.DataFrame(
            self.decile_thresholds,
            columns=["floor_area_decile_thresholds"]
        )

    @staticmethod
    def classify_floor_area(new_area, thresholds):
        """
        Classify a given floor area into a decile based on provided thresholds.

        :param new_area: The new floor area to be classified.
        :param thresholds: A list of thresholds used for classification, in ascending order.
        :return: An integer representing the decile index: the position of the first threshold that is greater
            than or equal to the area, or ``len(thresholds)`` if the area exceeds every threshold.
        """

        for i, threshold in enumerate(thresholds):
            if new_area <= threshold:
                return i  # Returns the decile index (0 to 9)
        return len(thresholds)

    def save(self, bucket=DEFAULT_BUCKET):
        """
        Save the processed data to an S3 bucket in the parquet format. This method also handles
        logging and validation to ensure data is present before saving.

        :param bucket: The name of the S3 bucket to save to. Defaults to the development data bucket.
        :raises ValueError: If the photo supply lookup has not been created yet.
        """
        if self.photo_supply_lookup.empty:
            raise ValueError("No data to save")

        logger.info("Storing outputs to S3")
        # Store this data in s3 as a parquet file

        save_dataframe_to_s3_parquet(
            df=self.photo_supply_lookup,
            bucket_name=bucket,
            file_key=self.PHOTO_SUPPLY_LOOKUP_KEY,
        )

        save_dataframe_to_s3_parquet(
            df=self.floor_area_decile_thresholds,
            bucket_name=bucket,
            file_key=self.FLOOR_AREA_DECILE_THRESHOLDS_KEY,
        )

    @staticmethod
    def load(bucket):
        """
        Load datasets from an S3 bucket.

        :param bucket: The name of the S3 bucket to load data from.
        :return: A tuple containing photo supply lookup and floor area decile thresholds dataframes.
        """
        photo_supply_lookup = read_dataframe_from_s3_parquet(
            bucket_name=bucket, file_key=SolarPhotoSupply.PHOTO_SUPPLY_LOOKUP_KEY,
        )
        floor_area_decile_thresholds = read_dataframe_from_s3_parquet(
            bucket_name=bucket, file_key=SolarPhotoSupply.FLOOR_AREA_DECILE_THRESHOLDS_KEY,
        )

        return photo_supply_lookup, floor_area_decile_thresholds

    @classmethod
    def filter_photo_supply_lookup(
            cls,
            photo_supply_lookup: pd.DataFrame,
            floor_area_decile_thresholds: pd.DataFrame,
            tenure: str,
            built_form: str,
            property_type: str,
            construction_age_band: str,
            is_flat: bool,
            is_pitched: bool,
            is_roof_room: bool,
            floor_area: float
    ):

        """
        Filter the photo supply lookup to find the most appropriate photo supply for a given property.

        An exact match on every attribute is tried first. If that finds nothing, the match is relaxed to tenure,
        built form and property type, narrowed by construction age band where possible. The result is then narrowed
        to the property's floor area decile, but only if that decile is present in the matched rows.

        :param photo_supply_lookup: The photo supply lookup dataframe.
        :param floor_area_decile_thresholds: The floor area decile thresholds dataframe.
        :param tenure: The tenure of the property.
        :param built_form: The built form of the property.
        :param property_type: The property type of the property.
        :param construction_age_band: The construction age band of the property.
        :param is_flat: Whether the property has a flat roof.
        :param is_pitched: Whether the property has a pitched roof.
        :param is_roof_room: Whether the property has a roof room.
        :param floor_area: The floor area of the property.
        :return: The matching rows of the photo supply lookup (one or more rows).
        :raises AttributeError: If the tenure is not a string.
        :raises ValueError: If no rows match, even after relaxing the match.
        """

        # Convert the tenure to lower case, as is done in the creation of the dataset
        tenure = tenure.lower()
        # We remap the "not defined"
        tenure = cls.TENURE_REMAP.get(tenure, tenure)

        photo_supply_matched = photo_supply_lookup[
            (photo_supply_lookup["tenure"] == tenure) &
            (photo_supply_lookup["built_form"] == built_form) &
            (photo_supply_lookup["property_type"] == property_type) &
            (photo_supply_lookup["construction_age_band"] == construction_age_band) &
            (photo_supply_lookup["is_flat"] == is_flat) &
            (photo_supply_lookup["is_pitched"] == is_pitched) &
            (photo_supply_lookup["is_roof_room"] == is_roof_room)
        ]

        if photo_supply_matched.empty:
            # There are a small number of cases where we don't get a full match so try again with a more aggregated
            # average
            photo_supply_matched = photo_supply_lookup[
                (photo_supply_lookup["tenure"] == tenure) &
                (photo_supply_lookup["built_form"] == built_form) &
                (photo_supply_lookup["property_type"] == property_type)
            ]
            if construction_age_band in photo_supply_matched["construction_age_band"].values:
                photo_supply_matched = photo_supply_matched[
                    photo_supply_matched["construction_age_band"] == construction_age_band
                ]

        if photo_supply_matched.empty:
            raise ValueError("No photo supply matches")

        floor_area_decile = cls.classify_floor_area(
            floor_area, floor_area_decile_thresholds["floor_area_decile_thresholds"].values
        )

        # Only narrow by decile when it is available, otherwise keep the broader match rather than return nothing
        if floor_area_decile in photo_supply_matched["floor_area_decile"].values:
            photo_supply_matched = photo_supply_matched[
                photo_supply_matched["floor_area_decile"] == floor_area_decile
            ]

        return photo_supply_matched
|
||||
31
etl/solar/app.py
Normal file
31
etl/solar/app.py
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
from pathlib import Path
|
||||
from etl.epc.property_change_app import get_cleaned
|
||||
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
|
||||
|
||||
DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
|
||||
|
||||
|
||||
def app():
    """
    Build the solar photo-supply lookup from the locally stored EPC data and store it.

    The EPC data is read in to produce a reasonable figure for the photo-supply variable, which is defined as:
    "Percentage of photovoltaic area as a percentage of total roof area. 0% indicates that a Photovoltaic Supply
    is not present in the property."

    When recommending solar, the retrofit is simulated by increasing this value from 0, so a sensible figure to
    increase it to is needed. This script pulls the data that allows such a figure to be deduced.

    :return: None
    """

    # Every sub-directory of the data directory is handed to the dataset builder
    directories = []
    for entry in DATA_DIRECTORY.iterdir():
        if entry.is_dir():
            directories.append(entry)

    cleaned_lookup = get_cleaned()

    photo_supply_builder = SolarPhotoSupply(file_directories=directories, cleaned_lookup=cleaned_lookup)

    # Aggregate the certificates, then store the resulting lookup tables
    photo_supply_builder.create_dataset()
    photo_supply_builder.save()
|
||||
109
etl/solar/tests/test_solar_photo_supply.py
Normal file
109
etl/solar/tests/test_solar_photo_supply.py
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
import unittest
|
||||
import pandas as pd
|
||||
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
|
||||
|
||||
|
||||
class TestSolarPhotoSupply(unittest.TestCase):
    """Tests for filtering the photo supply lookup and for classifying floor areas into deciles."""

    def setUp(self):
        # Mock data for photo_supply_lookup and floor_area_decile_thresholds, described one row at a time
        lookup_rows = [
            {
                "tenure": "leasehold", "built_form": "detached", "property_type": "house",
                "construction_age_band": "pre-1900", "is_flat": False, "is_pitched": True, "is_roof_room": False,
                "floor_area_decile": 0, "photo_supply": 100
            },
            {
                "tenure": "freehold", "built_form": "semi-detached", "property_type": "flat",
                "construction_age_band": "1900-1929", "is_flat": True, "is_pitched": False, "is_roof_room": True,
                "floor_area_decile": 1, "photo_supply": 200
            },
        ]
        self.photo_supply_lookup = pd.DataFrame(lookup_rows)

        self.floor_area_decile_thresholds = pd.DataFrame({
            "floor_area_decile_thresholds": [50, 100]
        })

        self.solar_photo_supply = SolarPhotoSupply([], {})

    def _filter(self, tenure, built_form, property_type, construction_age_band, is_flat, is_pitched, is_roof_room,
                floor_area):
        """Run the filter against the mock lookup and thresholds for the described property."""
        return self.solar_photo_supply.filter_photo_supply_lookup(
            self.photo_supply_lookup,
            self.floor_area_decile_thresholds,
            tenure,
            built_form,
            property_type,
            construction_age_band,
            is_flat,
            is_pitched,
            is_roof_room,
            floor_area
        )

    def test_correct_filtering(self):
        # Every attribute matches the first row of the lookup
        matched = self._filter("leasehold", "detached", "house", "pre-1900", False, True, False, 45)
        self.assertEqual(len(matched), 1)
        self.assertEqual(matched.iloc[0]["photo_supply"], 100)

    def test_no_matches(self):
        # The built form is not in the lookup, so not even the relaxed match finds anything
        with self.assertRaises(ValueError):
            self._filter("leasehold", "unknown", "house", "pre-1900", False, True, False, 45)

    def test_floor_area_decile_matching(self):
        # A floor area of 60 falls in decile 1, which is the decile of the second row
        matched = self._filter("freehold", "semi-detached", "flat", "1900-1929", True, False, True, 60)
        self.assertEqual(len(matched), 1)
        self.assertEqual(matched.iloc[0]["photo_supply"], 200)

    def test_invalid_parameters(self):
        with self.assertRaises(AttributeError):
            # Invalid type for tenure
            self._filter(123, "detached", "house", "pre-1900", False, True, False, 45)

    def test_classify_floor_area(self):
        # Setup
        thresholds = [10, 20, 30, 40, 50]
        solar_photo_supply = SolarPhotoSupply([], {})

        cases = [
            # Test Case 1: Valid floor area
            (25, 2, "Decile classification did not match expected result"),
            # Test Case 2: Out of range floor area
            (60, len(thresholds), "Decile classification for out of range value is incorrect"),
        ]
        for floor_area, expected_decile, failure_message in cases:
            result = solar_photo_supply.classify_floor_area(floor_area, thresholds)
            self.assertEqual(result, expected_decile, failure_message)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
194
etl/testing_data/estimate_epc.py
Normal file
194
etl/testing_data/estimate_epc.py
Normal file
|
|
@ -0,0 +1,194 @@
|
|||
from pathlib import Path
|
||||
from random import choices, sample
|
||||
|
||||
import os
|
||||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
from dotenv import load_dotenv
|
||||
from utils.logger import setup_logger
|
||||
from backend.SearchEpc import SearchEpc, vartypes
|
||||
from BaseUtility import Definitions
|
||||
from etl.epc.settings import BUILT_FORM_REMAP
|
||||
|
||||
# NOTE(review): this resolves to a "backend" folder alongside this script - confirm that is where the .env lives
ENV_FILE = Path(__file__).parent / "backend" / ".env"

logger = setup_logger()

DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
# The number of UPRNs evaluated per directory and the number of directories evaluated
DIR_SAMPLE_SIZE = 500
N_DIRECTORIES = 50

# The .env file must be loaded BEFORE the environment is read, otherwise a token that is only defined in the .env
# file is not yet available and EPC_AUTH_TOKEN ends up as None
load_dotenv(ENV_FILE)

EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
||||
|
||||
CATETORICALS_TO_IGNORE = [
|
||||
"postcode", "constituency", "local-authority", "built-form", "property-type", "address1", "constituency-label",
|
||||
"building-reference-number", "address2", "posttown", "transaction-type", "lmk-key", "address3",
|
||||
"local-authority-label", "county",
|
||||
]
|
||||
|
||||
|
||||
def check_numeric_performance(estimated_value, actual_value):
    """
    Measure how far an estimated numeric value is from the actual value, as a relative error.

    :param estimated_value: The value produced by the estimation process. May be missing.
    :param actual_value: The value recorded on the real EPC. May be missing.
    :return: None if there is no actual value to compare against, 1 if there is no estimate or if a non-zero
        estimate was made for an actual value of 0, 0 if both are 0, otherwise the absolute difference divided by
        the magnitude of the actual value. The result is never negative.
    """
    # If we don't have anything to compare against, return None
    if pd.isnull(actual_value):
        return None

    # A missing estimate is treated as a complete miss
    if pd.isnull(estimated_value):
        return 1

    # A relative error is undefined for an actual value of 0, so it is either an exact hit or a complete miss
    if actual_value == 0:
        return 0 if estimated_value == 0 else 1

    # Divide by the magnitude of the actual value: dividing by a negative actual value would produce a negative
    # "error", which would then inflate the success score
    return abs(estimated_value - actual_value) / abs(actual_value)
|
||||
|
||||
|
||||
def app():
    """
    This script is used to test the EPC estimation process.

    For a sample of directories, a fixed sample of UPRNs is taken from each "certificates.csv". For the newest
    certificate of each UPRN, an EPC is estimated with every certificate of that UPRN excluded, and the estimate is
    compared to the real certificate. Numeric fields are scored by relative error and string fields by exact match.
    The median success figures, overall and by tenure, property type and built form, are logged.

    :raises ValueError: If the variable types cannot all be classified as numeric or string, if there are no data
        directories, or if no certificates were evaluated.
    """

    numerical_vartypes = {key: value for key, value in vartypes.items() if value in ["float", "Int64"]}
    str_var_types = {key: value for key, value in vartypes.items() if value == "str"}
    # Make sure we haven't missed any keys
    if len(numerical_vartypes) + len(str_var_types) != len(vartypes):
        raise ValueError("Not all vartypes have been accounted for")

    # Drop some keys that aren't important
    for k in CATETORICALS_TO_IGNORE:
        str_var_types.pop(k, None)

    directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()]
    if not directories:
        raise ValueError(f"No data directories found in {DATA_DIRECTORY}")

    # Sample without replacement so that no directory is evaluated (and counted) more than once
    directory_sample = sample(directories, k=min(N_DIRECTORIES, len(directories)))

    results = []

    for directory in tqdm(directory_sample):
        filepath = directory / "certificates.csv"
        df = pd.read_csv(filepath, low_memory=False)
        # Missing UPRNs have to be dropped BEFORE the conversion to string, because the conversion turns a missing
        # value into the string "<NA>", which is no longer recognised as missing
        df = df[~pd.isnull(df["UPRN"])].copy()
        df["UPRN"] = df["UPRN"].astype("Int64").astype("str")

        # Take a fixed sample based on the first DIR_SAMPLE_SIZE uprns, rather than a random sample, so that runs
        # are comparable with each other
        uprn_sample = sorted(df["UPRN"].unique().tolist())[:DIR_SAMPLE_SIZE]
        df_sample = df[df["UPRN"].isin(uprn_sample)]
        # Take the record with the newest LODGEMENT_DATETIME by uprn
        df_sample = df_sample.sort_values("LODGEMENT_DATETIME", ascending=False).drop_duplicates("UPRN")
        # Convert the columns to lower case and replace underscores with hyphens, the same as the api
        df_sample.columns = df_sample.columns.str.lower().str.replace("_", "-")

        # For each epc, we test the estimation process
        for _, row in df_sample.iterrows():
            epc = row.to_dict()
            address1 = epc["address1"]
            postcode = epc["postcode"]

            # Get all EPCs for this uprn and we make sure they get dropped from the estimate_epc function
            epcs_for_uprn = df[df["UPRN"] == epc["uprn"]]
            lmks_to_drop = epcs_for_uprn["LMK_KEY"].tolist()
            searcher = SearchEpc(address1, postcode, auth_token=EPC_AUTH_TOKEN, os_api_key="")
            searcher.uprn = epc["uprn"]

            # Perform the same remapping for built-form as in the Property class for this test, in case we get (e.g.)
            # Enclosed End-Terrace
            built_form = BUILT_FORM_REMAP.get(epc["built-form"], epc["built-form"])
            is_detached_maisonette = epc["property-type"] == "Maisonette" and built_form == "Detached"
            if is_detached_maisonette or built_form in Definitions.DATA_ANOMALY_MATCHES:
                built_form = ""

            estimated_epc = searcher.estimate_epc(
                property_type=epc["property-type"], built_form=built_form, lmks_to_drop=lmks_to_drop
            )

            # We now compare the difference between the estimated and original
            # TODO: We can convert windows and lighting to numeric versions and estimate how close we are
            numeric_errors = [
                check_numeric_performance(estimated_epc[key], epc[key]) for key in numerical_vartypes
            ]
            # Remove Nones, which mark fields with no actual value to compare against
            numeric_errors = [error for error in numeric_errors if error is not None]
            # Get an average. If nothing could be compared the score is recorded as missing, which the medians
            # calculated below ignore
            if numeric_errors:
                numeric_success = 1 - (sum(numeric_errors) / len(numeric_errors))
            else:
                numeric_success = float("nan")

            # categorical performance
            categorical_matches = [0 if estimated_epc[key] != epc[key] else 1 for key in str_var_types]
            # Get an average
            if categorical_matches:
                categorical_success = sum(categorical_matches) / len(categorical_matches)
            else:
                categorical_success = float("nan")

            results.append(
                {
                    "uprn": epc["uprn"],
                    "numeric_success": numeric_success,
                    "categorical_success": categorical_success,
                    "property_type": epc["property-type"],
                    "built_form": epc["built-form"],
                    "tenure": epc["tenure"],
                }
            )

    if not results:
        raise ValueError("No EPC records were evaluated")

    # Get aggregate performance figures
    results_df = pd.DataFrame(results)
    results_df["tenure"] = results_df["tenure"].replace("Rented (social)", "rental (social)")

    avg_numeric_success = results_df["numeric_success"].median()
    avg_categorical_success = results_df["categorical_success"].median()

    # With 20 nearest homes
    # 0.7718100840549558
    # 0.5116279069767442
    # 100 nearest homes
    # 0.7859617377809409
    # 0.5348837209302325

    # Fixed sample, sqrt weights

    # Group by tenure
    by_tenure = results_df.groupby("tenure").agg(
        {"numeric_success": "median", "categorical_success": "median", "uprn": "count"}
    )
    # Widen the pandas display so that the aggregate tables are shown in full
    pd.set_option('display.max_rows', 500)
    pd.set_option('display.max_columns', 500)
    pd.set_option('display.width', 1000)

    # With 20 nearest homes
    # numeric_success categorical_success uprn
    # tenure
    # NO DATA! 0.847840 0.581395 278
    # Not defined - use in the case of a new dwelling... 0.930282 0.651163 617
    # Owner-occupied 0.770330 0.511628 2588
    # Rented (private) 0.791885 0.558140 1232
    # owner-occupied 0.741088 0.488372 10912
    # rental (private) 0.749064 0.488372 3252
    # rental (social) 0.822109 0.581395 3878
    # unknown 0.895840 0.627907 1820

    # 100 nearest homes
    # tenure
    # NO DATA! 0.899566 0.604651 233
    # Not defined - use in the case of a new dwelling... 0.927518 0.674419 608
    # Owner-occupied 0.777026 0.511628 3167
    # Rented (private) 0.805646 0.534884 1316
    # owner-occupied 0.762180 0.488372 10835
    # rental (private) 0.760503 0.511628 3181
    # rental (social) 0.830057 0.604651 3705
    # unknown 0.899948 0.627907 1571

    # By property type - we also want to see how many properties we have for each property type
    by_property_type = results_df.groupby("property_type").agg(
        {"numeric_success": "median", "categorical_success": "median", "uprn": "count"}
    )
    # By property_type & built form
    by_property_type_built_form = results_df.groupby(["property_type", "built_form"]).agg(
        {"numeric_success": "median", "categorical_success": "median", "uprn": "count"}
    )

    # Report the figures, so that they are not lost when the script is run outside of a debugger
    logger.info("Median numeric success: %s", avg_numeric_success)
    logger.info("Median categorical success: %s", avg_categorical_success)
    logger.info("Performance by tenure:\n%s", by_tenure)
    logger.info("Performance by property type:\n%s", by_property_type)
    logger.info("Performance by property type and built form:\n%s", by_property_type_built_form)
|
||||
42
etl/testing_data/no_epc_input.py
Normal file
42
etl/testing_data/no_epc_input.py
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
"""
|
||||
This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
|
||||
testing
|
||||
"""
|
||||
import pandas as pd
|
||||
from utils.s3 import save_csv_to_s3
|
||||
|
||||
USER_ID = 8
|
||||
PORTFOLIO_ID = 57
|
||||
|
||||
|
||||
def app():
    """
    Create the input csv for this test portfolio, upload it to S3 and print the matching request body.

    NOTE(review): judging by the file name ("no_epc.csv") this portfolio is presumably meant to exercise
    properties without an EPC - confirm.

    :return:
    """

    # (address, postcode) of every property in the portfolio
    properties = [
        ("21 Butler House", "E2 0PN"),
        ("22 Butler House", "E2 0PN"),
        ("23 Butler House", "E2 0PN"),
        ("24 Butler House", "E2 0PN"),
    ]
    test_file = pd.DataFrame(
        [{"address": address, "postcode": postcode, "Notes": None} for address, postcode in properties]
    )

    # Store the data in s3
    filename = f"{USER_ID}/{PORTFOLIO_ID}/no_epc.csv"
    save_csv_to_s3(dataframe=test_file, bucket_name="retrofit-plan-inputs-dev", file_name=filename)

    # The request body that refers to the uploaded file
    body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Social",
        "goal": "Increase EPC",
        "goal_value": "A",
        "trigger_file_path": filename
    }
    print(body)
|
||||
43
etl/testing_data/windows_portfolio.py
Normal file
43
etl/testing_data/windows_portfolio.py
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
"""
|
||||
This script will create an input csv for the recommendation engine and upload it to S3, which can be used for
|
||||
testing
|
||||
"""
|
||||
import pandas as pd
|
||||
from utils.s3 import save_csv_to_s3
|
||||
|
||||
USER_ID = 8
|
||||
PORTFOLIO_ID = 56
|
||||
|
||||
|
||||
def app():
    """
    Create the input csv for the windows test portfolio, upload it to S3 and print the matching request body.

    This portfolio is for testing windows recommendations.

    :return:
    """

    # (address, postcode) of every property in the portfolio
    properties = [
        ("3 Church Terrace", "LE13 0PW"),
        ("3, Main Street, Redmile", "NG13 0GA"),
        ("Manor House, Kennel Lane, Reepham", "LN3 4DZ"),
        ("13 Main Street", "LE14 2JU"),
        ("8 The Crescent, Coston Road, Buckminster", "NG33 5SF"),
    ]
    test_file = pd.DataFrame(
        [{"address": address, "postcode": postcode, "Notes": None} for address, postcode in properties]
    )

    # Store the data in s3
    filename = f"{USER_ID}/{PORTFOLIO_ID}/windows_portfolio_inputs.csv"
    save_csv_to_s3(dataframe=test_file, bucket_name="retrofit-plan-inputs-dev", file_name=filename)

    # The request body that refers to the uploaded file
    body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Social",
        "goal": "Increase EPC",
        "goal_value": "A",
        "trigger_file_path": filename
    }
    print(body)
|
||||
|
|
@ -18,6 +18,25 @@ regional_labour_variations = [
|
|||
{"Region": "Northern Ireland", "Adjustment_Factor": 0.76}
|
||||
]
|
||||
|
||||
# This data is based on the MCS database
|
||||
MCS_SOLAR_PV_COST_DATA = {
|
||||
"last_updated": "2024-01-04",
|
||||
"average_cost_per_kwh": 2013.94,
|
||||
"average_cost_per_kwh-Outer London": 2618.75,
|
||||
"average_cost_per_kwh-Inner London": 2618.75,
|
||||
"average_cost_per_kwh-South East England": 2083.33,
|
||||
"average_cost_per_kwh-South West England": 2113,
|
||||
"average_cost_per_kwh-East of England": 1973.86,
|
||||
"average_cost_per_kwh-East Midlands": 1981.86,
|
||||
"average_cost_per_kwh-West Midlands": 1926.55,
|
||||
"average_cost_per_kwh-North East England": 2028.49,
|
||||
"average_cost_per_kwh-North West England": 1620.42,
|
||||
"average_cost_per_kwh-Yorkshire and the Humber": 2060.9,
|
||||
"average_cost_per_kwh-Wales": 1898.83,
|
||||
"average_cost_per_kwh-Scotland": 1967.97,
|
||||
"average_cost_per_kwh-Northern Ireland": 2126.09,
|
||||
}
|
||||
|
||||
|
||||
class Costs:
|
||||
"""
|
||||
|
|
@ -42,7 +61,7 @@ class Costs:
|
|||
|
||||
# We use a higher contingency rate for internal wall insulation because of the potential for issues with moving
|
||||
# fittings and trimming doors, as well as scope for damage to the existing wall during preparation.
|
||||
IWI_CONTINGENCY = 0.15
|
||||
IWI_CONTINGENCY = 0.2
|
||||
|
||||
# Where there is more uncertainty, a higher contingency rate is used
|
||||
HIGH_RISK_CONTINGENCY = 0.2
|
||||
|
|
@ -58,12 +77,22 @@ class Costs:
|
|||
# have a preliminaries of 12-14% so we use 12% as the median for the preliminaries rate.
|
||||
# For External wall insulation (EWI), we use 15% as the preliminaries rate if we think the property might
|
||||
# need scaffolding, otherwise we use 12%. This is to account for any site preparation that might be required
|
||||
EWI_NO_SCAFFOLDING_PRELIMINARIES = 0.15
|
||||
EWI_SCAFFOLDING_PRELIMINARIES = 0.20
|
||||
EWI_NO_SCAFFOLDING_PRELIMINARIES = 0.2
|
||||
EWI_SCAFFOLDING_PRELIMINARIES = 0.25
|
||||
|
||||
VAT_RATE = 0.2
|
||||
PROFIT_MARGIN = 0.2
|
||||
|
||||
# Based on this greenmatch article, on average, a Sash window is around 50% more expensive than a casement window.
|
||||
# Therefore, for a conservative cost estimate, and allowance for a more premium window type, we inflate the material
|
||||
# cost of the windows to allow for a sash window type
|
||||
# https://www.greenmatch.co.uk/windows/double-glazing/cost
|
||||
SASH_WINDOW_INFLATION_FACTOR = 1.5
|
||||
|
||||
# Typically, secondary glazing can be installed for 25% of the cost of double glazed windows - to be conservative,
|
||||
# we scale the cost by half
|
||||
SECONDARY_GLAZING_SCALING_FACTOR = 0.5
|
||||
|
||||
def __init__(self, property_instance):
|
||||
"""
|
||||
Initializes the Costs class with a property instance.
|
||||
|
|
@ -147,12 +176,16 @@ class Costs:
|
|||
"""
|
||||
material_cost_per_m2 = material["material_cost"]
|
||||
|
||||
# We inflate material costs due to recent price increases
|
||||
material_cost_per_m2 = material_cost_per_m2 * 1.5
|
||||
|
||||
base_material_cost = material_cost_per_m2 * floor_area
|
||||
labour_cost = material["labour_cost"] * floor_area * self.labour_adjustment_factor
|
||||
|
||||
subtotal_before_profit = base_material_cost + labour_cost
|
||||
|
||||
contingency_cost = subtotal_before_profit * self.CONTINGENCY
|
||||
# We use high risk contingency because of the possibility of access issues and clearing existing insulation
|
||||
contingency_cost = subtotal_before_profit * self.HIGH_RISK_CONTINGENCY
|
||||
preliminaries_cost = subtotal_before_profit * self.PRELIMINARIES
|
||||
profit_cost = subtotal_before_profit * self.PROFIT_MARGIN
|
||||
|
||||
|
|
@ -719,3 +752,121 @@ class Costs:
|
|||
"labour_days": labour_days,
|
||||
"labour_cost": labour_costs
|
||||
}
|
||||
|
||||
def window_glazing(self, number_of_windows, material, is_secondary_glazing=False):
    """
    Estimates the cost and labour involved in glazing a number of windows.

    The work being priced is characterised by the following jobs:

    1) Initial Assessment and Measurements: The condition of the existing frame and opening is assessed and precise
    measurements are taken so that the new double glazed windows fit perfectly.

    2) Remove the Existing Window: The old single glazed window is carefully dismantled and removed, without
    damaging the surrounding wall or the frame (if the frame is to be reused).

    3) Dispose of the Existing Window: The old window is disposed of responsibly, recycling glass and other
    materials where possible.

    4) Surface Preparation: The opening is prepared where there is damage or where adjustments are needed. This can
    include repairing or replacing parts of the frame, sealing gaps and making sure the opening is level and square.

    5) Install the Window Frame (if new frames are used): Double glazed windows often come with their own frames,
    which are aligned, levelled and fixed securely into the opening.

    6) Install the Window Sill: If a new sill is required, it is aligned with the frame and securely attached.

    7) Install the Double Glazed Glass Units: The glass units are inserted into the frame with a snug fit that
    does not stress the glass, which could otherwise crack or break.

    8) Sealing and Weatherproofing: The frame, and the joint between glass and frame, are sealed (typically with
    silicone sealant) so the installation is draught free and weather-tight.

    9) Finishing Touches: Cosmetic work such as trimming, painting or staining the frame and sill to match the
    property, plus cleaning up after the installation.

    10) Inspection and Testing: The new windows are checked to make sure that they open, close and lock correctly
    and that there are no gaps or sealing issues.

    For this cost estimation process, the initial assessment is factored into the preliminaries.

    :param number_of_windows: Number of windows to be glazed
    :param material: Material record. The "material_cost", "labour_cost" and "labour_hours_per_unit" values are
        read and each is multiplied by the number of windows
    :param is_secondary_glazing: When truthy, the job is scaled by SECONDARY_GLAZING_SCALING_FACTOR, otherwise it is
        scaled by SASH_WINDOW_INFLATION_FACTOR
    :return: Dictionary holding the cost breakdown and the labour estimates
    """

    # Unscaled material and labour figures. These are the values reported under "material" and "labour_cost"
    material_cost = material["material_cost"] * number_of_windows
    labour_cost = material["labour_cost"] * number_of_windows * self.labour_adjustment_factor

    # The scaling factor is applied to the combined material and labour cost
    if is_secondary_glazing:
        scaling_factor = self.SECONDARY_GLAZING_SCALING_FACTOR
    else:
        scaling_factor = self.SASH_WINDOW_INFLATION_FACTOR
    scaled_cost = (material_cost + labour_cost) * scaling_factor

    # Each uplift is a fixed proportion of the scaled cost
    contingency_cost = scaled_cost * self.CONTINGENCY
    preliminaries_cost = scaled_cost * self.PRELIMINARIES
    profit_cost = scaled_cost * self.PROFIT_MARGIN

    net_cost = scaled_cost + contingency_cost + preliminaries_cost + profit_cost
    vat_cost = net_cost * self.VAT_RATE

    labour_hours = material["labour_hours_per_unit"] * number_of_windows
    if is_secondary_glazing:
        labour_hours = labour_hours * self.SECONDARY_GLAZING_SCALING_FACTOR

    # Assume a team of 2, each working an 8 hour day
    labour_days = (labour_hours / 8) / 2

    return {
        "total": net_cost + vat_cost,
        "subtotal": net_cost,
        "vat": vat_cost,
        "contingency": contingency_cost,
        "preliminaries": preliminaries_cost,
        "material": material_cost,
        "profit": profit_cost,
        "labour_hours": labour_hours,
        "labour_cost": labour_cost,
        "labour_days": labour_days
    }
|
||||
|
||||
def solar_pv(self, wattage: float):
    """
    Calculates the total cost for solar PV based on data provided by the MCS dashboard, which contains
    costing data for installations of renewable and clean energy measures.

    The data in the dashboard is filtered on domestic building installations and then the data across the
    various regions is manually collected. There is currently no automated way to get the data from the MCS
    dashboard.

    Price can also be benchmarked against this checkatrade article:
    https://www.checkatrade.com/blog/cost-guides/cost-of-solar-panel-installation/

    :param wattage: Peak wattage of the solar PV system
    :return: Dictionary with the "total", "subtotal" (total with VAT divided out), "vat", "labour_hours" and
        "labour_days" of the installation
    """

    # Labour is based on estimates from online research: an average team seems to consist of 3 people and most
    # jobs take around 2 days. Assuming an 8 hour day, that gives 3 * 8 * 2 = 48 hours of labour. The hours are
    # derived from these assumptions so that they cannot drift apart from the number of days reported
    team_size = 3
    hours_per_day = 8
    labour_days = 2

    # Get the cost data relevant to the region. The figure is multiplied by the system size in kW below
    # NOTE(review): the lookup key says "per_kwh" but the value is used as a cost per kW - confirm the units
    regional_cost = MCS_SOLAR_PV_COST_DATA["-".join(["average_cost_per_kwh", self.region])]

    kw = wattage / 1000
    total_cost = kw * regional_cost

    # The regional cost is treated as VAT inclusive, so the VAT is backed out of the total
    subtotal_before_vat = total_cost / (1 + self.VAT_RATE)
    vat = total_cost - subtotal_before_vat

    return {
        "total": total_cost,
        "subtotal": subtotal_before_vat,
        "vat": vat,
        "labour_hours": team_size * hours_per_day * labour_days,
        "labour_days": labour_days,
    }
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@ from recommendations.RoofRecommendations import RoofRecommendations
|
|||
from recommendations.VentilationRecommendations import VentilationRecommendations
|
||||
from recommendations.FireplaceRecommendations import FireplaceRecommendations
|
||||
from recommendations.LightingRecommendations import LightingRecommendations
|
||||
from recommendations.SolarPvRecommendations import SolarPvRecommendations
|
||||
from recommendations.WindowsRecommendations import WindowsRecommendations
|
||||
from backend.ml_models.AnnualBillSavings import AnnualBillSavings
|
||||
|
||||
|
||||
|
|
@ -35,6 +37,8 @@ class Recommendations:
|
|||
)
|
||||
self.fireplace_recommender = FireplaceRecommendations(property_instance=property_instance)
|
||||
self.lighting_recommender = LightingRecommendations(property_instance=property_instance, materials=materials)
|
||||
self.windows_recommender = WindowsRecommendations(property_instance=property_instance, materials=materials)
|
||||
self.solar_recommender = SolarPvRecommendations(property_instance=property_instance)
|
||||
|
||||
def recommend(self):
|
||||
|
||||
|
|
@ -77,6 +81,16 @@ class Recommendations:
|
|||
if self.lighting_recommender.recommendation:
|
||||
property_recommendations.append(self.lighting_recommender.recommendation)
|
||||
|
||||
# Windows recommendations
|
||||
self.windows_recommender.recommend()
|
||||
if self.windows_recommender.recommendation:
|
||||
property_recommendations.append(self.windows_recommender.recommendation)
|
||||
|
||||
# Solar recommendations
|
||||
self.solar_recommender.recommend()
|
||||
if self.solar_recommender.recommendation:
|
||||
property_recommendations.append(self.solar_recommender.recommendation)
|
||||
|
||||
# We insert temporary ids into the recommendations which is important for the optimiser later
|
||||
property_recommendations = self.insert_temp_recommendation_id(property_recommendations)
|
||||
|
||||
|
|
@ -148,6 +162,8 @@ class Recommendations:
|
|||
# For the moment, we cap the number of SAP points that can be achieved by ventilation at 2
|
||||
rec["sap_points"] = min(rec["sap_points"], VentilationRecommendations.SAP_LIMIT)
|
||||
|
||||
# Round to 2 decimal places
|
||||
rec["sap_points"] = round(rec["sap_points"], 2)
|
||||
rec["co2_equivalent_savings"] = float(property_instance.data["co2-emissions-current"]) - new_carbon
|
||||
|
||||
# Energy consumption current is per meter squared, so we need to multiply by the floor area to get
|
||||
|
|
|
|||
65
recommendations/SolarPvRecommendations.py
Normal file
65
recommendations/SolarPvRecommendations.py
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
import numpy as np
|
||||
from recommendations.Costs import Costs
|
||||
|
||||
|
||||
class SolarPvRecommendations:
    """
    Decides whether a property is potentially suitable for solar PV and, if it is, builds a costed recommendation.
    """

    # Approximate area of the solar panels
    SOLAR_PANEL_AREA = 1.6
    # Wattage per panel
    SOLAR_PANEL_WATTAGE = 360

    def __init__(self, property_instance):
        """
        :param property_instance: Instance of the Property class, for the home associated to property_id
        """

        self.property = property_instance
        self.costs = Costs(self.property)

        # Stays empty unless recommend() finds the property suitable
        self.recommendation = []

    def _has_no_existing_solar_pv(self):
        """
        Checks whether the photo-supply field indicates that there is no existing solar PV.

        The field counts as empty when it is None, 0, equal to DATA_ANOMALY_MATCHES, or - when DATA_ANOMALY_MATCHES
        is a collection of anomaly values - one of the values in that collection.
        """

        photo_supply = self.property.data["photo-supply"]
        anomalies = self.property.DATA_ANOMALY_MATCHES

        if photo_supply in (None, 0) or photo_supply == anomalies:
            return True

        # Strings are deliberately excluded here so that we never fall into substring matching
        return isinstance(anomalies, (list, tuple, set, frozenset)) and photo_supply in anomalies

    def recommend(self):
        """
        We check if a property is potentially suitable for solar PV based on the following criteria:
        - The property is a house or bungalow
        - The property has a flat roof, a pitched roof or a roof room
        - The property does not have existing solar pv
        - The roof area available for solar PV fits at least one panel

        When all the criteria are met, self.recommendation is set to a list holding a single recommendation.
        Otherwise it is left untouched.
        :return:
        """

        is_valid_property_type = self.property.data["property-type"] in ["House", "Bungalow"]
        is_valid_roof_type = (
            self.property.roof["is_flat"] or self.property.roof["is_pitched"] or self.property.roof["is_roof_room"]
        )

        if not is_valid_property_type or not is_valid_roof_type or not self._has_no_existing_solar_pv():
            return

        # We now have a property which is potentially suitable for solar PV
        number_solar_panels = np.floor(self.property.solar_pv_roof_area / self.SOLAR_PANEL_AREA)
        if number_solar_panels < 1:
            # The roof cannot fit a single panel, so there is no system to recommend
            return

        solar_panel_wattage = number_solar_panels * self.SOLAR_PANEL_WATTAGE

        # Given the wattage, we estimate the cost of the solar PV system. This is based on the MCS database
        # of solar PV installations
        cost_result = self.costs.solar_pv(wattage=solar_panel_wattage)

        # Whole number of kilowatts, only used for the description
        kw = int(np.round(solar_panel_wattage / 1000))

        self.recommendation = [
            {
                "parts": [],
                "type": "solar_pv",
                "description": f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) panel system on the roof",
                "starting_u_value": None,
                "new_u_value": None,
                "sap_points": None,
                **cost_result,
                # This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale
                # back up here
                "photo_supply": 100 * self.property.solar_pv_percentage
            }
        ]
|
||||
97
recommendations/WindowsRecommendations.py
Normal file
97
recommendations/WindowsRecommendations.py
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
from typing import List
|
||||
|
||||
import numpy as np
|
||||
|
||||
from backend.Property import Property
|
||||
from recommendations.Costs import Costs
|
||||
|
||||
|
||||
class WindowsRecommendations:
    """
    Builds the window glazing recommendation for a property and prices it using Costs.window_glazing.
    """

    # If the property has existing glazing, we scale down the number of windows that need to be glazed. The values
    # are the proportion of windows that are still to be glazed
    COVERAGE_MAP = {
        # If most of the windows have already been glazed, we assume that 2/3 are glazed and 1/3 are remaining to be
        # glazed
        "most": 0.33,
        # If glazing is partial, we assume 50/50 split between glazed and unglazed
        "partial": 0.5
    }

    def __init__(self, property_instance: Property, materials: List):
        """
        :param property_instance: Instance of the Property class that the recommendation is produced for
        :param materials: List of material records, exactly one of which must have the type "windows_glazing"
        :raises ValueError: If there is not exactly one window glazing material
        """
        self.property = property_instance
        self.costs = Costs(self.property)

        self.recommendation = []

        self.glazing_material = [
            material for material in materials if material["type"] == "windows_glazing"
        ]

        if len(self.glazing_material) != 1:
            raise ValueError("There should only be one window glazing material")
        self.glazing_material = self.glazing_material[0]

    def recommend(self):
        """
        This method will recommend the best possible glazing options for a property.

        In order to do this, we need to estimate the number of windows that the home has. This information will be
        stored in the property object, under property.number_of_windows

        When a recommendation is made, self.recommendation is set to a list holding a single recommendation. If the
        property is already fully glazed, self.recommendation is left untouched.
        :return:
        :raises ValueError: If the property has no number of windows
        """

        # If the property is in a conservation area or is a listed building, it becomes more difficult to install
        # double glazing. It is still possible but is not practical as it requires planning permission and might
        # require a more expensive window type, such as timber. Therefore, when measures are restricted (or the
        # existing glazing is already secondary), we recommend secondary glazing instead.

        number_of_windows = self.property.number_of_windows
        is_secondary_glazing = self.property.restricted_measures or (
            self.property.windows["glazing_type"] == "secondary"
        )

        if not number_of_windows:
            raise ValueError("Number of windows not specified")

        # Logical "and" rather than bitwise "&", so that a missing has_glazing value does not raise
        if self.property.windows["has_glazing"] and self.property.windows["glazing_coverage"] == "full":
            return

        # We scale the number of windows based on the proportion of existing glazing
        if self.property.data["multi-glaze-proportion"] != "":
            n_windows_scalar = 1 - (int(self.property.data["multi-glaze-proportion"]) / 100)
        else:
            # Without a recorded proportion we fall back to the coverage description, assuming that all windows
            # need glazing if the coverage is not in the map
            n_windows_scalar = self.COVERAGE_MAP.get(self.property.windows["glazing_coverage"], 1)

        number_of_windows *= n_windows_scalar
        # A partially glazed window cannot be priced, so we round up to whole windows
        number_of_windows = np.ceil(number_of_windows)

        # We then price the job based on the number of windows that there are
        cost_result = self.costs.window_glazing(
            number_of_windows=number_of_windows,
            material=self.glazing_material,
            is_secondary_glazing=is_secondary_glazing
        )

        glazing_type = "secondary glazing" if is_secondary_glazing else "double glazing"
        if self.property.windows["glazing_coverage"] in ["partial", "most"]:
            description = f"Install {glazing_type} to the remaining windows"
        else:
            description = f"Install {glazing_type} to all windows"

        # Only the first applicable reason is appended to the description
        if self.property.is_listed:
            description += ". Secondary glazing recommended due to listed building status"
        elif self.property.is_heritage:
            description += ". Secondary glazing recommended due to heritage building status"
        elif self.property.in_conservation_area:
            description += ". Secondary glazing recommended due to conservation area status"

        self.recommendation = [
            {
                "parts": [],
                "type": "windows_glazing",
                "description": description,
                "starting_u_value": None,
                "new_u_value": None,
                "sap_points": None,
                **cost_result,
                "is_secondary_glazing": is_secondary_glazing
            }
        ]
|
||||
|
|
@ -1,4 +1,5 @@
|
|||
import math
|
||||
from datetime import datetime
|
||||
from copy import deepcopy
|
||||
from typing import Union
|
||||
|
||||
|
|
@ -565,7 +566,7 @@ def estimate_external_wall_area(num_floors, floor_height, perimeter, built_form)
|
|||
'Detached': 4,
|
||||
}
|
||||
|
||||
exposed_wall_area = total_wall_area * (number_exposed_walls[built_form] / 4)
|
||||
exposed_wall_area = total_wall_area * (number_exposed_walls.get(built_form, 3) / 4)
|
||||
|
||||
return exposed_wall_area
|
||||
|
||||
|
|
@ -669,3 +670,87 @@ def esimtate_pitched_roof_area(floor_area: float, floor_height: float) -> float:
|
|||
area = 2 * (slope * wall_width)
|
||||
|
||||
return area
|
||||
|
||||
|
||||
def estimate_windows(
    property_type, built_form, construction_age_band, floor_area, number_habitable_rooms, extension_count
):
    """
    Estimates the number of windows in a property from its high level characteristics.

    The estimate starts from one window per habitable room and is then adjusted for non-habitable rooms, the built
    form, the floor area, the construction age band, the number of extensions and the property type.

    :param property_type: Type of the property, e.g. "House", "Bungalow", "Flat" or "Maisonette"
    :param built_form: Built form of the property, e.g. "Mid-Terrace", "End-Terrace", "Semi-Detached" or "Detached"
    :param construction_age_band: Construction age band of the property
    :param floor_area: Floor area of the property, compared against thresholds of 85 and 120
    :param number_habitable_rooms: Number of habitable rooms in the property
    :param extension_count: Number of extensions, each of which adds one window
    :return: Estimated number of windows
    :raises ValueError: If the estimated number of windows is negative
    """
    # Base window count based on habitable rooms
    window_count = number_habitable_rooms

    # Additional windows for non-habitable rooms (e.g., kitchen, bathroom)
    # Assuming most houses will have at least one kitchen and one bathroom
    # Scale non-habitable windows with the number of habitable rooms
    non_habitable_base = 2  # Base for kitchen and bathroom
    extra_non_habitable = max(0, (number_habitable_rooms - 3) // 2)  # Extra for large houses
    window_count += non_habitable_base + extra_non_habitable

    # Adjustments based on built form and property type
    if property_type in ["House", "Bungalow"] and built_form in ["Semi-Detached", "Detached"]:
        built_form_lookup = {
            "Semi-Detached": 3,
            "Detached": 4,
        }
    else:
        # For Flats and Maisonettes (and terraced houses and bungalows), adjustments might be less
        built_form_lookup = {
            "Mid-Terrace": 0,
            "End-Terrace": 1,
            "Semi-Detached": 1,
            "Detached": 2,
        }
    # Built forms that are not in the lookup receive no adjustment
    window_count += built_form_lookup.get(built_form, 0)

    # Adjust for floor area (larger floor area might indicate more rooms/windows). Properties below 85 are small
    # to medium in size and the standard window count is likely sufficient
    if floor_area > 120:
        # Very large properties are likely to have significantly more or larger rooms
        window_count += 2
    elif floor_area >= 85:
        # Medium to large properties likely have more or larger rooms, potentially more windows
        window_count += 1

    # Adjust for construction age band
    if construction_age_band in ["England and Wales: before 1900", "England and Wales: 1900-1929"]:
        # Older houses with smaller, more numerous windows
        window_count += 1

    # Adjust for extensions (each extension might add windows)
    window_count += extension_count

    # Adjustments for specific property types
    if property_type in ["Flat", "Maisonette"]:
        # Flats might have fewer windows due to shared walls
        # Maisonettes might follow a similar pattern to flats or small houses
        window_count -= 1

    # Ensure window count is not negative
    if window_count < 0:
        raise ValueError("Window count cannot be negative.")

    return window_count
|
||||
|
||||
|
||||
def calculate_cavity_age(newest_epc, older_epcs, cleaned):
    """
    Calculates the number of days since a filled cavity wall was most recently recorded on an EPC.

    Each EPC is matched to its cleaned walls description. Of the EPCs that map to a filled cavity wall, the one
    with the latest lodgement date is used to calculate the age.

    :param newest_epc: The newest EPC for the property. The "walls-description" and "lodgement-date" values are read
    :param older_epcs: List of older EPCs for the property, in the same format as newest_epc
    :param cleaned: Cleaned data, where cleaned["walls-description"] is a list of records that contain
        "original_description", "is_cavity_wall" and "is_filled_cavity"
    :return: Number of days between now and the latest lodgement date of an EPC with a filled cavity wall. NaN is
        returned if no EPC can be mapped to a filled cavity wall
    """
    all_epcs = [newest_epc] + older_epcs

    records = []
    for epc in all_epcs:
        # Get the cleaned mapping
        mapped = [y for y in cleaned["walls-description"] if y["original_description"] == epc["walls-description"]]
        if not mapped:
            continue
        records.append(
            {
                **mapped[0],
                "inspection-date": epc["lodgement-date"],
            }
        )

    # Without any mapped EPCs the dataframe would have no columns to filter on
    if not records:
        return float("nan")

    df = pd.DataFrame(records)
    df = df[df["is_cavity_wall"] & df["is_filled_cavity"]]

    # None of the EPCs record a filled cavity wall, so there is no age to calculate
    if df.empty:
        return float("nan")

    cavity_age = (datetime.now() - pd.to_datetime(df["inspection-date"].max())).days
    return cavity_age
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
from recommendations.Costs import Costs
|
||||
from unittest.mock import Mock
|
||||
import datetime
|
||||
import pytest
|
||||
|
||||
|
||||
class TestCosts:
|
||||
|
|
@ -58,9 +59,9 @@ class TestCosts:
|
|||
)
|
||||
|
||||
assert loft_results == {
|
||||
'total': 430.21445040000003, 'subtotal': 358.512042, 'vat': 71.70240840000001,
|
||||
'contingency': 25.608003000000004, 'preliminaries': 25.608003000000004, 'material': 198.29923000000002,
|
||||
'profit': 51.21600600000001, 'labour_hours': 3.685, 'labour_cost': 57.7808, 'labour_days': 0.460625
|
||||
'total': 639.4133610000001, 'subtotal': 532.8444675000001, 'vat': 106.56889350000002,
|
||||
'contingency': 71.045929, 'preliminaries': 35.5229645, 'material': 297.448845, 'profit': 71.045929,
|
||||
'labour_hours': 3.685, 'labour_cost': 57.7808, 'labour_days': 0.460625
|
||||
}
|
||||
|
||||
def test_internal_wall_insulation(self):
|
||||
|
|
@ -176,11 +177,9 @@ class TestCosts:
|
|||
)
|
||||
|
||||
assert iwi_results == {
|
||||
'total': 6650.889456921851, 'subtotal': 5542.407880768209, 'vat': 1108.4815761536418,
|
||||
'contingency': 573.3525393898148, 'preliminaries': 382.2350262598765,
|
||||
'material': 1747.488000615996,
|
||||
'profit': 764.470052519753, 'labour_hours': 88.23759388401297,
|
||||
'labour_days': 2.757424808875405,
|
||||
'total': 6880.2304726777775, 'subtotal': 5733.525393898148, 'vat': 1146.7050787796295,
|
||||
'contingency': 764.470052519753, 'preliminaries': 382.2350262598765, 'material': 1747.488000615996,
|
||||
'profit': 764.470052519753, 'labour_hours': 88.23759388401297, 'labour_days': 2.757424808875405,
|
||||
'labour_cost': 1927.1602026551818
|
||||
}
|
||||
|
||||
|
|
@ -414,8 +413,8 @@ class TestCosts:
|
|||
)
|
||||
|
||||
assert ewi_results == {
|
||||
'total': 14561.688989159393, 'subtotal': 12134.740824299493, 'vat': 2426.948164859899,
|
||||
'contingency': 808.9827216199662, 'preliminaries': 1617.9654432399325, 'material': 4020.565147410677,
|
||||
'total': 15047.078622131372, 'subtotal': 12539.232185109477, 'vat': 2507.8464370218953,
|
||||
'contingency': 808.9827216199662, 'preliminaries': 2022.4568040499155, 'material': 4020.565147410677,
|
||||
'profit': 1617.9654432399325, 'labour_hours': 187.02533486285358, 'labour_days': 5.8445417144641745,
|
||||
'labour_cost': 3921.5600094613983
|
||||
}
|
||||
|
|
@ -499,3 +498,48 @@ class TestCosts:
|
|||
'labour_hours': 24.79, 'labour_days': 1.549375, 'labour_cost': 186.9032}
|
||||
|
||||
assert costs.labour_adjustment_factor == 0.88
|
||||
|
||||
# Mock property instance for regional tests
|
||||
@pytest.fixture(params=[
    ("Northamptonshire", "East Midlands", 7927.44),
    ("Greater London Authority", "Inner London", 10475.0),
    ("Adur", "South East England", 8333.32),
    ("Bournemouth", "South West England", 8452),
    ("Basildon", "East of England", 7895.44),
    ("Birmingham", "West Midlands", 7706.2),
    ("County Durham", "North East England", 8113.96),
    ("Allerdale", "North West England", 6481.68),
    ("York", "Yorkshire and the Humber", 8243.6),
    ("Cardiff", "Wales", 7595.32),
    ("Glasgow City", "Scotland", 7871.88),
    ("Belfast", "Northern Ireland", 8504.36)
])
def mock_property_with_region(self, request):
    """
    Parametrised fixture that provides a mocked property for each (county, region, expected cost) case.

    :return: Tuple of the mocked property, the region that is expected for its county and the expected cost
    """
    # Only the county is set on the mocked property data
    property_stub = Mock(data={"county": request.param[0]})
    return property_stub, request.param[1], request.param[2]
|
||||
|
||||
# Test for different wattages
|
||||
@pytest.mark.parametrize("wattage, expected_cost", [
    (3000, 5945.58),
    (4000, 7927.44),
    (5000, 9909.3),
    (6000, 11891.16),
])
def test_solar_pv_different_wattages(self, wattage, expected_cost):
    """
    The total solar PV cost should match the expected cost for each wattage, for a fixed county.
    """
    mansfield_property = Mock(data={"county": "Mansfield"})
    estimate = Costs(mansfield_property).solar_pv(wattage)

    # The expected costs are rounded, so we allow a 1% relative tolerance
    assert estimate['total'] == pytest.approx(expected_cost, rel=0.01)
|
||||
|
||||
def test_solar_pv_regional_variation(self, mock_property_with_region):
    """
    The county should resolve to the expected region, and a 4000W system should cost the regional amount.
    """
    mocked_property, region, cost = mock_property_with_region
    calculator = Costs(mocked_property)

    assert calculator.region == region

    # The wattage is fixed at 4000 so that only the region varies between cases
    estimate = calculator.solar_pv(4000)
    assert estimate['total'] == pytest.approx(cost, rel=0.01)
|
||||
|
|
|
|||
|
|
@ -942,8 +942,24 @@ materials = [
|
|||
'https://www.hamuch.com/cost/led-spot-light#:~:text=It%20costs%20an%20average%20of,'
|
||||
'will%20drive%20up%20the%20cost.',
|
||||
'created_at': datetime.datetime(2023, 11, 28, 22, 49, 12, 244907), 'is_active': True, 'prime_material_cost': None,
|
||||
'material_cost': 20.0, 'labour_cost': 46.0, 'labour_hours_per_unit': 0.8, 'plant_cost': 0.0, 'total_cost': 66.0,
|
||||
'material_cost': 20.0, 'labour_cost': 15.0, 'labour_hours_per_unit': 0.8, 'plant_cost': 0.0, 'total_cost': 66.0,
|
||||
'notes': 'We estimate the unit economics from the checkatrade article. We assume that the average job consists '
|
||||
'of installing 6 lights based on the hamuch article. We use the median value of 400 for a job of 6 '
|
||||
'lights'}
|
||||
'lights'},
|
||||
{'id': 1235, 'type': 'windows_glazing',
|
||||
'description': 'uPVC windows; Profile 22 or other equal and approved; reinforced where appropriate with '
|
||||
'aluminium alloy; in refurbishment work, including standard ironmongery; sills and factory glazed '
|
||||
'with low-e 24 mm double glazing; removing existing windows and fixing new in position; including '
|
||||
'lugs plugged and screwed to brickwork or blockwork; Casement/fixed light; including vents; '
|
||||
'e.p.d.m. glazing gaskets and weather seals; 1770 mm × 1200 mm; ref P312WW',
|
||||
'depth': 0.0, 'depth_unit': None, 'cost': None, 'cost_unit': 'gbp_per_unit', 'r_value_per_mm': None,
|
||||
'r_value_unit': 'square_meter_kelvin_per_watt', 'thermal_conductivity': None, 'thermal_conductivity_unit': None,
|
||||
'link': 'SPONs',
|
||||
'created_at': datetime.datetime(2023, 11, 28, 22, 49, 12, 244907),
|
||||
'is_active': True, 'prime_material_cost': 176.55,
|
||||
'material_cost': 182.25, 'labour_cost': 163.36, 'labour_hours_per_unit': 6.5, 'plant_cost': 0.0,
|
||||
'total_cost': 345.61,
|
||||
'notes': 'This is the cost of removal of existing windows and installation of new windows. This is a casement '
|
||||
'style window, which is the most common but also the cheapest style. In the cost estimation framework, '
|
||||
'we can inflate prices for different finishes, to be conservative on price.'}
|
||||
]
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ from recommendations.FireplaceRecommendations import FireplaceRecommendations
|
|||
class TestFirepaceRecommendations:
|
||||
|
||||
def test_no_fireplaces(self):
|
||||
property_instance = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
|
||||
property_instance = Property(id=0, address="fake", postcode="fake")
|
||||
property_instance.data = {
|
||||
"number-open-fireplaces": 0
|
||||
}
|
||||
|
|
@ -22,7 +22,7 @@ class TestFirepaceRecommendations:
|
|||
assert recommender.recommendation is None
|
||||
|
||||
def test_one_fireplace(self):
|
||||
property_instance = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
|
||||
property_instance = Property(id=0, address="fake", postcode="fake")
|
||||
property_instance.data = {
|
||||
"number-open-fireplaces": 1
|
||||
}
|
||||
|
|
@ -40,7 +40,7 @@ class TestFirepaceRecommendations:
|
|||
assert recommender.recommendation[0]["total"] == 300
|
||||
|
||||
def test_multiple_fireplaces(self):
|
||||
property_instance = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
|
||||
property_instance = Property(id=0, address="fake", postcode="fake")
|
||||
property_instance.data = {
|
||||
"number-open-fireplaces": 3
|
||||
}
|
||||
|
|
|
|||
|
|
@ -21,16 +21,6 @@ class TestFloorRecommendations:
|
|||
) as f:
|
||||
return pickle.load(f)
|
||||
|
||||
@pytest.fixture
|
||||
def mock_floor_rec_instance(self):
|
||||
# Creating a mock instance of WallRecommendations with the necessary attributes
|
||||
property_mock = Mock()
|
||||
property_mock.full_sap_epc = {"lodgement-date": "2000-01-01"}
|
||||
property_mock.data = {"county": "York"}
|
||||
|
||||
mock_wall_rec_instance = FloorRecommendations(property_mock, materials)
|
||||
return mock_wall_rec_instance
|
||||
|
||||
def test_init(self, input_properties):
|
||||
input_properties[0].insulation_floor_area = 50
|
||||
input_properties[0].insulation_wall_area = 90
|
||||
|
|
@ -68,6 +58,7 @@ class TestFloorRecommendations:
|
|||
input_properties[2].wall_type = "solid brick"
|
||||
input_properties[2].floor_type = "suspended"
|
||||
input_properties[2].number_of_floors = 1
|
||||
input_properties[2].floor_level = 0
|
||||
|
||||
recommender = FloorRecommendations(property_instance=input_properties[2], materials=materials)
|
||||
assert recommender.estimated_u_value is None
|
||||
|
|
@ -93,6 +84,8 @@ class TestFloorRecommendations:
|
|||
input_properties[3].insulation_floor_area = 100
|
||||
input_properties[3].insulation_wall_area = 100
|
||||
input_properties[3].number_of_floors = 1
|
||||
input_properties[3].floor_level = 0
|
||||
|
||||
recommender = FloorRecommendations(property_instance=input_properties[3], materials=materials)
|
||||
assert recommender.estimated_u_value is None
|
||||
recommender.recommend()
|
||||
|
|
@ -114,6 +107,7 @@ class TestFloorRecommendations:
|
|||
input_properties[4].wall_type = "solid brick"
|
||||
input_properties[4].floor_type = "solid"
|
||||
input_properties[4].number_of_floors = 1
|
||||
input_properties[4].floor_level = 0
|
||||
|
||||
# In this case we have no county, so it should use the local-authority-label if possible
|
||||
input_properties[4].data["county"] = ""
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ from recommendations.tests.test_data.materials import materials
|
|||
class TestLightingRecommendations:
|
||||
|
||||
def test_init_invalid_materials(self):
|
||||
input_property0 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
|
||||
input_property0 = Property(id=1, postcode="F4k3 6", address="623 fake street")
|
||||
input_property0.lighting = {"low_energy_proportion": 0}
|
||||
input_property0.data = {"county": "Greater London Authority"}
|
||||
# Test for invalid materials
|
||||
|
|
@ -18,7 +18,7 @@ class TestLightingRecommendations:
|
|||
|
||||
def test_recommend_no_action_needed(self):
|
||||
# Case where no recommendation is needed
|
||||
input_property1 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
|
||||
input_property1 = Property(id=1, postcode="F4k3 6", address="623 fake street")
|
||||
input_property1.lighting = {"low_energy_proportion": 100}
|
||||
input_property1.data = {"county": "Greater London Authority"}
|
||||
|
||||
|
|
@ -28,7 +28,7 @@ class TestLightingRecommendations:
|
|||
|
||||
def test_recommend_action_needed(self):
|
||||
# Case where recommendation is needed
|
||||
input_property1 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
|
||||
input_property1 = Property(id=1, postcode="F4k3 6", address="623 fake street")
|
||||
input_property1.lighting = {"low_energy_proportion": 100}
|
||||
input_property1.data = {"county": "Greater London Authority"}
|
||||
input_property1.lighting = {"low_energy_proportion": 0.80}
|
||||
|
|
@ -40,8 +40,7 @@ class TestLightingRecommendations:
|
|||
|
||||
assert lr.recommendation == [
|
||||
{'parts': [], 'type': 'low_energy_lighting', 'description': 'Install low energy lighting in 4 outlets',
|
||||
'starting_u_value': None, 'new_u_value': None, 'sap_points': 0.4, 'total': 458.976, 'subtotal': 382.48,
|
||||
'vat': 76.49600000000001, 'contingency': 27.320000000000007, 'preliminaries': 27.320000000000007,
|
||||
'material': 80.0, 'profit': 54.640000000000015, 'labour_hours': 3.2, 'labour_days': 0.4,
|
||||
'labour_cost': 193.20000000000002}
|
||||
'starting_u_value': None, 'new_u_value': None, 'sap_points': 0.4, 'total': 240.24,
|
||||
'subtotal': 200.20000000000002, 'vat': 40.040000000000006, 'contingency': 14.3, 'preliminaries': 14.3,
|
||||
'material': 80.0, 'profit': 28.6, 'labour_hours': 3.2, 'labour_days': 0.4, 'labour_cost': 63.0}
|
||||
]
|
||||
|
|
|
|||
|
|
@ -427,3 +427,106 @@ def test_external_wall_area():
|
|||
for num_floors, floor_height, perimeter, built_form, expected in test_cases:
|
||||
result = recommendation_utils.estimate_external_wall_area(num_floors, floor_height, perimeter, built_form)
|
||||
assert result == expected, f"Test failed for {built_form}: Expected {expected}, got {result}"
|
||||
|
||||
|
||||
def test_estimate_windows():
    """
    Check recommendation_utils.estimate_windows against a set of reference properties. The cases are evaluated in
    the order listed and the first mismatch fails the test.
    """

    # Each entry is:
    # (property_type, built_form, construction_age_band, floor_area, number_habitable_rooms, expected_windows)
    # Every case is evaluated with extension_count=0
    reference_cases = [
        # Based on data from an EPR that has 4 windows
        ("Flat", "Semi-Detached", "England and Wales: 1976-1982", 37, 2, 4),
        # Based on data from an EPR that has 7 windows, however two of the windows were very small, having areas of
        # 0.21m^2 and 0.3m^2 respectively. We see 6 as a reasonable estimate for the number of windows
        ("House", "Mid-Terrace", "England and Wales: 1950-1966", 69, 4, 6),
        # Based on data from an EPR on a bungalow that has 6 windows. Two of the windows are small, both have a
        # 0.4m^2 area, and so 5 windows is an acceptable estimate
        ("Bungalow", "Mid-Terrace", "England and Wales: 1967-1975", 56, 3, 5),
        # Based on data from an EPR on an end terrace house that has 8 windows. One of the windows is very small,
        # with an area of 0.25 m^2, and so 7 windows is an acceptable estimate
        ("House", "End-Terrace", "England and Wales: 1967-1975", 77.28, 4, 7),
        # Based on data from an EPR on a semi-detached house that has 11 windows according to the associated
        # condition report. Right now, we estimate 12 windows for this property
        ("House", "Semi-Detached", "England and Wales: 1950-1966", 88.4, 5, 12),
        # Based on Khalim's flat which has 3 windows. There is no construction age band on the EPC. The windows are
        # large so 5 windows is a reasonable estimate
        ("Flat", "", "", 100, 3, 5),
        # Based on an EPR for a semi-detached house, though we don't have the exact number of windows.
        # We estimate 10
        ("House", "Semi-Detached", "England and Wales: 1967-1975", 85, 4, 10),
        # Based on Khalim's parents' flat
        ("Flat", "End-Terrace", "", 50, 3, 5),
    ]

    for property_type, built_form, age_band, floor_area, habitable_rooms, expected in reference_cases:
        estimated = recommendation_utils.estimate_windows(
            property_type=property_type,
            built_form=built_form,
            construction_age_band=age_band,
            floor_area=floor_area,
            number_habitable_rooms=habitable_rooms,
            extension_count=0,
        )

        assert estimated == expected, f"Expected {expected} windows, got {estimated}"
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
from backend.Property import Property
|
||||
from unittest.mock import Mock
|
||||
from recommendations.RoofRecommendations import RoofRecommendations
|
||||
from recommendations.tests.test_data.materials import materials
|
||||
|
||||
|
|
@ -7,7 +6,7 @@ from recommendations.tests.test_data.materials import materials
|
|||
class TestRoofRecommendations:
|
||||
|
||||
def test_loft_insulation_recommendation_no_insulation(self):
|
||||
property_instance = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
|
||||
property_instance = Property(id=0, address="fake", postcode="fake")
|
||||
property_instance.age_band = "F"
|
||||
property_instance.insulation_floor_area = 100
|
||||
property_instance.roof = {
|
||||
|
|
@ -32,7 +31,7 @@ class TestRoofRecommendations:
|
|||
assert len(roof_recommender.recommendations)
|
||||
|
||||
def test_loft_insulation_recommendation_50mm_insulation(self):
|
||||
property_instance2 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
|
||||
property_instance2 = Property(id=0, address="fake", postcode="fake")
|
||||
property_instance2.age_band = "F"
|
||||
property_instance2.insulation_floor_area = 100
|
||||
property_instance2.roof = {
|
||||
|
|
@ -54,11 +53,11 @@ class TestRoofRecommendations:
|
|||
|
||||
assert len(roof_recommender2.recommendations) == 1
|
||||
|
||||
assert roof_recommender2.recommendations[0]["total"] == 1310.56464
|
||||
assert roof_recommender2.recommendations[0]["total"] == 1936.9206000000004
|
||||
assert roof_recommender2.recommendations[0]["new_u_value"] == 0.14
|
||||
assert roof_recommender2.recommendations[0]["starting_u_value"] == 0.68
|
||||
|
||||
property_instance3 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
|
||||
property_instance3 = Property(id=0, address="fake", postcode="fake")
|
||||
property_instance3.age_band = "F"
|
||||
property_instance3.insulation_floor_area = 100
|
||||
property_instance3.roof = {
|
||||
|
|
@ -83,7 +82,7 @@ class TestRoofRecommendations:
|
|||
assert roof_recommender3.recommendations[0]["parts"][0]["depth"] == 270
|
||||
|
||||
def test_loft_insulation_recommendation_150mm_insulation(self):
|
||||
property_instance4 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
|
||||
property_instance4 = Property(id=0, address="fake", postcode="fake")
|
||||
property_instance4.age_band = "F"
|
||||
property_instance4.insulation_floor_area = 100
|
||||
property_instance4.roof = {
|
||||
|
|
@ -105,12 +104,12 @@ class TestRoofRecommendations:
|
|||
|
||||
assert len(roof_recommender4.recommendations) == 4
|
||||
|
||||
assert roof_recommender4.recommendations[0]["total"] == 788.0544
|
||||
assert roof_recommender4.recommendations[0]["total"] == 1128.744
|
||||
assert roof_recommender4.recommendations[0]["new_u_value"] == 0.15
|
||||
assert roof_recommender4.recommendations[0]["starting_u_value"] == 0.3
|
||||
assert roof_recommender4.recommendations[0]["parts"][0]["depth"] == 150
|
||||
|
||||
property_instance5 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
|
||||
property_instance5 = Property(id=0, address="fake", postcode="fake")
|
||||
property_instance5.age_band = "F"
|
||||
property_instance5.insulation_floor_area = 100
|
||||
property_instance5.roof = {
|
||||
|
|
@ -137,7 +136,7 @@ class TestRoofRecommendations:
|
|||
|
||||
def test_loft_insulation_recommendation_270mm_insulation(self):
|
||||
# We shouldn't recommend anything in this case
|
||||
property_instance6 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
|
||||
property_instance6 = Property(id=0, address="fake", postcode="fake")
|
||||
property_instance6.age_band = "F"
|
||||
property_instance6.insulation_floor_area = 100
|
||||
property_instance6.roof = {
|
||||
|
|
@ -278,7 +277,7 @@ class TestRoofRecommendations:
|
|||
# "Insulate your room roof with 270mm of Example room roof insulation"
|
||||
|
||||
def test_flat_no_insulation(self):
|
||||
property_instance11 = Property(id=11, address1="fake", postcode="fake", epc_client=Mock())
|
||||
property_instance11 = Property(id=11, address="fake", postcode="fake")
|
||||
property_instance11.age_band = "D"
|
||||
property_instance11.insulation_floor_area = 33.5
|
||||
property_instance11.perimeter = 24
|
||||
|
|
@ -307,7 +306,7 @@ class TestRoofRecommendations:
|
|||
"Insulate the home's flat roof with 150mm of Ecotherm Eco-Versal General Purpose Insulation Board"
|
||||
|
||||
def test_flat_insulated(self):
|
||||
property_instance12 = Property(id=12, address1="fake", postcode="fake", epc_client=Mock())
|
||||
property_instance12 = Property(id=12, address="fake", postcode="fake")
|
||||
property_instance12.age_band = "D"
|
||||
property_instance12.insulation_floor_area = 40
|
||||
property_instance12.perimeter = 30
|
||||
|
|
@ -331,7 +330,7 @@ class TestRoofRecommendations:
|
|||
assert not roof_recommender12.recommendations
|
||||
|
||||
def test_flat_limited_insulation(self):
|
||||
property_instance13 = Property(id=12, address1="fake", postcode="fake", epc_client=Mock())
|
||||
property_instance13 = Property(id=12, address="fake", postcode="fake")
|
||||
property_instance13.age_band = "D"
|
||||
property_instance13.insulation_floor_area = 40
|
||||
property_instance13.perimeter = 40
|
||||
|
|
@ -363,7 +362,7 @@ class TestRoofRecommendations:
|
|||
"Insulate the home's flat roof with 150mm of Ecotherm Eco-Versal General Purpose Insulation Board"
|
||||
|
||||
def test_property_above(self):
|
||||
property_instance14 = Property(id=0, address1="fake", postcode="fake", epc_client=Mock())
|
||||
property_instance14 = Property(id=0, address="fake", postcode="fake")
|
||||
property_instance14.age_band = "F"
|
||||
property_instance14.insulation_floor_area = 100
|
||||
property_instance14.roof = {
|
||||
|
|
|
|||
79
recommendations/tests/test_solar_pv_recommendations.py
Normal file
79
recommendations/tests/test_solar_pv_recommendations.py
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
import pytest
|
||||
from recommendations.SolarPvRecommendations import SolarPvRecommendations
|
||||
from backend.Property import Property
|
||||
|
||||
|
||||
class TestSolarPvRecommendations:
    """
    Tests for SolarPvRecommendations. Each fixture builds a Property carrying the data and roof flags for one
    scenario, and each test runs recommend() against one of those fixtures.
    """

    @staticmethod
    def _make_property(data, roof, **attributes):
        """
        Build a Property with blank address details for use in these tests
        :param data: dict assigned to the property's data attribute
        :param roof: dict assigned to the property's roof attribute
        :param attributes: any further attributes, set on the property before data and roof are assigned
        :return: the configured Property
        """
        prop = Property(id=1, address="", postcode="")
        for attribute_name, attribute_value in attributes.items():
            setattr(prop, attribute_name, attribute_value)
        prop.data = data
        prop.roof = roof
        return prop

    @pytest.fixture
    def property_instance_invalid_type(self):
        # Property whose property-type is set to "InvalidType"
        return self._make_property(
            data={"property-type": "InvalidType", "county": "Broxbourne", "photo-supply": None},
            roof={"is_flat": False, "is_pitched": False, "is_roof_room": False},
        )

    @pytest.fixture
    def property_instance_invalid_roof(self):
        # A house where none of the roof flags (flat, pitched, roof room) are set
        return self._make_property(
            data={"county": "Huntingdonshire", "property-type": "House", "photo-supply": None},
            roof={"is_flat": False, "is_pitched": False, "is_roof_room": False},
        )

    @pytest.fixture
    def property_instance_has_solar_pv(self):
        # A house with a flat roof that already has a photo-supply value recorded
        return self._make_property(
            data={"photo-supply": "40", "county": "Huntingdonshire", "property-type": "House"},
            roof={"is_flat": True},
        )

    @pytest.fixture
    def property_instance_valid_all(self):
        # A house with a flat roof and no photo-supply value, with the solar pv roof area and percentage populated
        return self._make_property(
            data={"property-type": "House", "photo-supply": None, "county": "Huntingdonshire"},
            roof={"is_flat": True},
            solar_pv_roof_area=20,
            solar_pv_percentage=40,
        )

    def test_invalid_property_type(self, property_instance_invalid_type):
        recommender = SolarPvRecommendations(property_instance_invalid_type)
        recommender.recommend()

        # Nothing should be recommended for this property
        assert not recommender.recommendation

    def test_invalid_roof_type(self, property_instance_invalid_roof):
        recommender = SolarPvRecommendations(property_instance_invalid_roof)
        recommender.recommend()

        # Nothing should be recommended for this property
        assert not recommender.recommendation

    def test_existing_solar_pv(self, property_instance_has_solar_pv):
        recommender = SolarPvRecommendations(property_instance_has_solar_pv)
        recommender.recommend()

        # Nothing should be recommended for this property
        assert not recommender.recommendation

    def test_valid_all_conditions(self, property_instance_valid_all):
        recommender = SolarPvRecommendations(property_instance_valid_all)
        recommender.recommend()

        # The single recommendation we expect to be produced for this property
        expected_recommendation = {
            'parts': [],
            'type': 'solar_pv',
            'description': 'Install a 4 kilowatt-peak (kWp) solar photovoltaic (PV) panel system on the roof',
            'starting_u_value': None,
            'new_u_value': None,
            'sap_points': None,
            'total': 8527.0752,
            'subtotal': 7105.896,
            'vat': 1421.1791999999996,
            'labour_hours': 72,
            'labour_days': 2,
            'photo_supply': 4000,
        }

        assert recommender.recommendation == [expected_recommendation]
|
||||
|
|
@ -1,5 +1,4 @@
|
|||
from backend.Property import Property
|
||||
from unittest.mock import Mock
|
||||
from recommendations.VentilationRecommendations import VentilationRecommendations
|
||||
from recommendations.tests.test_data.materials import materials
|
||||
|
||||
|
|
@ -7,7 +6,7 @@ from recommendations.tests.test_data.materials import materials
|
|||
class TestVentilationRecommendations:
|
||||
|
||||
def test_natural_ventilation(self):
|
||||
input_property1 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
|
||||
input_property1 = Property(id=1, postcode="F4k3 6", address="623 fake street")
|
||||
input_property1.data = {"mechanical-ventilation": "natural"}
|
||||
|
||||
recommender = VentilationRecommendations(
|
||||
|
|
@ -28,7 +27,7 @@ class TestVentilationRecommendations:
|
|||
assert recommender.recommendation[0]["parts"][0]["quantity"] == 2
|
||||
|
||||
def test_missing_ventilation(self):
|
||||
input_property2 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
|
||||
input_property2 = Property(id=1, postcode="F4k3 6", address="623 fake street")
|
||||
input_property2.data = {"mechanical-ventilation": None}
|
||||
|
||||
recommender2 = VentilationRecommendations(
|
||||
|
|
@ -49,7 +48,7 @@ class TestVentilationRecommendations:
|
|||
assert recommender2.recommendation[0]["parts"][0]["quantity"] == 2
|
||||
|
||||
def test_nodata_ventilation(self):
|
||||
input_property3 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
|
||||
input_property3 = Property(id=1, postcode="F4k3 6", address="623 fake street")
|
||||
input_property3.data = {"mechanical-ventilation": "NO DATA!!"}
|
||||
|
||||
recommender3 = VentilationRecommendations(
|
||||
|
|
@ -70,7 +69,7 @@ class TestVentilationRecommendations:
|
|||
assert recommender3.recommendation[0]["parts"][0]["quantity"] == 2
|
||||
|
||||
def test_existing_ventilation_1(self):
|
||||
input_property4 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
|
||||
input_property4 = Property(id=1, postcode="F4k3 6", address="623 fake street")
|
||||
input_property4.data = {"mechanical-ventilation": 'mechanical, extract only'}
|
||||
|
||||
recommender4 = VentilationRecommendations(
|
||||
|
|
@ -86,7 +85,7 @@ class TestVentilationRecommendations:
|
|||
assert recommender4.has_ventilaion
|
||||
|
||||
def test_existing_ventilation_2(self):
|
||||
input_property5 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
|
||||
input_property5 = Property(id=1, postcode="F4k3 6", address="623 fake street")
|
||||
input_property5.data = {"mechanical-ventilation": 'mechanical, supply and extract'}
|
||||
|
||||
recommender5 = VentilationRecommendations(
|
||||
|
|
|
|||
|
|
@ -231,7 +231,7 @@ class TestWallRecommendationsBase:
|
|||
class TestCavityWallRecommensations:
|
||||
|
||||
def test_fill_empty_cavity(self):
|
||||
input_property = Property(id=1, postcode="F4k3", address1="123 fake street", epc_client=Mock())
|
||||
input_property = Property(id=1, postcode="F4k3", address="123 fake street")
|
||||
input_property.walls = {
|
||||
'original_description': 'Cavity wall, as built, no insulation (assumed)',
|
||||
'clean_description': 'Cavity wall, as built, no insulation',
|
||||
|
|
@ -265,7 +265,7 @@ class TestCavityWallRecommensations:
|
|||
assert np.isclose(recommender.recommendations[1]["total"], 2004.6600000000003)
|
||||
|
||||
def test_fill_partial_filled_cavity(self):
|
||||
input_property = Property(id=1, postcode="F4k3", address1="123 fake street", epc_client=Mock())
|
||||
input_property = Property(id=1, postcode="F4k3", address="123 fake street")
|
||||
input_property.walls = {
|
||||
'original_description': 'Cavity wall, as built, partial insulation (assumed)',
|
||||
'clean_description': 'Cavity wall, as built, partial insulation',
|
||||
|
|
@ -299,7 +299,7 @@ class TestCavityWallRecommensations:
|
|||
assert np.isclose(recommender.recommendations[1]["total"], 1999.9350000000002)
|
||||
|
||||
def test_system_built_wall(self):
|
||||
input_property2 = Property(id=1, postcode="F4k3 2", address1="223 fake street", epc_client=Mock())
|
||||
input_property2 = Property(id=1, postcode="F4k3 2", address="223 fake street")
|
||||
input_property2.walls = {
|
||||
'original_description': 'System built, as built, no insulation (assumed)',
|
||||
'clean_description': 'System built, as built, no insulation',
|
||||
|
|
@ -331,22 +331,22 @@ class TestCavityWallRecommensations:
|
|||
assert len(recommender2.recommendations) == 9
|
||||
assert recommender2.estimated_u_value == 1
|
||||
assert np.isclose(recommender2.recommendations[0]["new_u_value"], 0.19)
|
||||
assert np.isclose(recommender2.recommendations[0]["total"], 15899.9616)
|
||||
assert np.isclose(recommender2.recommendations[0]["total"], 16429.960320000002)
|
||||
assert recommender2.recommendations[0]["parts"][0]["type"] == "external_wall_insulation"
|
||||
assert recommender2.recommendations[0]["parts"][0]["depth"] == 100
|
||||
|
||||
assert np.isclose(recommender2.recommendations[8]["new_u_value"], 0.23)
|
||||
assert np.isclose(recommender2.recommendations[8]["total"], 10916.3424)
|
||||
assert np.isclose(recommender2.recommendations[8]["total"], 11292.768)
|
||||
assert recommender2.recommendations[8]["parts"][0]["type"] == "internal_wall_insulation"
|
||||
assert recommender2.recommendations[8]["parts"][0]["depth"] == 72.5
|
||||
|
||||
assert np.isclose(recommender2.recommendations[6]["new_u_value"], 0.29)
|
||||
assert np.isclose(recommender2.recommendations[6]["total"], 10621.934399999998)
|
||||
assert np.isclose(recommender2.recommendations[6]["total"], 10988.208)
|
||||
assert recommender2.recommendations[6]["parts"][0]["type"] == "internal_wall_insulation"
|
||||
assert recommender2.recommendations[6]["parts"][0]["depth"] == 52.5
|
||||
|
||||
def test_timber_frame_wall(self):
|
||||
input_property3 = Property(id=1, postcode="F4k3 2", address1="223 fake street", epc_client=Mock())
|
||||
input_property3 = Property(id=1, postcode="F4k3 2", address="223 fake street")
|
||||
input_property3.walls = {
|
||||
'original_description': 'Timber frame, as built, no insulation (assumed)',
|
||||
'clean_description': 'Timber frame, as built, no insulation',
|
||||
|
|
@ -378,17 +378,17 @@ class TestCavityWallRecommensations:
|
|||
assert len(recommender3.recommendations) == 6
|
||||
assert recommender3.estimated_u_value == 1.9
|
||||
assert np.isclose(recommender3.recommendations[0]["new_u_value"], 0.2)
|
||||
assert np.isclose(recommender3.recommendations[0]["total"], 13117.46832)
|
||||
assert np.isclose(recommender3.recommendations[0]["total"], 13554.717263999999)
|
||||
assert recommender3.recommendations[0]["parts"][0]["type"] == "external_wall_insulation"
|
||||
assert recommender3.recommendations[0]["parts"][0]["depth"] == 100.0
|
||||
|
||||
assert np.isclose(recommender3.recommendations[1]["new_u_value"], 0.23)
|
||||
assert np.isclose(recommender3.recommendations[1]["total"], 34070.50944)
|
||||
assert np.isclose(recommender3.recommendations[1]["total"], 35206.19308800001)
|
||||
assert recommender3.recommendations[1]["parts"][0]["type"] == "external_wall_insulation"
|
||||
assert recommender3.recommendations[1]["parts"][0]["depth"] == 150.0
|
||||
|
||||
def test_granite_or_whinstone_wall(self):
|
||||
input_property4 = Property(id=1, postcode="F4k3 2", address1="223 fake street", epc_client=Mock())
|
||||
input_property4 = Property(id=1, postcode="F4k3 2", address="223 fake street")
|
||||
input_property4.walls = {
|
||||
'original_description': 'Granite or whinstone, as built, no insulation (assumed)',
|
||||
'clean_description': 'Granite or whinstone, as built, no insulation',
|
||||
|
|
@ -420,17 +420,17 @@ class TestCavityWallRecommensations:
|
|||
assert len(recommender4.recommendations) == 6
|
||||
assert recommender4.estimated_u_value == 2.3
|
||||
assert np.isclose(recommender4.recommendations[0]["new_u_value"], 0.21)
|
||||
assert np.isclose(recommender4.recommendations[0]["total"], 28562.514352)
|
||||
assert np.isclose(recommender4.recommendations[0]["total"], 29547.42864)
|
||||
assert recommender4.recommendations[0]["parts"][0]["type"] == "external_wall_insulation"
|
||||
assert recommender4.recommendations[0]["parts"][0]["depth"] == 100
|
||||
|
||||
assert np.isclose(recommender4.recommendations[1]["new_u_value"], 0.23)
|
||||
assert np.isclose(recommender4.recommendations[1]["total"], 74186.52678400002)
|
||||
assert np.isclose(recommender4.recommendations[1]["total"], 76744.68288000001)
|
||||
assert recommender4.recommendations[1]["parts"][0]["type"] == "external_wall_insulation"
|
||||
assert recommender4.recommendations[1]["parts"][0]["depth"] == 150
|
||||
|
||||
def test_cob_wall(self):
|
||||
input_property5 = Property(id=1, postcode="F4k3 2", address1="223 fake street", epc_client=Mock())
|
||||
input_property5 = Property(id=1, postcode="F4k3 2", address="223 fake street")
|
||||
input_property5.walls = {
|
||||
'original_description': 'Cob, as built',
|
||||
'clean_description': 'Cob, as built',
|
||||
|
|
@ -462,17 +462,17 @@ class TestCavityWallRecommensations:
|
|||
assert len(recommender5.recommendations) == 5
|
||||
assert recommender5.estimated_u_value == 0.8
|
||||
assert np.isclose(recommender5.recommendations[0]["new_u_value"], 0.29)
|
||||
assert np.isclose(recommender5.recommendations[0]["total"], 8665.040384000002)
|
||||
assert np.isclose(recommender5.recommendations[0]["total"], 8963.834880000002)
|
||||
assert recommender5.recommendations[0]["parts"][0]["type"] == "external_wall_insulation"
|
||||
assert recommender5.recommendations[0]["parts"][0]["depth"] == 50
|
||||
|
||||
assert np.isclose(recommender5.recommendations[3]["new_u_value"], 0.26)
|
||||
assert np.isclose(recommender5.recommendations[3]["total"], 20078.742992)
|
||||
assert np.isclose(recommender5.recommendations[3]["total"], 20771.11344)
|
||||
assert recommender5.recommendations[3]["parts"][0]["type"] == "internal_wall_insulation"
|
||||
assert recommender5.recommendations[3]["parts"][0]["depth"] == 100
|
||||
|
||||
def test_sandstone_or_limestone_wall(self):
|
||||
input_property6 = Property(id=1, postcode="F4k3 6", address1="623 fake street", epc_client=Mock())
|
||||
input_property6 = Property(id=1, postcode="F4k3 6", address="623 fake street")
|
||||
input_property6.walls = {
|
||||
'original_description': 'Sandstone or limestone, as built, no insulation (assumed)',
|
||||
'clean_description': 'Sandstone or limestone, as built, no insulation',
|
||||
|
|
@ -504,16 +504,16 @@ class TestCavityWallRecommensations:
|
|||
assert len(recommender6.recommendations) == 9
|
||||
assert recommender6.estimated_u_value == 1
|
||||
assert np.isclose(recommender6.recommendations[0]["new_u_value"], 0.19)
|
||||
assert np.isclose(recommender6.recommendations[0]["total"], 44829.0584)
|
||||
assert np.isclose(recommender6.recommendations[0]["total"], 46374.888000000006)
|
||||
assert recommender6.recommendations[0]["parts"][0]["type"] == "external_wall_insulation"
|
||||
assert recommender6.recommendations[0]["parts"][0]["depth"] == 100
|
||||
|
||||
assert np.isclose(recommender6.recommendations[2]["new_u_value"], 0.21)
|
||||
assert np.isclose(recommender6.recommendations[2]["total"], 116436.25280000002)
|
||||
assert np.isclose(recommender6.recommendations[2]["total"], 120451.29600000002)
|
||||
assert recommender6.recommendations[2]["parts"][0]["type"] == "external_wall_insulation"
|
||||
assert recommender6.recommendations[2]["parts"][0]["depth"] == 150
|
||||
|
||||
assert np.isclose(recommender6.recommendations[4]["new_u_value"], 0.28)
|
||||
assert np.isclose(recommender6.recommendations[4]["total"], 91267.0136)
|
||||
assert np.isclose(recommender6.recommendations[4]["total"], 94414.15199999999)
|
||||
assert recommender6.recommendations[4]["parts"][0]["type"] == "internal_wall_insulation"
|
||||
assert recommender6.recommendations[4]["parts"][0]["depth"] == 100
|
||||
|
|
|
|||
252
recommendations/tests/test_window_recommendations.py
Normal file
252
recommendations/tests/test_window_recommendations.py
Normal file
|
|
@ -0,0 +1,252 @@
|
|||
from recommendations.WindowsRecommendations import WindowsRecommendations
|
||||
from backend.Property import Property
|
||||
from recommendations.tests.test_data.materials import materials
|
||||
|
||||
|
||||
class TestWindowRecommendations:
|
||||
|
||||
def test_fully_single_glazed(self):
    """
    The windows on this property are described as single glazed, so the recommendation should be to install
    double glazing to all windows
    :return:
    """

    epc_data = {"county": "Wychavon", "multi-glaze-proportion": 0, "uprn": 0}
    single_glazed_property = Property(id=1, postcode='1', address='1', data=epc_data)
    single_glazed_property.windows = {
        'original_description': 'Single glazed',
        'has_glazing': False,
        'glazing_coverage': 'full',
        'glazing_type': 'single',
        'no_data': False,
    }
    single_glazed_property.number_of_windows = 7

    glazing_recommender = WindowsRecommendations(property_instance=single_glazed_property, materials=materials)

    # No recommendation should exist until recommend() has been called
    assert not glazing_recommender.recommendation

    glazing_recommender.recommend()

    expected_recommendation = {
        'parts': [],
        'type': 'windows_glazing',
        'description': 'Install double glazing to all windows',
        'starting_u_value': None,
        'new_u_value': None,
        'sap_points': None,
        'total': 5721.943248,
        'subtotal': 4768.28604,
        'vat': 953.6572080000001,
        'contingency': 340.59186,
        'preliminaries': 340.59186,
        'material': 1275.75,
        'profit': 681.18372,
        'labour_hours': 45.5,
        'labour_cost': 994.8624,
        'labour_days': 2.84375,
        'is_secondary_glazing': False,
    }

    assert glazing_recommender.recommendation == [expected_recommendation]
|
||||
|
||||
def test_partial_double_glazed(self):
    """
    The double glazing on this property is described as covering most windows, so the recommendation should be
    to install double glazing to the remaining windows
    :return:
    """

    epc_data = {"county": "Wychavon", "multi-glaze-proportion": 33, "uprn": 0}
    mostly_glazed_property = Property(id=1, postcode='1', address='1', data=epc_data)
    mostly_glazed_property.windows = {
        'original_description': 'Mostly double glazing',
        'has_glazing': True,
        'glazing_coverage': 'most',
        'glazing_type': 'double',
        'no_data': False,
    }
    mostly_glazed_property.number_of_windows = 7

    glazing_recommender = WindowsRecommendations(property_instance=mostly_glazed_property, materials=materials)

    # No recommendation should exist until recommend() has been called
    assert not glazing_recommender.recommendation

    glazing_recommender.recommend()

    expected_recommendation = {
        'parts': [],
        'type': 'windows_glazing',
        'description': 'Install double glazing to the remaining windows',
        'starting_u_value': None,
        'new_u_value': None,
        'sap_points': None,
        'total': 4087.10232,
        'subtotal': 3405.9186,
        'vat': 681.18372,
        'contingency': 243.2799,
        'preliminaries': 243.2799,
        'material': 911.25,
        'profit': 486.5598,
        'labour_hours': 32.5,
        'labour_cost': 710.6160000000001,
        'labour_days': 2.03125,
        'is_secondary_glazing': False,
    }

    assert glazing_recommender.recommendation == [expected_recommendation]
|
||||
|
||||
def test_fully_double_glazed(self):
    """
    The windows on this property are described as fully double glazed, so nothing should be recommended
    :return:
    """

    epc_data = {"county": "Wychavon", "multi-glaze-proportion": 80, "uprn": 0}
    double_glazed_property = Property(id=1, postcode='1', address='1', data=epc_data)
    double_glazed_property.windows = {
        'original_description': 'Fully double glazed',
        'has_glazing': True,
        'glazing_coverage': 'full',
        'glazing_type': 'double',
        'no_data': False,
    }
    double_glazed_property.number_of_windows = 7

    glazing_recommender = WindowsRecommendations(property_instance=double_glazed_property, materials=materials)

    # No recommendation should exist before recommend() is called...
    assert not glazing_recommender.recommendation

    glazing_recommender.recommend()

    # ...and there should still be none afterwards
    assert not glazing_recommender.recommendation
|
||||
|
||||
def test_fully_secondary_glazed(self):
    """
    The windows on this property are described as having full secondary glazing, so nothing should be recommended
    :return:
    """

    epc_data = {"county": "Wychavon", "multi-glaze-proportion": 100, "uprn": 0}
    secondary_glazed_property = Property(id=1, postcode='1', address='1', data=epc_data)
    secondary_glazed_property.windows = {
        'original_description': 'Full secondary glazing',
        'has_glazing': True,
        'glazing_coverage': 'full',
        'glazing_type': 'secondary',
        'no_data': False,
    }
    secondary_glazed_property.number_of_windows = 7

    glazing_recommender = WindowsRecommendations(property_instance=secondary_glazed_property, materials=materials)

    # No recommendation should exist before recommend() is called...
    assert not glazing_recommender.recommendation

    glazing_recommender.recommend()

    # ...and there should still be none afterwards
    assert not glazing_recommender.recommendation
|
||||
|
||||
def test_partial_secondary_glazing(self):
    """
    The secondary glazing on this property is described as partial, so the recommendation should be to install
    secondary glazing to the remaining windows
    :return:
    """

    epc_data = {"county": "Wychavon", "multi-glaze-proportion": 50, "uprn": 0}
    partly_glazed_property = Property(id=1, postcode='1', address='1', data=epc_data)
    partly_glazed_property.windows = {
        'original_description': 'Partial secondary glazing',
        'has_glazing': True,
        'glazing_coverage': 'partial',
        'glazing_type': 'secondary',
        'no_data': False,
    }
    partly_glazed_property.number_of_windows = 7

    glazing_recommender = WindowsRecommendations(property_instance=partly_glazed_property, materials=materials)

    # No recommendation should exist until recommend() has been called
    assert not glazing_recommender.recommendation

    glazing_recommender.recommend()

    expected_recommendation = {
        'parts': [],
        'type': 'windows_glazing',
        'description': 'Install secondary glazing to the remaining windows',
        'starting_u_value': None,
        'new_u_value': None,
        'sap_points': None,
        'total': 1089.893952,
        'subtotal': 908.24496,
        'vat': 181.64899200000002,
        'contingency': 64.87464,
        'preliminaries': 64.87464,
        'material': 729.0,
        'profit': 129.74928,
        'labour_hours': 13.0,
        'labour_cost': 568.4928,
        'labour_days': 0.8125,
        'is_secondary_glazing': True,
    }

    assert glazing_recommender.recommendation == [expected_recommendation]
|
||||
|
||||
def test_single_glazed_restricted_measures(self):
    """
    A single glazed property flagged with restricted_measures and is_heritage should
    receive a single recommendation for secondary glazing to all windows
    """
    property_data = {
        "county": "Wychavon",
        "multi-glaze-proportion": 0,
        "uprn": 0
    }
    heritage_property = Property(id=1, postcode='1', address='1', data=property_data)
    heritage_property.windows = dict(
        original_description='Single glazed',
        has_glazing=False,
        glazing_coverage=None,
        glazing_type='single',
        no_data=False,
    )
    heritage_property.number_of_windows = 7
    heritage_property.restricted_measures = True
    heritage_property.is_heritage = True

    windows_recommender = WindowsRecommendations(property_instance=heritage_property, materials=materials)

    # The recommendation is expected to be empty before recommend() has run
    assert not windows_recommender.recommendation

    windows_recommender.recommend()

    # NOTE: the description text (including its spelling) is compared verbatim against
    # what the recommender emits, so it must not be edited here in isolation
    expected_measure = {
        'parts': [],
        'type': 'windows_glazing',
        'description': 'Install secondary glazing to all windows. Secondary '
                       'glazing recommended due to herigate building status',
        'starting_u_value': None,
        'new_u_value': None,
        'sap_points': None,
        'total': 1907.314416,
        'subtotal': 1589.42868,
        'vat': 317.885736,
        'contingency': 113.53062,
        'preliminaries': 113.53062,
        'material': 1275.75,
        'profit': 227.06124,
        'labour_hours': 22.75,
        'labour_cost': 994.8624,
        'labour_days': 1.421875,
        'is_secondary_glazing': True,
    }
    assert windows_recommender.recommendation == [expected_measure]
|
||||
|
||||
def test_full_triple_glazed(self):
    """
    A property whose windows are described as fully triple glazed should have no
    windows recommendation, both before and after recommend() is called
    """
    property_data = {
        "county": "Wychavon",
        "multi-glaze-proportion": 100,
        "uprn": 0
    }
    fully_triple = Property(id=1, postcode='1', address='1', data=property_data)
    fully_triple.windows = dict(
        original_description='Fully triple glazed',
        has_glazing=True,
        glazing_coverage='full',
        glazing_type='triple',
        no_data=False,
    )
    fully_triple.number_of_windows = 7

    windows_recommender = WindowsRecommendations(property_instance=fully_triple, materials=materials)

    # The recommendation is expected to be empty before recommend() has run
    assert not windows_recommender.recommendation

    windows_recommender.recommend()

    # ... and it is expected to remain empty afterwards
    assert not windows_recommender.recommendation
|
||||
|
||||
def test_partial_triple_glazed(self):
    """
    When most windows are already triple glazed, the remaining windows should be
    recommended double glazing rather than triple, since it's a cheaper option
    """
    property_data = {
        "county": "Wychavon",
        "multi-glaze-proportion": 80,
        "uprn": 1
    }
    mostly_triple = Property(id=1, postcode='1', address='1', data=property_data)
    mostly_triple.windows = dict(
        original_description='Mostly triple glazing',
        has_glazing=True,
        glazing_coverage='most',
        glazing_type='triple',
        no_data=False,
    )
    mostly_triple.number_of_windows = 7

    windows_recommender = WindowsRecommendations(property_instance=mostly_triple, materials=materials)

    # The recommendation is expected to be empty before recommend() has run
    assert not windows_recommender.recommendation

    windows_recommender.recommend()

    expected_measure = {
        'parts': [],
        'type': 'windows_glazing',
        'description': 'Install double glazing to the remaining windows',
        'starting_u_value': None,
        'new_u_value': None,
        'sap_points': None,
        'total': 1634.840928,
        'subtotal': 1362.36744,
        'vat': 272.47348800000003,
        'contingency': 97.31196,
        'preliminaries': 97.31196,
        'material': 364.5,
        'profit': 194.62392,
        'labour_hours': 13.0,
        'labour_cost': 284.2464,
        'labour_days': 0.8125,
        'is_secondary_glazing': False,
    }
    assert windows_recommender.recommendation == [expected_measure]
|
||||
Loading…
Add table
Reference in a new issue