ignore env

This commit is contained in:
Michael Duong 2024-05-15 09:03:24 +00:00
commit e5a00e7028
70 changed files with 13133 additions and 1221 deletions

2
.idea/.gitignore generated vendored
View file

@ -1,3 +1,5 @@
# Default ignored files
/shelf/
/workspace.xml
# GitHub Copilot persisted chat sessions
/copilot/chatSessions

2
.idea/Model.iml generated
View file

@ -7,7 +7,7 @@
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyNamespacePackagesService">

2
.idea/misc.xml generated
View file

@ -3,7 +3,7 @@
<component name="Black">
<option name="sdkName" value="Python 3.10 (backend)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
<component name="PythonCompatibilityInspectionAdvertiser">
<option name="version" value="3" />
</component>

View file

@ -1,4 +1,5 @@
import os
import ast
from itertools import groupby
import pandas as pd
@ -11,11 +12,17 @@ from utils.s3 import read_dataframe_from_s3_parquet
from etl.epc.settings import DATA_ANOMALY_MATCHES
from recommendations.rdsap_tables import FLOOR_LEVEL_MAP
from recommendations.recommendation_utils import (
estimate_perimeter, get_wall_type, estimate_external_wall_area, esimtate_pitched_roof_area, estimate_windows
estimate_perimeter,
get_wall_type,
estimate_external_wall_area,
esimtate_pitched_roof_area,
estimate_windows,
)
ENVIRONMENT = os.environ.get('ENVIRONMENT', 'dev')
DATA_BUCKET = os.environ.get('DATA_BUCKET', 'retrofit-data-dev' if ENVIRONMENT == 'dev' else None)
ENVIRONMENT = os.environ.get("ENVIRONMENT", "dev")
DATA_BUCKET = os.environ.get(
"DATA_BUCKET", "retrofit-data-dev" if ENVIRONMENT == "dev" else None
)
logger = setup_logger()
@ -30,7 +37,7 @@ class Property:
"roof-description": "roof",
"walls-description": "walls",
"windows-description": "windows",
"lighting-description": "lighting"
"lighting-description": "lighting",
}
floor = None
@ -49,7 +56,14 @@ class Property:
DATA_ANOMALY_MATCHES = DATA_ANOMALY_MATCHES
def __init__(self, id, postcode, address, epc_record):
# Surplus information, that can be provided as optional inputs, by a customer
n_bathrooms = None
n_bedrooms = None
def __init__(
self, id, postcode, address, epc_record, already_installed=None, non_invasive_recommendations=None,
**kwargs
):
self.epc_record = epc_record
@ -57,9 +71,20 @@ class Property:
self.address = address
self.postcode = postcode
self.data = {k.replace("_", "-"): v for k, v in epc_record.get("prepared_epc").items()}
self.data = {
k.replace("_", "-"): v for k, v in epc_record.get("prepared_epc").items()
}
self.old_data = epc_record.get("old_data")
self.property_dimensions = None
# This is a list of measures that have already been installed in the property, typically found as a result
# of the non-invasive surveys. We reflect that this has been installed in the recommendations, but remove the
# cost and instead, provide a message that the measure has already been installed
self.already_installed = ast.literal_eval(already_installed['already_installed']) if already_installed else []
self.non_invasive_recommendations = (
ast.literal_eval(non_invasive_recommendations['recommendations']) if
non_invasive_recommendations else []
)
self.uprn = epc_record.get("uprn")
self.full_sap_epc = epc_record.get("full_sap_epc")
@ -92,7 +117,9 @@ class Property:
"wind_turbine": epc_record.prepared_epc.get("wind_turbine_count"),
}
self.number_of_open_fireplaces = {
"number_of_open_fireplaces": epc_record.prepared_epc.get("number_open_fireplaces"),
"number_of_open_fireplaces": epc_record.prepared_epc.get(
"number_open_fireplaces"
),
}
self.number_of_extensions = {
"number_of_extensions": epc_record.prepared_epc.get("extension_count"),
@ -105,22 +132,55 @@ class Property:
"length": epc_record.prepared_epc.get("unheated_corridor_length"),
"heat_loss_corridor_boolean": epc_record.get("heat_loss_corridor_bool"),
}
self.mains_gas = epc_record.prepared_epc.get('mains_gas_flag')
self.floor_height = epc_record.prepared_epc.get('floor_height')
self.mains_gas = epc_record.prepared_epc.get("mains_gas_flag")
self.floor_height = epc_record.prepared_epc.get("floor_height")
self.insulation_wall_area = None
self.floor_area = epc_record.prepared_epc.get('total_floor_area')
self.floor_area = epc_record.prepared_epc.get("total_floor_area")
self.pitched_roof_area = None
self.insulation_floor_area = None
self.number_lighting_outlets = epc_record.prepared_epc.get("fixed_lighting_outlets_count")
self.number_lighting_outlets = epc_record.prepared_epc.get(
"fixed_lighting_outlets_count"
)
self.floor_level = None
self.number_of_windows = None
self.solar_pv_percentage = None
self.current_adjusted_energy = None
self.expected_adjusted_energy = None
self.current_energy_bill = None
self.expected_energy_bill = None
self.recommendations_scoring_data = []
self.parse_kwargs(kwargs)
@classmethod
def extract_kwargs(cls, kwargs):
    """
    Pull the optional, customer-supplied inputs out of the request kwargs. This is intended to be used in the
    router, so that values are coerced to integers up front and nothing unexpected is passed on to the
    __init__ method of this class

    :param kwargs: mapping of the raw request values; only "n_bathrooms" and "n_bedrooms" are read
    :return: dict with the keys "n_bathrooms" and "n_bedrooms", each an int, or None when absent or None
    """
    extracted = {}
    for field in ("n_bathrooms", "n_bedrooms"):
        raw_value = kwargs.get(field)
        if raw_value is None:
            extracted[field] = None
            continue
        # A small value is added before rounding, so that a value ending in .5 (e.g. 0.5) is rounded up
        extracted[field] = int(round(float(raw_value) + 1e-5))
    return extracted
def parse_kwargs(self, kwargs):
    """
    Store the optional inputs that we recognise on the instance. Any other entries in kwargs are ignored

    :param kwargs: mapping of optional inputs, from which "n_bathrooms" and "n_bedrooms" are read
    :return: None
    """
    for recognised in ("n_bathrooms", "n_bedrooms"):
        # An input that was not provided is stored as None
        setattr(self, recognised, kwargs.get(recognised))
def create_base_difference_epc_record(self, cleaned_lookup: dict):
"""
Creates a EPCDifferenceRecord object, which is used to store the difference between the current and
@ -131,18 +191,30 @@ class Property:
# difference_record = self.epc_record - self.epc_record
# TODO: change these lower and replace in the settings file
print("CHANGE THE LATEST FIELD TO REMOVE NUMBER HABITABLE ROOMS IF WE WANT TO USE STARTING/ENDING")
print(
"CHANGE THE LATEST FIELD TO REMOVE NUMBER HABITABLE ROOMS IF WE WANT TO USE STARTING/ENDING"
)
fixed_data_col_names = MANDATORY_FIXED_FEATURES + LATEST_FIELD
print("NEED TO CHANGE THE DASH TO LOWER CASE")
fixed_data_col_names = [x.lower().replace("_", "-") for x in fixed_data_col_names]
fixed_data_col_names = [
x.lower().replace("_", "-") for x in fixed_data_col_names
]
fixed_data = {k.replace("-", "_"): v for k, v in self.data.items() if k in fixed_data_col_names}
fixed_data = {
k.replace("-", "_"): v
for k, v in self.data.items()
if k in fixed_data_col_names
}
# difference_record.append_fixed_data(fixed_data)
difference_record = self.epc_record.create_EPCDifferenceRecord(self.epc_record, fixed_data)
difference_record = self.epc_record.create_EPCDifferenceRecord(
self.epc_record, fixed_data
)
self.base_difference_record = TrainingDataset(datasets=[difference_record], cleaned_lookup=cleaned_lookup)
self.base_difference_record = TrainingDataset(
datasets=[difference_record], cleaned_lookup=cleaned_lookup
)
# TODO: adjust the base difference record with the previously calculated u values + features
# estimated_perimeter is different to the perimeter in the epc record
@ -150,8 +222,7 @@ class Property:
# self.base_difference_record.df
def adjust_difference_record_with_recommendations(
self, property_recommendations,
property_representative_recommendations
self, property_recommendations, property_representative_recommendations
):
"""
This method will adjust the difference record, based on the recommendations made for the property
@ -163,13 +234,23 @@ class Property:
"""
self.recommendations_scoring_data = []
phases = sorted([r[0]["phase"] for r in property_recommendations if r[0]["phase"] is not None])
phases = sorted(
[
r[0]["phase"]
for r in property_recommendations
if r[0]["phase"] is not None
]
)
for phase in phases:
property_recommendations_by_phase = [r for r in property_recommendations if r[0]["phase"] == phase][0]
property_recommendations_by_phase = [
r for r in property_recommendations if r[0]["phase"] == phase
][0]
previous_phases = [p for p in phases if p < phase]
previous_phase_representatives = [
r for r in property_representative_recommendations if r["phase"] in previous_phases
r
for r in property_representative_recommendations
if r["phase"] in previous_phases
]
# For solid wall insulation, we will actually have 2 representative recommendations, since we consider
# both internal and external wall insulation as possible measures. We will use the representative that
@ -177,15 +258,20 @@ class Property:
# Take the representative with the lowest efficiency, by phase
# To be safe, we sort by phase
previous_phase_representatives = sorted(previous_phase_representatives, key=lambda x: x['phase'])
previous_phase_representatives = sorted(
previous_phase_representatives, key=lambda x: x["phase"]
)
previous_phase_representatives = [
min(group, key=lambda x: x['efficiency']) for _, group in groupby(
previous_phase_representatives, key=lambda x: x['phase']
min(group, key=lambda x: x["efficiency"])
for _, group in groupby(
previous_phase_representatives, key=lambda x: x["phase"]
)
]
recommendation_record = self.base_difference_record.df.to_dict("records")[0].copy()
recommendation_record = self.base_difference_record.df.to_dict("records")[
0
].copy()
for rec in property_recommendations_by_phase:
# We simulate the impact of the recommendation at this current phase, and all of the prior phases
@ -197,13 +283,18 @@ class Property:
property_id=self.id,
recommendation_record=recommendation_record,
recommendations=previous_phase_representatives + [rec],
primary_recommendation_id=rec["recommendation_id"]
primary_recommendation_id=rec["recommendation_id"],
non_invasive_recommendations=self.non_invasive_recommendations,
)
self.recommendations_scoring_data.append(scoring_dict)
@staticmethod
def create_recommendation_scoring_data(
property_id, recommendation_record, recommendations: list, primary_recommendation_id: int
property_id,
recommendation_record,
recommendations: list,
primary_recommendation_id: int,
non_invasive_recommendations: list = None,
):
"""
This function will iterate through a list of recommendations and apply a simulation for each recommendation
@ -212,13 +303,17 @@ class Property:
:param recommendation_record: The record of the property, which will be updated
:param recommendations: The list of recommendations to apply
:param primary_recommendation_id: The id of the primary recommendation, which is used to identify the record
:param non_invasive_recommendations: The list of non-invasive recommendations
:return: The updated recommendation record
"""
output = recommendation_record.copy()
non_invasive_recommendations = [] if non_invasive_recommendations is None else non_invasive_recommendations
for col in [
"walls_insulation_thickness", "floor_insulation_thickness", "roof_insulation_thickness"
"walls_insulation_thickness",
"floor_insulation_thickness",
"roof_insulation_thickness",
]:
if output[col] is None:
output[col] = "none"
@ -228,14 +323,25 @@ class Property:
# We update the description to indicate it's insulated
if recommendation["type"] in [
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"
"internal_wall_insulation",
"external_wall_insulation",
"cavity_wall_insulation",
]:
# # If we have a non-invasive recommendation that the cavity wall is partially filled, we skip the
# # cavity wall insulation recommendation (since on the EPC, the property will look like how it did
# # before any works)
# if "cavity_surveyed_as_filled_is_partial" in non_invasive_recommendations:
# continue
# The upgrade made here is to the u-value of the walls and the description of the
# insulation thickness
output["walls_thermal_transmittance_ending"] = recommendation["new_u_value"]
output["walls_thermal_transmittance_ending"] = recommendation[
"new_u_value"
]
# Setting the insulation thickness here to above average should be tested further because we
# don't see a high volume of instances for this
output["walls_insulation_thickness_ending"] = "above average"
output["walls_insulation_thickness_ending"] = "average"
output["walls_energy_eff_ending"] = "Good"
# Note: often when the wall is insulated, the internal/external insulation is not noted so we should
@ -265,10 +371,14 @@ class Property:
# Update description to indicate it's insulated
if recommendation["type"] in [
"solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation"
"solid_floor_insulation",
"suspended_floor_insulation",
"exposed_floor_insulation",
]:
if len(recommendation["parts"]) > 1:
raise NotImplementedError("Have more than 1 floor insulation part - handle this case")
raise NotImplementedError(
"Have more than 1 floor insulation part - handle this case"
)
# output["floor_thermal_transmittance_ending"] = recommendation["new_u_value"]
# We don't really see above average for this in the training data
@ -282,22 +392,43 @@ class Property:
if output["floor_insulation_thickness_ending"] is None:
output["floor_insulation_thickness_ending"] = "none"
if recommendation["type"] in ["loft_insulation", "room_roof_insulation", "flat_roof_insulation"]:
output["roof_thermal_transmittance_ending"] = recommendation["new_u_value"]
if recommendation["type"] in [
"loft_insulation",
"room_roof_insulation",
"flat_roof_insulation",
]:
output["roof_thermal_transmittance_ending"] = recommendation[
"new_u_value"
]
parts = recommendation["parts"]
if len(parts) != 1:
raise ValueError("More than one part for roof insulation - investiage me")
raise ValueError(
"More than one part for roof insulation - investiage me"
)
# This is based on the values we have in the training data
valid_numeric_values = [
12, 25, 50, 75, 100, 150, 200, 250, 270, 300, 350, 400
12,
25,
50,
75,
100,
150,
200,
250,
270,
300,
350,
400,
]
proposed_depth = int(parts[0]["depth"])
if proposed_depth not in valid_numeric_values:
# Take the nearest value for scoring
proposed_depth = min(valid_numeric_values, key=lambda x: abs(x - proposed_depth))
proposed_depth = min(
valid_numeric_values, key=lambda x: abs(x - proposed_depth)
)
output["roof_insulation_thickness_ending"] = str(proposed_depth)
if recommendation["type"] == "loft_insulation":
@ -331,11 +462,17 @@ class Property:
if output["glazing_type_ending"] == "multiple":
pass
elif output["glazing_type_ending"] == "single":
output["glazing_type_ending"] = "secondary" if is_secondary_glazing else "double"
output["glazing_type_ending"] = (
"secondary" if is_secondary_glazing else "double"
)
elif output["glazing_type_ending"] == "double":
output["glazing_type_ending"] = "multiple" if is_secondary_glazing else "double"
output["glazing_type_ending"] = (
"multiple" if is_secondary_glazing else "double"
)
elif output["glazing_type_ending"] == "secondary":
output["glazing_type_ending"] = "secondary" if is_secondary_glazing else "multiple"
output["glazing_type_ending"] = (
"secondary" if is_secondary_glazing else "multiple"
)
elif output["glazing_type_ending"] in ["triple", "high performance"]:
output["glazing_type_ending"] = "multiple"
else:
@ -344,9 +481,13 @@ class Property:
if is_secondary_glazing:
output["glazed_type_ending"] = "secondary glazing"
else:
output["glazed_type_ending"] = "double glazing installed during or after 2002"
output["glazed_type_ending"] = (
"double glazing installed during or after 2002"
)
if recommendation["type"] in ["heating", "hot_water_tank_insulation"]:
if recommendation["type"] in [
"heating", "hot_water_tank_insulation", "heating_control", "secondary_heating"
]:
# We update the data, as defined in the recommendation
simulation_config = recommendation["simulation_config"]
@ -366,15 +507,20 @@ class Property:
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
"loft_insulation", "room_roof_insulation", "flat_roof_insulation",
"solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation",
"windows_glazing", "solar_pv", "heating", "hot_water_tank_insulation"
"windows_glazing", "solar_pv", "heating", "hot_water_tank_insulation",
"heating_control", "secondary_heating"
]:
raise NotImplementedError("Implement me, given type %s" % recommendation["type"])
raise NotImplementedError(
"Implement me, given type %s" % recommendation["type"]
)
output['id'] = "+".join([str(property_id), str(primary_recommendation_id)])
output["id"] = "+".join([str(property_id), str(primary_recommendation_id)])
return output
def get_components(self, cleaned, photo_supply_lookup, floor_area_decile_thresholds):
def get_components(
self, cleaned, photo_supply_lookup, floor_area_decile_thresholds
):
"""
Given the cleaning that has been performed, we'll use this to identify the property
components, from roof to walls to windows, heating and hot water
@ -399,10 +545,12 @@ class Property:
if self.data[description] in self.DATA_ANOMALY_MATCHES:
template = cleaned[description][0]
fill_dict = dict(zip(template.keys(), [None] * len(template)))
fill_dict.update({
"original_description": self.data[description],
"clean_description": self.data[description],
})
fill_dict.update(
{
"original_description": self.data[description],
"clean_description": self.data[description],
}
)
setattr(
self,
self.ATTRIBUTE_MAP[description],
@ -411,11 +559,15 @@ class Property:
continue
attributes = [
x for x in cleaned[description] if x["original_description"] == self.data[description]
x
for x in cleaned[description]
if x["original_description"] == self.data[description]
]
if len(attributes) > 1:
raise ValueError("Either No attributes or multiple found for %s" % description)
raise ValueError(
"Either No attributes or multiple found for %s" % description
)
if len(attributes) == 0:
# We attempt to perform the clean on the fly
@ -423,8 +575,12 @@ class Property:
cleaner_cls = cleaner_cls(self.data[description])
processed = {
"original_description": self.data[description],
"clean_description": cleaner_cls.description.replace("(assumed)", "").rstrip().capitalize(),
**cleaner_cls.process()
"clean_description": cleaner_cls.description.replace(
"(assumed)", ""
)
.rstrip()
.capitalize(),
**cleaner_cls.process(),
}
attributes = [processed]
@ -436,7 +592,8 @@ class Property:
self.set_floor_level()
self.set_windows_count()
self.set_solar_panel_area(
photo_supply_lookup=photo_supply_lookup, floor_area_decile_thresholds=floor_area_decile_thresholds
photo_supply_lookup=photo_supply_lookup,
floor_area_decile_thresholds=floor_area_decile_thresholds,
)
self.set_energy_source()
@ -453,7 +610,11 @@ class Property:
self.is_heritage = spatial["is_heritage_building"].values[0]
# We do an equals True, in the case of one of these variables being True
if (self.in_conservation_area == True) | (self.is_listed == True) | (self.is_heritage == True):
if (
(self.in_conservation_area == True)
| (self.is_listed == True)
| (self.is_heritage == True)
):
self.restricted_measures = True
spatial_dict = spatial.to_dict("records")[0]
@ -495,7 +656,7 @@ class Property:
"tenure": self.data["tenure"],
"current_epc_rating": self.data["current-energy-rating"],
"current_sap_points": self.data["current-energy-efficiency"],
"current_valuation": current_valuation
"current_valuation": current_valuation,
}
property_data = self._clean_upload_data(property_data)
@ -507,7 +668,11 @@ class Property:
"""
Utility function for usage in the lambda, for preparing the _rating fields
"""
return rating_lookup[field].value if (field not in cls.DATA_ANOMALY_MATCHES) and (field is not None) else None
return (
rating_lookup[field].value
if (field not in cls.DATA_ANOMALY_MATCHES) and (field is not None)
else None
)
def get_property_details_epc(self, portfolio_id: int, rating_lookup):
@ -517,21 +682,37 @@ class Property:
"full_address": self.data["address"],
"total_floor_area": float(self.data["total-floor-area"]),
"walls": self.walls["clean_description"],
"walls_rating": self._prepare_rating_field(self.data["walls-energy-eff"], rating_lookup),
"walls_rating": self._prepare_rating_field(
self.data["walls-energy-eff"], rating_lookup
),
"roof": self.roof["clean_description"],
"roof_rating": self._prepare_rating_field(self.data["roof-energy-eff"], rating_lookup),
"roof_rating": self._prepare_rating_field(
self.data["roof-energy-eff"], rating_lookup
),
"floor": self.floor["clean_description"],
"floor_rating": self._prepare_rating_field(self.data["floor-energy-eff"], rating_lookup),
"floor_rating": self._prepare_rating_field(
self.data["floor-energy-eff"], rating_lookup
),
"windows": self.windows["clean_description"],
"windows_rating": self._prepare_rating_field(self.data["windows-energy-eff"], rating_lookup),
"windows_rating": self._prepare_rating_field(
self.data["windows-energy-eff"], rating_lookup
),
"heating": self.main_heating["clean_description"],
"heating_rating": self._prepare_rating_field(self.data["mainheat-energy-eff"], rating_lookup),
"heating_rating": self._prepare_rating_field(
self.data["mainheat-energy-eff"], rating_lookup
),
"heating_controls": self.main_heating_controls["clean_description"],
"heating_controls_rating": self._prepare_rating_field(self.data["mainheatc-energy-eff"], rating_lookup),
"heating_controls_rating": self._prepare_rating_field(
self.data["mainheatc-energy-eff"], rating_lookup
),
"hot_water": self.hotwater["clean_description"],
"hot_water_rating": self._prepare_rating_field(self.data["hot-water-energy-eff"], rating_lookup),
"hot_water_rating": self._prepare_rating_field(
self.data["hot-water-energy-eff"], rating_lookup
),
"lighting": self.lighting["clean_description"],
"lighting_rating": self._prepare_rating_field(self.data["lighting-energy-eff"], rating_lookup),
"lighting_rating": self._prepare_rating_field(
self.data["lighting-energy-eff"], rating_lookup
),
"mainfuel": self.main_fuel["clean_description"],
"ventilation": self.ventilation["ventilation"],
"solar_pv": self.solar_pv["solar_pv"],
@ -540,7 +721,9 @@ class Property:
"floor_height": self.floor_height,
"heat_loss_corridor": self.heat_loss_corridor["heat_loss_corridor_boolean"],
"unheated_corridor_length": self.heat_loss_corridor["length"],
"number_of_open_fireplaces": self.number_of_open_fireplaces["number_of_open_fireplaces"],
"number_of_open_fireplaces": self.number_of_open_fireplaces[
"number_of_open_fireplaces"
],
"number_of_extensions": self.number_of_extensions["number_of_extensions"],
"number_of_storeys": self.number_of_storeys["number_of_storeys"],
"mains_gas": self.mains_gas,
@ -548,20 +731,21 @@ class Property:
"primary_energy_consumption": self.energy["primary_energy_consumption"],
"co2_emissions": self.energy["co2_emissions"],
"adjusted_energy_consumption": self.current_adjusted_energy,
"estimated": self.data.get("estimated", False)
"estimated": self.data.get("estimated", False),
}
return property_details_epc
def get_spatial_data(self, uprn_filenames):
"""
Given a property's UPRN, this method will pull the associated spatial data from s3
:return:
"""
if self.uprn is None:
logger.warning("We do not have a UPRN for this property - this needs to be implemented")
logger.warning(
"We do not have a UPRN for this property - this needs to be implemented"
)
self.in_conservation_area = False
self.is_listed = False
self.is_heritage = False
@ -569,12 +753,15 @@ class Property:
return
# We get the file name for the uprn
filtered_df = uprn_filenames[(uprn_filenames['lower'] <= self.uprn) & (uprn_filenames['upper'] >= self.uprn)]
filtered_df = uprn_filenames[
(uprn_filenames["lower"] <= self.uprn)
& (uprn_filenames["upper"] >= self.uprn)
]
if filtered_df.empty:
logger.warning("Could not find file containing UPRNS")
return None
filename = filtered_df.iloc[0]['filenames']
filename = filtered_df.iloc[0]["filenames"]
spatial_data = read_dataframe_from_s3_parquet(
bucket_name=DATA_BUCKET, file_key=f"spatial/{filename}"
@ -592,15 +779,27 @@ class Property:
:return: filtered property dimensions dataframe
"""
result = property_dimensions[(property_dimensions["PROPERTY_TYPE"] == self.data["property-type"])]
result = property_dimensions[
(property_dimensions["PROPERTY_TYPE"] == self.data["property-type"])
]
if self.construction_age_band is not None and self.construction_age_band not in self.DATA_ANOMALY_MATCHES:
result = result[(result["CONSTRUCTION_AGE_BAND"] == self.construction_age_band)]
if (
self.construction_age_band is not None
and self.construction_age_band not in self.DATA_ANOMALY_MATCHES
):
result = result[
(result["CONSTRUCTION_AGE_BAND"] == self.construction_age_band)
]
if self.data["built-form"] not in self.DATA_ANOMALY_MATCHES and self.data["built-form"] in result["BUILT_FORM"]:
if (
self.data["built-form"] not in self.DATA_ANOMALY_MATCHES
and self.data["built-form"] in result["BUILT_FORM"]
):
result = result[(result["BUILT_FORM"] == self.data["built-form"])]
return result[["NUMBER_HABITABLE_ROOMS", "TOTAL_FLOOR_AREA", "FLOOR_HEIGHT"]].mean()
return result[
["NUMBER_HABITABLE_ROOMS", "TOTAL_FLOOR_AREA", "FLOOR_HEIGHT"]
].mean()
def set_basic_property_dimensions(self):
"""
@ -619,7 +818,8 @@ class Property:
# They could also be added as attributes to the EPC Record
self.perimeter = estimate_perimeter(
self.floor_area / self.number_of_floors, self.number_of_rooms / self.number_of_floors
self.floor_area / self.number_of_floors,
self.number_of_rooms / self.number_of_floors,
)
self.insulation_wall_area = estimate_external_wall_area(
@ -637,8 +837,9 @@ class Property:
def set_floor_level(self):
self.floor_level = (
FLOOR_LEVEL_MAP[self.data["floor-level"]] if
self.data["floor-level"] not in self.DATA_ANOMALY_MATCHES and self.data['floor-level'] is not None
FLOOR_LEVEL_MAP[self.data["floor-level"]]
if self.data["floor-level"] not in self.DATA_ANOMALY_MATCHES
and self.data["floor-level"] is not None
else None
)
@ -700,22 +901,30 @@ class Property:
raise NotImplementedError("Implement this floor type")
@staticmethod
def _extract_component(component_data, component_rename_cols, component_drop_cols, rename_prefix=None):
def _extract_component(
component_data, component_rename_cols, component_drop_cols, rename_prefix=None
):
for k in component_rename_cols:
component_data[f"{rename_prefix}_{k}"] = component_data.get(k)
component_data = {
k: v for k, v in component_data.items() if k not in component_drop_cols + component_rename_cols
k: v
for k, v in component_data.items()
if k not in component_drop_cols + component_rename_cols
}
return component_data
def set_adjusted_energy(self, current_adjusted_energy, expected_adjusted_energy):
def set_adjusted_energy(
self, current_adjusted_energy, expected_adjusted_energy, current_energy_bill, expected_energy_bill
):
"""
Stores these values for usage later
"""
self.current_adjusted_energy = current_adjusted_energy
self.expected_adjusted_energy = expected_adjusted_energy
self.current_energy_bill = current_energy_bill
self.expected_energy_bill = expected_energy_bill
def set_windows_count(self):
"""
@ -753,7 +962,7 @@ class Property:
is_flat=self.roof["is_flat"],
is_pitched=self.roof["is_pitched"],
is_roof_room=self.roof["is_roof_room"],
floor_area=self.floor_area
floor_area=self.floor_area,
)
percentage_of_roof = photo_supply_matched["photo_supply_median"].mean()
@ -769,8 +978,9 @@ class Property:
"""
return (
self.insulation_floor_area * percentage_of_roof if self.roof["is_flat"] else
self.pitched_roof_area * percentage_of_roof
self.insulation_floor_area * percentage_of_roof
if self.roof["is_flat"]
else self.pitched_roof_area * percentage_of_roof
)
def set_energy_source(self):
@ -783,7 +993,12 @@ class Property:
# If the tariff explicitly indicates electricity use without a dual indication and mains_gas_flag is not True
# We check for the common electricity tariffs
if not self.data["mains-gas-flag"] and self.data["energy-tariff"] in [
"Single", "off-peak 7 hour", "off-peak 10 hour", "off-peak 18 hour", "standard tariff", "24 hour"
"Single",
"off-peak 7 hour",
"off-peak 10 hour",
"off-peak 18 hour",
"standard tariff",
"24 hour",
]:
energy_source = "electricity"

View file

@ -30,7 +30,7 @@ vartypes = {
'environment-impact-potential': "Int64",
'glazed-type': 'str',
'heating-cost-current': 'float',
'address3': 'str',
# 'address3': 'str',
'mainheatcont-description': 'str',
'sheating-energy-eff': 'str',
'property-type': 'str',
@ -40,7 +40,7 @@ vartypes = {
'mechanical-ventilation': 'str',
'hot-water-cost-current': 'str',
'county': 'str',
'postcode': 'str',
# 'postcode': 'str',
'solar-water-heating-flag': 'str',
'constituency': 'str',
'co2-emissions-potential': 'float',
@ -55,7 +55,7 @@ vartypes = {
# 'inspection-date': str,
'mains-gas-flag': 'str',
'co2-emiss-curr-per-floor-area': 'float',
'address1': 'str',
# 'address1': 'str',
'heat-loss-corridor': 'str',
'flat-storey-count': "Int64",
'constituency-label': 'str',
@ -67,7 +67,7 @@ vartypes = {
'roof-description': 'str',
'floor-energy-eff': 'str',
'number-habitable-rooms': 'float',
'address2': 'str',
# 'address2': 'str',
'hot-water-env-eff': 'str',
'posttown': 'str',
'mainheatc-energy-eff': 'str',
@ -98,7 +98,7 @@ vartypes = {
# 'lodgement-date',
'extension-count': "Int64",
'mainheatc-env-eff': 'str',
'lmk-key': 'str',
# 'lmk-key': 'str',
'wind-turbine-count': "Int64",
'tenure': 'str',
'floor-level': 'str',
@ -147,6 +147,7 @@ class SearchEpc:
uprn: [int, None] = None,
size=None,
property_type=None,
fast=False
):
"""
Address lines 1 and postcode are mandatory fields. The other address lines are optional
@ -187,37 +188,37 @@ class SearchEpc:
self.size = size if size is not None else 25
self.property_type = property_type
self.fast = fast
@classmethod
def get_house_number(cls, address: str) -> str | None:
"""
This method will use the usaddress library to parse an address and extract the house number
:return:
This method uses the usaddress library to parse an address and extract the primary house or flat number.
"""
try:
parsed = usaddress.parse(address)
parsed_house_number = [x for x in parsed if (x[1] == "AddressNumber")]
parsed_house_number = parsed_house_number[0][0] if parsed_house_number else None
if parsed_house_number is None:
# Because usaddress isn't optimal for parsing addresses with some prefixes such as 'Flat',
# we also add a custom approach
# Pattern to look for 'Flat' or 'Apartment' followed by a number, or just a number at the beginning
# Custom regex to catch a broad range of cases
pattern = r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)'
match = re.search(pattern, address)
if match:
# Return the first non-None group found
return next(g for g in match.groups() if g is not None)
else:
return None
# Remove training commas
parsed_house_number = parsed_house_number.replace(",", "")
parsed = usaddress.parse(address)
# First, try to get the 'OccupancyIdentifier' if 'OccupancyType' is detected
for part, type_ in parsed:
if type_ == 'OccupancyIdentifier':
return part # This assumes the first 'OccupancyIdentifier' after 'OccupancyType' is the primary
# number
return parsed_house_number
# Fallback to 'AddressNumber' if no 'OccupancyIdentifier' is found
address_number = next((part for part, type_ in parsed if type_ == 'AddressNumber'), None)
if address_number:
return address_number.replace(",", "") # Remove any trailing commas
except Exception as e:
print(f"Error parsing address: {e}")
return None
@staticmethod
def extract_numeric_housenumber_part(house_number: str | None) -> int | None:
@ -365,9 +366,6 @@ class SearchEpc:
# Finally, we identify the newest epc and the rest, and then return
newest_epc, older_epcs = self.filter_newest_epc(list_of_epcs=rows)
# Retrieve postcode and address
address_epc, postcode_epc = self.format_address(newest_epc=newest_epc)
# Get the uprn from the newest record for this home
uprns = {r["uprn"] for r in rows if r["uprn"]}
# We can sometimes have no uprn for a property
@ -384,6 +382,12 @@ class SearchEpc:
uprn = uprns.pop() if uprns else None
if self.fast:
return newest_epc, [], {}, "", "", None
# Retrieve postcode and address
address_epc, postcode_epc = self.format_address(newest_epc=newest_epc)
return newest_epc, older_epcs, full_sap_epc, address_epc, postcode_epc, uprn
@staticmethod
@ -575,6 +579,11 @@ class SearchEpc:
property_type=property_type
)
# If we have missing lodgment date, we fill it with inspection-date
epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["inspection-date"])
# If we still have missing dates, we set it to the mean of the non NA dates
epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["lodgement-datetime"].mean())
# For each attribute, we need to determine the datatype and use an appropriate method
# to estimate.
estimated_epc = {}
@ -609,7 +618,11 @@ class SearchEpc:
# Insert an estimated lodgement datetime, with a weighted average
estimated_epc["lodgement-datetime"] = self.calculate_weighted_lodgement_datetime(epc_data=epc_data)
# Extract lodgement date
estimated_epc["lodgement-date"] = estimated_epc["lodgement-datetime"].strftime("%Y-%m-%d")
# It is possible that there is still no lodgement date, so we need to handle this
if pd.isnull(estimated_epc["lodgement-datetime"]):
estimated_epc["lodgement-date"] = None
else:
estimated_epc["lodgement-date"] = estimated_epc["lodgement-datetime"].strftime("%Y-%m-%d")
estimated_epc["postcode"] = self.postcode
estimated_epc["uprn"] = self.uprn
@ -695,8 +708,13 @@ class SearchEpc:
self.full_sap_epc = {}
# Finally, set a standardised address 1 and postcode
self.address_clean = self.ordnance_survey_client.address_os
self.postcode_clean = self.ordnance_survey_client.postcode_os
self.address_clean = (
self.ordnance_survey_client.address_os if self.ordnance_survey_client.address_os else self.address1
)
self.postcode_clean = (
self.ordnance_survey_client.postcode_os if self.ordnance_survey_client.postcode_os else
self.postcode
)
return
os_response = self.ordnance_survey_client.get_places_api()

View file

@ -0,0 +1,50 @@
from sqlalchemy.orm import Session
from backend.app.db.models.non_intrusive_surveys import NonIntrusiveSurvey, NonIntrusiveSurveyNotes
def upload_non_intrusive_survey_notes(session: Session, non_invasive_notes, batch_size=500):
    """
    Store non-intrusive surveys and their notes in the database, one batch at a time.

    Every dictionary in the input describes a single survey. The 'uprn', 'survey_date' and 'surveyor' entries
    populate the survey row; every other key/value pair is stored as a note (title/note) linked to that survey.
    Each batch is committed before the next one is started.

    :param session: SQLAlchemy Session object through which all database transactions are handled.
    :param non_invasive_notes: List of dictionaries, one per survey, holding 'uprn', 'survey_date', 'surveyor'
        and any number of additional note entries.
    :param batch_size: The number of surveys written per batch (default is 500).
    :return: None
    """
    # Keys that describe the survey itself rather than a note
    survey_fields = ('uprn', 'survey_date', 'surveyor')

    def store_chunk(chunk):
        # One survey row per input record, kept in input order so it can be paired with its record below
        survey_rows = [
            NonIntrusiveSurvey(
                uprn=record['uprn'],
                survey_date=record['survey_date'],
                surveyor=record['surveyor']
            )
            for record in chunk
        ]
        session.add_all(survey_rows)
        session.flush()  # Flush so that each survey row has its id before the notes reference it

        note_rows = []
        for record, survey_row in zip(chunk, survey_rows):
            note_rows.extend(
                NonIntrusiveSurveyNotes(survey_id=survey_row.id, title=title, note=text)
                for title, text in record.items()
                if title not in survey_fields
            )

        session.bulk_save_objects(note_rows)
        session.commit()

    # Walk over the input in slices of batch_size; the final slice may be shorter
    for offset in range(0, len(non_invasive_notes), batch_size):
        store_chunk(non_invasive_notes[offset:offset + batch_size])

View file

@ -4,14 +4,14 @@ from backend.app.db.models.portfolio import Portfolio
def aggregate_portfolio_recommendations(
session, portfolio_id: int, total_valuation_increase: float, labour_days: float
session, portfolio_id: int, total_valuation_increase: float, labour_days: float, aggregated_data: dict
):
# Aggregate multiple fields
aggregates = (
session.query(
func.sum(Recommendation.estimated_cost).label("cost"),
func.sum(Recommendation.total_work_hours).label("total_work_hours"),
func.sum(Recommendation.heat_demand).label("energy_savings"),
func.sum(Recommendation.adjusted_heat_demand).label("energy_savings"),
func.sum(Recommendation.co2_equivalent_savings).label("co2_equivalent_savings"),
func.sum(Recommendation.energy_cost_savings).label("energy_cost_savings"),
)
@ -27,6 +27,7 @@ def aggregate_portfolio_recommendations(
"energy_savings": aggregates.energy_savings or 0,
"co2_equivalent_savings": aggregates.co2_equivalent_savings or 0,
"energy_cost_savings": aggregates.energy_cost_savings or 0,
**aggregated_data
}
# Get the portfolio and update the fields

View file

@ -85,7 +85,8 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
"co2_equivalent_savings": rec["co2_equivalent_savings"],
"total_work_hours": rec["labour_hours"],
"energy_cost_savings": rec["energy_cost_savings"],
"labour_days": rec["labour_days"]
"labour_days": rec["labour_days"],
"already_installed": rec["already_installed"],
}
for rec in recommendations_to_upload
]

View file

@ -0,0 +1,22 @@
from sqlalchemy import Column, BigInteger, String, TIMESTAMP, ForeignKey, Integer
from sqlalchemy.orm import declarative_base
Base = declarative_base()
class NonIntrusiveSurvey(Base):
    """
    A non-intrusive survey carried out on a property, identified by the property's UPRN.
    """
    __tablename__ = 'non_intrusive_survey'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # UPRNs can be up to 12 digits long, which does not fit in a 32-bit Integer column
    uprn = Column(BigInteger, nullable=False)
    survey_date = Column(TIMESTAMP, nullable=False)
    surveyor = Column(String, nullable=False)
class NonIntrusiveSurveyNotes(Base):
    """
    A single titled note attached to a non-intrusive survey.
    """
    __tablename__ = 'non_intrusive_survey_notes'
    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # Links the note to its parent row in the non_intrusive_survey table
    survey_id = Column(BigInteger, ForeignKey('non_intrusive_survey.id'), nullable=False)
    # Heading of the note
    title = Column(String, nullable=False)
    # Text of the note; NULL is not allowed
    note = Column(String, nullable=False)

View file

@ -45,6 +45,21 @@ class Portfolio(Base):
labour_days = Column(Float)
created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
# Aggregations for summary
epc_breakdown_pre_retrofit = Column(Text)
epc_breakdown_post_retrofit = Column(Text)
n_units_to_retrofit = Column(Integer)
co2_per_unit_pre_retrofit = Column(Text)
co2_per_unit_post_retrofit = Column(Text)
energy_bill_per_unit_pre_retrofit = Column(Text)
energy_bill_per_unit_post_retrofit = Column(Text)
energy_consumption_per_unit_pre_retrofit = Column(Text)
energy_consumption_per_unit_post_retrofit = Column(Text)
valuation_improvement_per_unit = Column(Text)
cost_per_unit = Column(Text)
cost_per_co2_saved = Column(Text)
cost_per_sap_point = Column(Text)
valuation_return_on_investment = Column(Text)
class PropertyCreationStatus(enum.Enum):

View file

@ -30,6 +30,7 @@ class Recommendation(Base):
rental_yield_increase = Column(Float)
total_work_hours = Column(Float)
labour_days = Column(Float)
already_installed = Column(Boolean, nullable=False, default=False)
class RecommendationMaterials(Base):

View file

@ -1,3 +1,4 @@
import json
from datetime import datetime
from tqdm import tqdm
@ -24,7 +25,7 @@ from backend.app.db.models.portfolio import rating_lookup
from backend.app.dependencies import validate_token
from backend.app.plan.schemas import PlanTriggerRequest
from backend.app.plan.utils import get_cleaned
from backend.app.utils import epc_to_sap_lower_bound, read_csv_from_s3, sap_to_epc
from backend.app.utils import epc_to_sap_lower_bound, sap_to_epc
from backend.ml_models.api import ModelApi
from backend.Property import Property
@ -35,32 +36,186 @@ from recommendations.optimiser.GainOptimiser import GainOptimiser
from recommendations.optimiser.optimiser_functions import prepare_input_measures
from recommendations.Recommendations import Recommendations
from utils.logger import setup_logger
from utils.s3 import read_dataframe_from_s3_parquet
from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3
from backend.ml_models.Valuation import PropertyValuation
logger = setup_logger()
BATCH_SIZE = 5
SCORING_BATCH_SIZE = 400
def patch_epc(patch, epc_records):
    """
    This utility function is useful to patch the epc data if we have data from the customer.

    Every entry of the patch whose key already exists in the original EPC replaces the EPC value. Keys that
    are not present in the original EPC are ignored, as are blank values.

    :param patch: Dictionary of EPC field name -> replacement value for a single property. May be empty.
    :param epc_records: Dictionary of EPC records. Only epc_records["original_epc"] is modified, in place.
    :return: The same epc_records dictionary that was passed in
    """
    for patch_variable, patch_value in patch.items():
        # The address and postcode only identify which property the patch belongs to
        if patch_variable in ("address", "postcode"):
            continue

        # A blank value means there is nothing to patch. None is treated the same way (e.g. a cell missing
        # from a short CSV row) so that it can never overwrite a real EPC value
        if patch_value is None or patch_value == "":
            continue

        if patch_variable in epc_records["original_epc"]:
            epc_records["original_epc"][patch_variable] = patch_value

    return epc_records
def extract_portfolio_aggregation_data(
    input_properties, total_valuation_increase, recommendations, new_epc_bands, property_value_increase_ranges
):
    """
    Build the summary figures that are stored against a portfolio.

    We aggregate a number of metrics for the portfolio:
    1) A breakdown of the number of properties in each EPC band (before and after retrofit)
    2) Number of units, and number of units with at least one default recommendation
    3) Co2/unit (before and after retrofit)
    4) Energy bill/unit and energy consumption/unit (before and after retrofit)
    5) Average valuation improvement/unit
    6) Cost per unit
    7) £ per CO2 saved
    8) £ per SAP point
    9) Valuation return on investment

    Only recommendations flagged as "default" count towards savings and cost. Properties without any
    recommendations are still included, so per-unit averages are taken over every property supplied.

    Any figure that cannot be computed - because there are no properties, or because the total cost, carbon
    saving or SAP improvement it is divided by is zero - is reported as "N/A".

    :param input_properties: List of property objects exposing id, data, current_energy_bill and
        current_adjusted_energy
    :param total_valuation_increase: Total valuation increase across the portfolio
    :param recommendations: Dictionary of property id -> list of recommendation dictionaries
    :param new_epc_bands: Dictionary of property id -> EPC band after retrofit
    :param property_value_increase_ranges: Dictionary of property id -> dictionary holding "current_value",
        "lower_bound_increased_value" and "upper_bound_increased_value"
    :return: Dictionary of aggregated values, formatted for display
    """
    import math  # Local import: only needed by the formatting guards below

    not_available = "N/A"

    def is_number(value):
        # True for real, finite numbers; False for None, NaN and +/- infinity
        return value is not None and math.isfinite(value)

    def ratio(numerator, denominator):
        # Division that gives None when there is nothing meaningful to divide by
        if not is_number(numerator) or not is_number(denominator) or denominator == 0:
            return None
        return numerator / denominator

    def format_money(amount):
        return f"£{amount:,.0f}" if is_number(amount) else not_available

    def format_quantity(value, unit, ndigits=None):
        # Rounded value followed by its unit, e.g. "2.5t" or "10500kWh"
        if not is_number(value):
            return not_available
        return str(round(value, ndigits)) + unit

    def format_ratio(value):
        return f"{value:,.2f}" if value is not None else not_available

    # Helper function to reformat the EPC data
    def reformat_epc_data(epc_counts):
        # All possible EPC bands in the required order; bands without properties get a count of 0
        epc_bands = ["G", "F", "E", "D", "C", "B", "A"]
        return [{"name": band, band: epc_counts.get(band, 0)} for band in epc_bands]

    n_units = len(input_properties)

    # We need to construct the underlying data for this
    agg_rows = []
    for p in input_properties:
        # Get the recommendations for the property - we include all properties, even ones without recommendations
        property_recommendations = recommendations.get(p.id, [])

        # Get just the default recommendations
        default_recommendations = [r for r in property_recommendations if r["default"]]

        pre_retrofit_co2 = p.data["co2-emissions-current"]
        value_range = property_value_increase_ranges[p.id]

        agg_rows.append({
            "pre_retrofit_epc": p.data["current-energy-rating"],
            "post_retrofit_epc": new_epc_bands[p.id],
            "pre_retrofit_co2": pre_retrofit_co2,
            "post_retrofit_co2": pre_retrofit_co2 - sum(
                [r["co2_equivalent_savings"] for r in default_recommendations]
            ),
            "pre_retrofit_energy_bill": p.current_energy_bill,
            "post_retrofit_energy_bill": p.current_energy_bill - sum(
                [r["energy_cost_savings"] for r in default_recommendations]
            ),
            "pre_retrofit_energy_consumption": p.current_adjusted_energy,
            "post_retrofit_energy_consumption": p.current_adjusted_energy - sum(
                [r["adjusted_heat_demand"] for r in default_recommendations]
            ),
            # Add up the cost of the default recommendations
            "cost": sum([r["total"] for r in default_recommendations]),
            "sap_point_improvement": sum([r["sap_points"] for r in default_recommendations]),
            "lower_bound_valuation_uplift": (
                value_range["lower_bound_increased_value"] - value_range["current_value"]
            ),
            "upper_bound_valuation_uplift": (
                value_range["upper_bound_increased_value"] - value_range["current_value"]
            ),
            "has_recommendations": len(default_recommendations) > 0
        })

    agg_data = pd.DataFrame(agg_rows)

    # With no properties the dataframe has no columns at all, so every column access is guarded by n_units
    def mean_of(column):
        return agg_data[column].mean() if n_units else None

    def sum_of(column):
        return agg_data[column].sum() if n_units else 0

    def band_counts(column):
        return agg_data[column].value_counts().to_dict() if n_units else {}

    total_cost = sum_of("cost")
    total_carbon_saved = sum_of("pre_retrofit_co2") - sum_of("post_retrofit_co2")
    total_sap_points = sum_of("sap_point_improvement")

    valuation_improvement_per_unit = (
        f"{format_money(ratio(total_valuation_increase, n_units))} "
        f"({format_money(mean_of('lower_bound_valuation_uplift'))} - "
        f"{format_money(mean_of('upper_bound_valuation_uplift'))})"
    )

    roi = ratio(total_valuation_increase, total_cost)
    valuation_return_on_investment = (
        (str(round(roi, 2)) if roi is not None else not_available) +
        f" ("
        f"{format_ratio(ratio(sum_of('lower_bound_valuation_uplift'), total_cost))} - "
        f"{format_ratio(ratio(sum_of('upper_bound_valuation_uplift'), total_cost))})"
    )

    aggregation_data = {
        "epc_breakdown_pre_retrofit": json.dumps(reformat_epc_data(band_counts("pre_retrofit_epc"))),
        "epc_breakdown_post_retrofit": json.dumps(reformat_epc_data(band_counts("post_retrofit_epc"))),
        "number_of_properties": int(n_units),
        "n_units_to_retrofit": int(sum_of("has_recommendations")),
        "co2_per_unit_pre_retrofit": format_quantity(mean_of("pre_retrofit_co2"), "t", 2),
        "co2_per_unit_post_retrofit": format_quantity(mean_of("post_retrofit_co2"), "t", 2),
        "energy_bill_per_unit_pre_retrofit": format_money(mean_of("pre_retrofit_energy_bill")),
        "energy_bill_per_unit_post_retrofit": format_money(mean_of("post_retrofit_energy_bill")),
        "energy_consumption_per_unit_pre_retrofit": format_quantity(
            mean_of("pre_retrofit_energy_consumption"), "kWh"
        ),
        "energy_consumption_per_unit_post_retrofit": format_quantity(
            mean_of("post_retrofit_energy_consumption"), "kWh"
        ),
        "valuation_improvement_per_unit": valuation_improvement_per_unit,
        "cost_per_unit": format_money(mean_of("cost")),
        "cost_per_co2_saved": format_money(ratio(total_cost, total_carbon_saved)),
        "cost_per_sap_point": format_money(ratio(total_cost, total_sap_points)),
        "valuation_return_on_investment": valuation_return_on_investment,
        # TODO: Could we add 10yr carbon credits value?
    }

    return aggregation_data
router = APIRouter(
prefix="/plan",
tags=["plan"],
@ -78,12 +233,29 @@ async def trigger_plan(body: PlanTriggerRequest):
# TODO: We should store the trigger file path in the database with the plan so we can track the file that
# triggered the plan
# TODO: Create the ability to configure/switch off certain measures
# TODO: if the measure is already installed, it should actually be the very first phase
try:
session.begin()
logger.info("Getting the inputs")
plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
# If we have patches or overrides, we should read them in here
patches = []
if body.patches_file_path:
patches = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.patches_file_path)
already_installed = []
if body.already_installed_file_path:
already_installed = read_csv_from_s3(
bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.already_installed_file_path
)
non_invasive_recommendations = []
if body.non_invasive_recommendations_file_path:
non_invasive_recommendations = read_csv_from_s3(
bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.non_invasive_recommendations_file_path
)
cleaning_data = read_dataframe_from_s3_parquet(
bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
)
@ -91,19 +263,25 @@ async def trigger_plan(body: PlanTriggerRequest):
input_properties = []
for config in tqdm(plan_input):
# We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
uprn = config.get("uprn", None)
if uprn:
uprn = int(float(uprn))
epc_searcher = SearchEpc(
address1=config["address"],
postcode=config["postcode"],
uprn=uprn,
auth_token=get_settings().EPC_AUTH_TOKEN,
os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY
os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY,
)
epc_searcher.find_property()
epc_searcher.ordnance_survey_client.built_form = config.get("built_form", None)
epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None)
# For the moment, our OS API access is unavailable, so we skip and interpolate
epc_searcher.find_property(skip_os=True)
# Create a record in db
property_id, is_new = create_property(
session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
)
# If a new record was not created, we don't produce recommendations
if not is_new:
continue
@ -120,7 +298,11 @@ async def trigger_plan(body: PlanTriggerRequest):
'full_sap_epc': epc_searcher.full_sap_epc.copy(),
'old_data': epc_searcher.older_epcs.copy(),
}
epc_records = patch_epc(config, epc_records)
patch = next((
x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
), {})
epc_records = patch_epc(patch, epc_records)
prepared_epc = EPCRecord(
epc_records=epc_records,
@ -128,12 +310,25 @@ async def trigger_plan(body: PlanTriggerRequest):
cleaning_data=cleaning_data
)
property_already_installed = next((
x for x in already_installed if
(x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
), {})
property_non_invasive_recommendations = next((
x for x in non_invasive_recommendations if
(x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
), {})
input_properties.append(
Property(
id=property_id,
address=epc_searcher.address_clean,
postcode=epc_searcher.postcode_clean,
epc_record=prepared_epc,
already_installed=property_already_installed,
non_invasive_recommendations=property_non_invasive_recommendations,
**Property.extract_kwargs(config)
)
)
@ -160,15 +355,13 @@ async def trigger_plan(body: PlanTriggerRequest):
recommendations = {}
recommendations_scoring_data = []
representative_recommendations = {}
for p in input_properties:
for p in tqdm(input_properties):
# Property recommendations
p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
recommender = Recommendations(property_instance=p, materials=materials)
# TODO: portfolio id as an input is temp
print("DELETE PORTFOLIO ID AS AN INPUT!!")
property_recommendations, property_representative_recommendations = recommender.recommend(body.portfolio_id)
recommender = Recommendations(property_instance=p, materials=materials, exclusions=body.exclusions)
property_recommendations, property_representative_recommendations = recommender.recommend()
if not property_recommendations:
continue
@ -187,6 +380,7 @@ async def trigger_plan(body: PlanTriggerRequest):
logger.info("Preparing data for scoring in sap change api")
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
recommendations_scoring_data = recommendations_scoring_data.drop(
columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
"carbon_ending"]
@ -194,15 +388,26 @@ async def trigger_plan(body: PlanTriggerRequest):
model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
all_predictions = model_api.predict_all(
df=recommendations_scoring_data,
bucket=get_settings().DATA_BUCKET,
prediction_buckets={
"sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
"heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
"carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET
}
)
all_predictions = {
"sap_change_predictions": pd.DataFrame(),
"heat_demand_predictions": pd.DataFrame(),
"carbon_change_predictions": pd.DataFrame()
}
to_loop_over = range(0, recommendations_scoring_data.shape[0], SCORING_BATCH_SIZE)
for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
predictions_dict = model_api.predict_all(
df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE],
bucket=get_settings().DATA_BUCKET,
prediction_buckets={
"sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
"heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
"carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET
}
)
# Append the predictions to the predictions dictionary
for key, scored in predictions_dict.items():
all_predictions[key] = pd.concat([all_predictions[key], scored])
# Insert the predictions into the recommendations and run the optimiser
# TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a
@ -214,7 +419,13 @@ async def trigger_plan(body: PlanTriggerRequest):
property_instance = [p for p in input_properties if p.id == property_id][0]
recommendations_with_impact, current_adjusted_energy, expected_adjusted_energy = (
(
recommendations_with_impact,
current_adjusted_energy,
expected_adjusted_energy,
current_energy_bill,
expected_energy_bill
) = (
Recommendations.calculate_recommendation_impact(
property_instance=property_instance,
all_predictions=all_predictions,
@ -225,10 +436,12 @@ async def trigger_plan(body: PlanTriggerRequest):
# Store the resulting adjusted energy in the property instance
property_instance.set_adjusted_energy(
current_adjusted_energy=current_adjusted_energy,
expected_adjusted_energy=expected_adjusted_energy
expected_adjusted_energy=expected_adjusted_energy,
current_energy_bill=current_energy_bill,
expected_energy_bill=expected_energy_bill
)
input_measures = prepare_input_measures(recommendations_with_impact, body.goal, body.housing_type)
input_measures = prepare_input_measures(recommendations_with_impact, body.goal)
current_sap_points = int(property_instance.data["current-energy-efficiency"])
target_sap_points = epc_to_sap_lower_bound(body.goal_value)
@ -256,16 +469,14 @@ async def trigger_plan(body: PlanTriggerRequest):
if any(x in [r["type"] for r in solution] for x in [
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"
]):
ventilation_rec = [
r for r in recommendations_with_impact if r[0]["type"] == "mechanical_ventilation"
][0]
selected_recommendations = set(
list(selected_recommendations) + [ventilation_rec[0]["recommendation_id"]]
ventilation_rec = next(
(r[0] for r in recommendations_with_impact if r[0]["type"] == "mechanical_ventilation"),
None
)
# We check if the selected recommendation is wall ventilation and if so, we make sure
# mechanical ventilation is selected
# If a matching recommendation was found, add its ID to the selected recommendations
if ventilation_rec:
selected_recommendations.add(ventilation_rec["recommendation_id"])
# We'll use the set of selected recommendations to filter the recommendations to upload
final_recommendations = [
@ -289,6 +500,8 @@ async def trigger_plan(body: PlanTriggerRequest):
logger.info("Uploading recommendations to the database")
property_valuation_increases = []
session.commit()
new_epc_bands = {}
property_value_increase_ranges = {}
for i in range(0, len(input_properties), BATCH_SIZE):
try:
# Take a slice of the input_properties list to make a batch
@ -300,8 +513,10 @@ async def trigger_plan(body: PlanTriggerRequest):
total_sap_points = sum([r["sap_points"] for r in default_recommendations])
new_sap_points = float(p.data["current-energy-efficiency"]) + total_sap_points
new_epc = sap_to_epc(new_sap_points)
new_epc_bands[p.id] = new_epc
valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc)
property_value_increase_ranges[p.id] = valuations
# Your existing operations
property_details_epc = p.get_property_details_epc(
@ -365,11 +580,20 @@ async def trigger_plan(body: PlanTriggerRequest):
[sum(r["labour_days"] for r in rec_group if r["default"]) for p_id, rec_group in recommendations.items()]
))
aggregated_data = extract_portfolio_aggregation_data(
input_properties=input_properties,
total_valuation_increase=total_valuation_increase,
recommendations=recommendations,
new_epc_bands=new_epc_bands,
property_value_increase_ranges=property_value_increase_ranges
)
aggregate_portfolio_recommendations(
session,
portfolio_id=body.portfolio_id,
total_valuation_increase=total_valuation_increase,
labour_days=labour_days
labour_days=labour_days,
aggregated_data=aggregated_data
)
# Commit final changes

View file

@ -1,10 +1,54 @@
from pydantic import BaseModel
from pydantic import BaseModel, conlist, validator
from typing import Optional
class PlanTriggerRequest(BaseModel):
    """
    Request body used to trigger the creation of a retrofit plan for a portfolio.

    The goal, housing type and exclusions are validated against the pre-defined sets declared on this class.
    The file path fields are optional; when they are omitted they default to None.
    """
    budget: Optional[float] = None
    goal: str
    housing_type: str
    goal_value: str
    portfolio_id: int
    trigger_file_path: str
    already_installed_file_path: Optional[str] = None
    patches_file_path: Optional[str] = None
    non_invasive_recommendations_file_path: Optional[str] = None
    # When provided, the list of exclusions must contain at least one entry
    exclusions: Optional[conlist(str, min_items=1)] = None

    # Pre-defined list of possibilities for exclusions
    _allowed_exclusions = {
        "wall_insulation",
        "ventilation",
        "roof_insulation",
        "floor_insulation",
        "windows",
        "fireplace",
        "heating",
        "hot_water",
        "lighting",
        "solar_pv"
    }

    _allowed_goals = {"Increase EPC"}
    _allowed_housing_types = {"Social", "Private"}

    # Validator to ensure exclusions are within the pre-defined possibilities (applied to each list item)
    @validator('exclusions', each_item=True)
    def check_exclusions(cls, v):
        if v not in cls._allowed_exclusions:
            raise ValueError(f"{v} is not an allowed exclusion")
        return v

    # Validator to ensure that the goal is within the pre-defined possibilities
    @validator('goal')
    def check_goal(cls, v):
        if v not in cls._allowed_goals:
            raise ValueError(f"{v} is not a valid goal")
        return v

    # Validator to ensure that the housing type is within the pre-defined possibilities
    @validator('housing_type')
    def check_housing_type(cls, v):
        if v not in cls._allowed_housing_types:
            raise ValueError(f"{v} is not a valid housing type")
        return v

View file

@ -1,6 +1,4 @@
import boto3
import csv
from io import StringIO
import string
import secrets
import logging
@ -41,25 +39,6 @@ def setup_logger(log_file=None, level=logging.INFO, overwrite_handler=False):
return logger
def read_csv_from_s3(bucket_name, filepath):
s3 = boto3.client('s3')
# Get the object from s3
s3_object = s3.get_object(Bucket=bucket_name, Key=filepath)
# Read the CSV body from the s3 object
body = s3_object['Body'].read()
# Use StringIO to create a file-like object from the string
csv_data = StringIO(body.decode('utf-8'))
# Use csv library to read it into a list of dictionaries
reader = csv.DictReader(csv_data)
data = list(reader)
return data
def generate_api_key():
# Define the characters that will be used to generate the api key
characters = string.ascii_letters + string.digits

View file

@ -10,13 +10,17 @@ class AnnualBillSavings:
AVERAGE_ELECTRICITY_CONSUMPTION = 2700
AVERAGE_GAS_CONSUMPTION = 11500
# Latest price cap figures from Ofgem are for January 2024
# https://www.ofgem.gov.uk/publications/changes-energy-price-cap-1-january-2024
ELECTRICITY_PRICE_CAP = 0.29
GAS_PRICE_CAP = 0.07
# Latest price cap figures from Ofgem are for April 2024
# https://www.ofgem.gov.uk/publications/new-energy-price-cap-level-april-june-2024-starts-today
ELECTRICITY_PRICE_CAP = 0.245
GAS_PRICE_CAP = 0.0604
# This is a weighted mean of the price caps, using the consumption figures above as weights
PRICE_FACTOR = 0.11183098591549295
PRICE_FACTOR = 0.09549999999999999
# Daily standard charge, based on average across England, Scotland and Wales, and includes VAT
DAILY_STANDARD_CHARGE_GAS = 0.3143
DAILY_STANDARD_CHARGE_ELECTRICITY = 0.601
EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"]
@ -38,6 +42,17 @@ class AnnualBillSavings:
"""
return cls.ELECTRICITY_PRICE_CAP * kwh
@classmethod
def calculate_annual_bill(cls, kwh):
    """
    This method will estimate the total annual bill for a property.

    It is assumed that both gas & electricity are used: the consumption is priced at PRICE_FACTOR and both
    daily standing charges are applied for each of the 365 days of the year.

    :param kwh: The total kwh consumption
    :return: An estimate for annual bill
    """
    # Both standing charges are daily figures, so it is their sum that has to be scaled up to a full year
    annual_standing_charge = (cls.DAILY_STANDARD_CHARGE_GAS + cls.DAILY_STANDARD_CHARGE_ELECTRICITY) * 365
    return cls.PRICE_FACTOR * kwh + annual_standing_charge
@classmethod
def adjust_energy_to_metered(cls, epc_energy_consumption, current_epc_rating):
"""

View file

@ -52,6 +52,33 @@ class PropertyValuation:
10070056829: 76_000,
10070056920: 76_000,
10023345463: 76_000,
# IMMO Dudley Pilot - search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/
90070461: 172_000, # Based on Zoopla
90022227: 181_000, # Based on Zoopla
90106884: 180_000, # Based on Zoopla
90051858: 201_000, # Based on Zoopla
90060989: 172_000, # Based on Zoopla
90048026: 196_000, # Based on Zoopla
90077535: 192_000, # Based on Zoopla
90093693: 279_000, # Based on Zoopla
90055152: 149_000, # Based on Zoopla
90028499: 238_000, # Based on Zoopla
# IMMO Dudley Pilot 2- search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/
90039318: 177_000, # Based on Zoopla
90038384: 170_000, # Based on Zoopla
90105380: 185_000, # Based on Zoopla
90124001: 165_000, # Based on Zoopla
90013980: 148_000, # Based on Zoopla
90087154: 184_000, # Based on Zoopla
90046817: 167_000, # Based on Zoopla
# Goldman Sachs Pilot for inrto - search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/
100070358888: 153_000, # Based on Zoopla
10090436544: 282_000, # Based on Zoopla
100070365751: 177_000, # Based on Zoopla
10095952767: 168_000, # Based on Zoopla
100070520130: 177_000, # Based on Zoopla
100070333957: 185_000, # Based on Zoopla
100070543258: 211_000, # Based on Zoopla
}
# We base our valuation uplifts on a number of sources
@ -89,6 +116,29 @@ class PropertyValuation:
# {"start": "D", "end": "A", "increase_percentage": 0.017},
]
# Found here: https://www.rightmove.co.uk/news/articles/property-news/green-premium-epc-ratings/
# F -> C is + 15%
# E -> C is +7%
# D -> C is +3%
RIGHTMOVE_MAPPING = [
{"start": "G", "end": "C", "increase_percentage": 0.15},
{"start": "G", "end": "B", "increase_percentage": 0.15},
{"start": "G", "end": "A", "increase_percentage": 0.15},
{"start": "F", "end": "C", "increase_percentage": 0.15},
{"start": "F", "end": "B", "increase_percentage": 0.15},
{"start": "F", "end": "A", "increase_percentage": 0.15},
{"start": "E", "end": "C", "increase_percentage": 0.07},
{"start": "E", "end": "B", "increase_percentage": 0.07},
{"start": "E", "end": "A", "increase_percentage": 0.07},
{"start": "D", "end": "C", "increase_percentage": 0.03},
{"start": "D", "end": "B", "increase_percentage": 0.03},
{"start": "D", "end": "A", "increase_percentage": 0.03},
]
EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"]
@classmethod
@ -140,14 +190,18 @@ class PropertyValuation:
msm_increase, lloyds_increase = cls.get_increase(epc_band_range)
# We now use the knight frank and nationwide data to get further valuation evidence, if we have it
# We now use the knight frank, nationwide and Rightmove data to get further valuation evidence, if we have it
kf_increase = [x for x in cls.KNIGHT_FRANK_MAPPING if x["start"] == current_epc and x["end"] == target_epc]
nw_increase = [x for x in cls.NATIONWIDE_MAPPING if x["start"] == current_epc and x["end"] == target_epc]
rm_increase = [x for x in cls.RIGHTMOVE_MAPPING if x["start"] == current_epc and x["end"] == target_epc]
kf_increase = kf_increase[0]["increase_percentage"] if kf_increase else None
nw_increase = nw_increase[0]["increase_percentage"] if nw_increase else None
rm_increase = rm_increase[0]["increase_percentage"] if rm_increase else None
all_increases = [x for x in [msm_increase, lloyds_increase, kf_increase, nw_increase] if x is not None]
all_increases = [
x for x in [msm_increase, lloyds_increase, kf_increase, nw_increase, rm_increase] if x is not None
]
max_increase = max(all_increases)
min_increase = min(all_increases)

View file

@ -0,0 +1,114 @@
import pandas as pd
from tqdm import tqdm
from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
from utils.logger import setup_logger
from etl.epc.settings import EARLIEST_EPC_DATE
logger = setup_logger()
class AirSourceHeatPumpEfficiency:
    """
    Count how air source heat pump systems are recorded across the domestic EPC certificate downloads.

    Each directory in ``file_directories`` is read via its ``certificates.csv`` file. Certificates are filtered down
    to those lodged after ``EARLIEST_EPC_DATE`` whose main heating description mentions an air source heat pump, and
    are then counted per combination of the property, heating, hot water and fuel fields in ``_GROUP_COLUMNS``.
    """

    # Fields that must be populated for a certificate to be counted
    _REQUIRED_COLUMNS = ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]

    # Fields that together form the grouping key for the counts
    _GROUP_COLUMNS = [
        "PROPERTY_TYPE",
        "BUILT_FORM",
        "MAINHEAT_DESCRIPTION",
        "MAINHEAT_ENERGY_EFF",
        "MAINHEATCONT_DESCRIPTION",
        "MAINHEATC_ENERGY_EFF",
        "MAIN_FUEL",
        "HOTWATER_DESCRIPTION",
        "HOT_WATER_ENERGY_EFF",
        "MAINS_GAS_FLAG",
    ]

    # Tenure values that are excluded from the dataset
    _EXCLUDED_TENURES = [
        "unknown",
        "Not defined - use in the case of a new dwelling for which the intended tenure in not known. "
        "It is not to be used for an existing dwelling",
    ]

    def __init__(self, file_directories, cleaned_lookup):
        """
        :param file_directories: A list of directories where files are stored.
        :param cleaned_lookup: A dictionary containing cleaned lookup data. It is stored on the instance but is not
            read by create_dataset.
        """
        self.file_directories = file_directories
        self.cleaned_lookup = cleaned_lookup
        self.results = []

    def create_dataset(self):
        """
        Build the aggregated counts of air source heat pump certificates.

        :return: A dataframe with the ``_GROUP_COLUMNS`` fields plus a ``count`` column, summed over every directory.
            An empty dataframe with those columns is returned when there are no directories to read.
        """
        logger.info("Creating air source heat pump efficiency dataset")

        counts_per_directory = []
        for directory in tqdm(self.file_directories):
            df = pd.read_csv(directory / "certificates.csv", low_memory=False)

            # .copy() so the column assignments below act on an independent frame rather than on a filtered slice
            df = df[~pd.isnull(df["UPRN"])].copy()
            df["UPRN"] = df["UPRN"].astype(int).astype(str)

            # Take entries after SAP12
            df["LODGEMENT_DATE"] = pd.to_datetime(df["LODGEMENT_DATE"])
            df = df[df["LODGEMENT_DATE"] > EARLIEST_EPC_DATE]

            df = df[~df["TENURE"].isin(self._EXCLUDED_TENURES)]

            # Take entries that contain an air source heat pump
            df = df[df["MAINHEAT_DESCRIPTION"].str.contains("air source heat pump", case=False, na=False)]

            # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA
            df = df.dropna(subset=self._REQUIRED_COLUMNS)

            counts_per_directory.append(df.groupby(self._GROUP_COLUMNS).size().reset_index(name="count"))

        if not counts_per_directory:
            # pd.concat raises ValueError when given nothing to concatenate
            return pd.DataFrame(columns=[*self._GROUP_COLUMNS, "count"])

        all_counts = pd.concat(counts_per_directory)
        all_counts_agg = all_counts.groupby(self._GROUP_COLUMNS)["count"].sum().reset_index()

        # Findings recorded when this dataset was first explored (BUILT_FORM breakdown per PROPERTY_TYPE):
        # - In houses, 68% of the cases where we see air source heat pumps are in detached and semi-detached houses
        # - In Bungalows, 74% of cases where we see air source heat pumps are in detached and semi-detached houses
        # TODO: Research options for mid and end-terrace houses
        # TODO: Research the options for flats - we see them appear in flats, but practically speaking, how does the
        #  install process work?
        return all_counts_agg

View file

@ -0,0 +1,24 @@
from pathlib import Path
from backend.app.plan.utils import get_cleaned
from etl.air_source_heat_pump.AirSourceHeatPumpEfficiency import AirSourceHeatPumpEfficiency
# Folder holding one sub-directory of EPC certificates per download, resolved relative to this script
DATA_DIRECTORY = Path(__file__).parent.joinpath("local_data", "all-domestic-certificates")


def app():
    """
    Read the EPC dataset and look at the efficiency values recorded for heating systems that include air source
    heat pumps. The resulting dataset is used to inform the air source heat pump recommendations, so we know how to
    set the simulation.

    Every sub-directory of DATA_DIRECTORY is passed to AirSourceHeatPumpEfficiency along with the cleaned lookup.
    :return:
    """
    certificate_directories = []
    for child in DATA_DIRECTORY.iterdir():
        if child.is_dir():
            certificate_directories.append(child)

    lookup = get_cleaned()

    AirSourceHeatPumpEfficiency(
        file_directories=certificate_directories,
        cleaned_lookup=lookup,
    ).create_dataset()

View file

@ -0,0 +1,208 @@
import pandas as pd
from utils.s3 import save_csv_to_s3
# User and portfolio the asset list belongs to; app() uses both to build the S3 key "<USER_ID>/<PORTFOLIO_ID>/inputs.csv"
USER_ID = 8
PORTFOLIO_ID = 67
# Fixed UPRNs that app() keeps for each archetype (20, 46, 23 and 13 properties respectively).
# NOTE(review): the commented-out sampling code in app() suggests these were drawn once at random and then frozen so
# the portfolio stays stable between runs - confirm.
archetype_1_uprns = [100020604138, 200001188299, 100020578756, 200001187196, 200001192253, 100020581792, 200001188304,
100020625813, 100020618060, 100020585305, 100020617489, 100020615039, 100020618076, 100020588913,
200001187197, 100020671205, 100020576940, 100020619814, 100020576472, 100020618083]
archetype_2_uprns = [100020698027, 10001007455, 100020653785, 10090383198, 100020665632, 100020620659, 100020615603,
100020609610, 100020625597, 100020665656, 100020665640, 100020587905, 100020665630, 100020624351,
100020625451, 100020624348, 100020666735, 100020653786, 100020576458, 100020657902, 100020624350,
100020637405, 100020666734, 100020616325, 100020666716, 100020653783, 100020665645, 100020642337,
100020665638, 100022904981, 100020688226, 100020630285, 100020626800, 100020665634, 100022907528,
100020665652, 100020624347, 100020666721, 100020585002, 10014055968, 10001008257, 100020621438,
100020576459, 100020665643, 100020665654, 100022917303]
archetype_3_uprns = [100020577523, 100020616446, 100020605342, 100020594652, 100020585394, 100020601138, 100020597485,
100020614883, 100020633162, 100020697787, 200001185785, 100020646842, 100020581449, 100020595611,
100020641814, 100020575611, 100020652986, 100020654671, 100020647336, 100020610518, 100020607980,
100020692380, 100020581690]
archetype_4_uprns = [100020650603, 100020582907, 100020605116, 100020650607, 100020589325, 100020655500, 100020642537,
200001187539, 100020631683, 100020610165, 100020596436, 100020598277, 100020660228]
def _select_archetype(epc_data, label, property_types, walls, ratings=None, roofs=None):
    """
    Return the UPRN, address and postcode of the rows of ``epc_data`` matching one archetype, tagged with ``label``.

    :param epc_data: The filtered EPC certificates.
    :param label: Value written to the ARCHETYPE column of the result.
    :param property_types: Accepted PROPERTY_TYPE values.
    :param walls: Accepted WALLS_DESCRIPTION values.
    :param ratings: Accepted CURRENT_ENERGY_RATING values, or None to apply no rating filter.
    :param roofs: Accepted ROOF_DESCRIPTION values, or None to apply no roof filter.
    """
    mask = epc_data["PROPERTY_TYPE"].isin(property_types) & epc_data["WALLS_DESCRIPTION"].isin(walls)
    if ratings is not None:
        mask = mask & epc_data["CURRENT_ENERGY_RATING"].isin(ratings)
    if roofs is not None:
        mask = mask & epc_data["ROOF_DESCRIPTION"].isin(roofs)

    selection = epc_data.loc[mask, ["UPRN", "ADDRESS1", "POSTCODE"]].copy()
    selection["ARCHETYPE"] = label
    return selection


def app():
    """
    We shall define a small portfolio of properties, based in Croydon.

    The newest EPC per UPRN is taken for social rented properties below EPC C that were surveyed in the last three
    years. The properties are split into four archetypes, restricted to the fixed archetype_*_uprns lists, saved to
    S3 as the portfolio's inputs.csv, and the plan request body that references the saved file is printed.
    :return: The plan request body (the same dictionary that is printed).
    """
    unfilled_cavity = ["Cavity wall, as built, no insulation (assumed)"]

    # Firstly, read in the EPC data for Croydon
    epc_data = pd.read_csv(
        "local_data/all-domestic-certificates/domestic-E09000008-Croydon/certificates.csv",
        low_memory=False
    )
    # Filter on entries where we have a UPRN. .copy() so the LODGEMENT_DATE assignment below acts on an independent
    # frame rather than on a filtered slice
    epc_data = epc_data[~pd.isnull(epc_data["UPRN"])].copy()

    # Get the newest EPC for each UPRN. We use LODGEMENT_DATE as a proxy for this
    epc_data["LODGEMENT_DATE"] = pd.to_datetime(epc_data["LODGEMENT_DATE"])
    epc_data = epc_data.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN")

    # Now filter on social properties
    epc_data = epc_data[epc_data["TENURE"].isin(["rental (social)", "Rented (social)"])]
    # There are 17337 properties with a registered EPC in Croydon

    # Take below EPC C properties
    epc_data = epc_data[epc_data["CURRENT_ENERGY_EFFICIENCY"].astype(int) < 69]
    # 7994 properties are below EPC C (46%)
    # 79% D, 19% E, 1% F, 0.2% G - it probably makes the most sense to focus on E and D properties

    # For the purpose of the sample, take the properties have surveys done in the last 3 years
    # This gives us 1351 remaining properties
    three_years_ago = pd.Timestamp.now() - pd.DateOffset(days=int(3 * 365))
    epc_data = epc_data[epc_data["LODGEMENT_DATE"] >= three_years_ago]

    # Archetype 1: defined below:
    # 1) House
    # 2) Unfilled cavity
    # 3) A roof that could be insulated (flat or pitched with no more than 50mm insulation)
    # 4) EPC E or D
    # 24 properties
    archetype_1_sample_asset_list = _select_archetype(
        epc_data,
        "Archetype 1",
        property_types=["House"],
        walls=unfilled_cavity,
        ratings=["D", "E"],
        roofs=[
            "Pitched, 12 mm loft insulation",
            "Pitched, 0 mm loft insulation",
            "Pitched, no insulation",
            "Pitched, 50 mm loft insulation",
            "Flat, no insulation (assumed)",
            "Pitched, no insulation (assumed)"
        ],
    )

    # Archetype 2: defined below:
    # 1) Flat
    # 2) Unfilled cavity
    # 3) Another property above
    # 4) EPC E or D
    # 57 properties here
    archetype_2_sample_asset_list = _select_archetype(
        epc_data,
        "Archetype 2",
        property_types=["Flat"],
        walls=unfilled_cavity,
        ratings=["E", "D"],
        roofs=["(another dwelling above)"],
    )

    # Archetype 3: defined below:
    # 1) EPC E or below
    # 2) Solid brick wall
    # 3) House
    # 4) Pitched roof with no, limited or 100 mm insulation
    # Just 7 properties (more expensive to retrofit)
    archetype_3_sample_asset_list = _select_archetype(
        epc_data,
        "Archetype 3",
        property_types=["House"],
        walls=["Solid brick, as built, no insulation (assumed)"],
        ratings=["E", "F", "G"],
        roofs=[
            "Pitched, no insulation",
            "Pitched, limited insulation (assumed)",
            "Pitched, 100 mm loft insulation",
            "Pitched, no insulation (assumed)",
        ],
    )

    # Archetype 4: defined below:
    # 1) Maisonette
    # 2) Empty cavity
    # 16 properties here
    # NOTE(review): earlier notes described this archetype as EPC E, but no band filter is applied here - confirm
    archetype_4_sample_asset_list = _select_archetype(
        epc_data,
        "Archetype 4",
        property_types=["Maisonette"],
        walls=unfilled_cavity,
    )

    asset_list = pd.concat(
        [
            archetype_1_sample_asset_list,
            archetype_2_sample_asset_list,
            archetype_3_sample_asset_list,
            archetype_4_sample_asset_list
        ]
    )
    asset_list = asset_list.rename(
        columns={
            "UPRN": "uprn",
            "ADDRESS1": "address",
            "POSTCODE": "postcode",
            "ARCHETYPE": "archetype"
        }
    )
    asset_list["uprn"] = asset_list["uprn"].astype(int)

    # We end up with some properties that are currently an EPC C, but we do not have this data in the download, so we
    # manually remove
    # 1) 3 Reid Close, CR5 3BL
    # 2) Flat 6, Collier Court 2A, St. Peters Road CR0 1HD
    asset_list = asset_list[~asset_list["uprn"].isin([100020576460, 100020624352])]

    # We have slightly too many properties, so only the fixed UPRNs listed per archetype at the top of the file are
    # kept (20, 46, 23 and 13 properties for archetypes 1 to 4)
    uprns_to_keep = archetype_1_uprns + archetype_2_uprns + archetype_3_uprns + archetype_4_uprns
    asset_list = asset_list[asset_list["uprn"].isin(uprns_to_keep)]

    filename = f"{USER_ID}/{PORTFOLIO_ID}/inputs.csv"
    save_csv_to_s3(
        dataframe=asset_list,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename
    )

    body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Social",
        "goal": "Increase EPC",
        "goal_value": "C",
        "trigger_file_path": filename,
        "budget": None,
        "exclusions": ["floor_insulation"]
    }
    print(body)
    return body

View file

@ -0,0 +1,760 @@
"""
This script contains the code to generate the data required to populate the slides
We connect to the database and extract the data for the portfolio needed, so it is recommended to use
an environment akin to the backend to run this script
"""
import pandas as pd
import numpy as np
from backend.app.db.connection import db_engine
from sqlalchemy.orm import sessionmaker
from utils.s3 import read_csv_from_s3
from etl.customers.slide_utils import (
plot_epc_distribution,
get_property_details_by_portfolio_id,
get_plan_by_portfolio_id,
get_properties_with_default_recommendations,
create_powerpoint,
create_recommendations_summary
)
from backend.ml_models.AnnualBillSavings import AnnualBillSavings
# User whose asset list is read from S3 (the key starts with this id)
USER_ID = 8
# Portfolios analysed by scenario_1() and scenario_2() respectively
PORTFOLIO_ID_1 = 67
PORTFOLIO_ID_2 = 68
# Target EPC band and SAP score per scenario. Of these, only SAP_TARGET_1 is read in scenario_1().
# NOTE(review): SAP_TARGET_2 is presumably used in the part of scenario_2() past the visible code - confirm.
EPC_TARGET_1 = "C"
EPC_TARGET_2 = "A"
SAP_TARGET_1 = 69
SAP_TARGET_2 = 100
CUSTOMER_KEY = "gla-demo"
# Sample UPRNS
# Fixed sample (13, 30, 15 and 9 UPRNs, matching the sample sizes in make_sample()) that scenario_2() restricts the
# asset list and properties to. Kept as strings because they are compared against uprn columns cast to str.
archetype_1_sample = ['100020604138', '200001192253', '100020581792', '100020576940', '200001187196', '100020618060',
'100020625813', '100020578756', '100020618076', '200001187197', '100020619814', '100020617489',
'100020588913']
archetype_2_sample = ['100020585002', '100020615603', '100020665652', '100020626800', '100020624347', '100020624348',
'100020576459', '10001007455', '100020666716', '100020609610', '100020625451', '100020625597',
'100020624351', '100020665634', '100020624350', '100020665640', '100020665632', '100022917303',
'100020665656', '10014055968', '100020630285', '100020665638', '100020616325', '100020637405',
'100020698027', '100020657902', '100020688226', '100020653786', '100020642337', '100020665643']
archetype_3_sample = ['100020594652', '100020697787', '100020577523', '100020633162', '100020601138', '100020595611',
'100020597485', '100020614883', '100020605342', '100020654671', '100020575611', '100020607980',
'200001185785', '100020616446', '100020692380']
archetype_4_sample = ['100020596436', '100020610165', '200001187539', '100020655500', '100020582907', '100020598277',
'100020650607', '100020605116', '100020650603']
# Metrics reported for every archetype
_SUMMARY_COLUMNS = [
    "total_cost", "total_carbon", "total_bill_savings", "total_sap_points", "adjusted_heat_demand",
    "energy_percent_change", "carbon_percent_change", "bills_percent_change",
]

# Recommendation types counted under each measure heading; any other type is counted as "other"
_MEASURE_GROUPS = {
    "wall_insulation": ["cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation"],
    "ventilation": ["mechanical_ventilation"],
    "roof_insulation": ["loft_insulation", "flat_roof_insulation"],
    "floor_insulation": ["solid_floor_insulation", "suspended_floor_insulation"],
    "windows": ["windows_glazing"],
    "heating": ["heating"],
    "heating_controls": ["heating_control"],
    "solar": ["solar_pv"],
}

_ARCHETYPE_LABELS = ("Archetype 1", "Archetype 2", "Archetype 3", "Archetype 4")


def _count_measures(recommendations_df):
    """Return the number of recommendations under each heading of _MEASURE_GROUPS, plus an "other" bucket."""
    measures_count = recommendations_df.groupby("type")["id"].count().reset_index()
    totals = {
        heading: measures_count.loc[measures_count["type"].isin(types), "id"].sum()
        for heading, types in _MEASURE_GROUPS.items()
    }
    known_types = [measure_type for types in _MEASURE_GROUPS.values() for measure_type in types]
    totals["other"] = measures_count.loc[~measures_count["type"].isin(known_types), "id"].sum()
    return totals


def _summarise_archetype(label, asset_list, recommendations_summary, recommendations_df, property_details_df):
    """Return the min/max/mean/total metrics, display text, measure counts and CO2 share for one archetype."""
    # Compare UPRNs as strings on both sides so an integer-typed asset list still matches
    uprns = asset_list.loc[asset_list["archetype"] == label, "uprn"].astype(str).tolist()

    summary = recommendations_summary[recommendations_summary["uprn"].astype(str).isin(uprns)]
    details = property_details_df[property_details_df["uprn"].astype(str).isin(uprns)]

    # Aggregate only the reported metrics, so identifier columns never take part in the reductions
    metrics = summary[_SUMMARY_COLUMNS]
    minimum, maximum, mean = metrics.min(), metrics.max(), metrics.mean()

    measures = (
        recommendations_df[recommendations_df["uprn"].astype(str).isin(uprns)]
        .groupby("type")["id"]
        .count()
        .reset_index()
    )

    return {
        # Share of the portfolio's CO2 emissions that comes from this archetype
        "co2_share": details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum(),
        "min": minimum,
        "max": maximum,
        "mean": mean,
        "totals": metrics.sum(),
        # "<mean rounded to 2dp>: <min> - <max>" for each metric
        "text": {
            column: f"{round(mean[column], 2)}: {minimum[column]} - {maximum[column]}"
            for column in _SUMMARY_COLUMNS
        },
        "measures": measures,
    }


def scenario_1():
    """
    Collect the figures for the first portfolio (PORTFOLIO_ID_1), using SAP_TARGET_1 as the target passed to
    create_recommendations_summary.

    :return: A dictionary holding the asset list, plans, recommendations, the recommendations summary, the portfolio
        level totals and means, the measure counts per heading and a per-archetype summary.
    """
    portfolio_id = PORTFOLIO_ID_1

    # Get the asset list
    asset_list = pd.DataFrame(
        read_csv_from_s3("retrofit-plan-inputs-dev", f"{USER_ID}/{portfolio_id}/inputs.csv")
    )

    ########################################################################
    # Get the data we need
    ########################################################################
    session = sessionmaker(bind=db_engine)()
    try:
        # The dataframes are built while the session is still open, in case the query results are tied to it
        properties_df = pd.DataFrame(get_properties_with_default_recommendations(session, portfolio_id))
        property_details_df = pd.DataFrame(get_property_details_by_portfolio_id(session, portfolio_id))
        plans_df = pd.DataFrame(get_plan_by_portfolio_id(session, portfolio_id))
    finally:
        # Always hand the connection back, even when a query fails
        session.close()

    # We estimate bills based on the adjusted_energy_consumption
    property_details_df["energy_bill"] = property_details_df["adjusted_energy_consumption"].apply(
        AnnualBillSavings.calculate_annual_bill
    )

    uprn_lookup = properties_df[["uprn", "id"]].rename(columns={"id": "property_id"})
    # Merge on uprn
    property_details_df = property_details_df.merge(uprn_lookup, on="property_id")

    # Unnest the recommendations. Each recommendation is a list of dictionaries
    recommendations_exploded = properties_df["recommendations"].explode().tolist()
    recommendations_df = pd.DataFrame([r for r in recommendations_exploded if not pd.isnull(r)])
    # Add uprn on
    recommendations_df = recommendations_df.merge(uprn_lookup, how="left", on="property_id")

    recommendations_summary = create_recommendations_summary(
        recommendations_df,
        properties_df,
        property_details_df,
        SAP_TARGET_1
    )

    # Calculate % changes of energy, co2 and bills
    recommendations_summary["carbon_percent_change"] = (
        recommendations_summary["total_carbon"] / recommendations_summary["current_co2"]
    )
    recommendations_summary["energy_percent_change"] = (
        recommendations_summary["adjusted_heat_demand"] / recommendations_summary["current_energy"]
    )
    recommendations_summary["bills_percent_change"] = (
        recommendations_summary["total_bill_savings"] / recommendations_summary["current_energy_bill"]
    )

    ########################
    # Overview
    ########################
    # The uprn column is an identifier, so it is left out of the portfolio level totals and means
    overview_metrics = recommendations_summary.drop(columns=["uprn"])

    return {
        "asset_list": asset_list,
        "plans": plans_df,
        "recommendations": recommendations_df,
        "recommendations_summary": recommendations_summary,
        "overview_totals": overview_metrics.sum(),
        "overview_means": overview_metrics.mean(),
        "measures": _count_measures(recommendations_df),
        "archetypes": {
            label: _summarise_archetype(
                label, asset_list, recommendations_summary, recommendations_df, property_details_df
            )
            for label in _ARCHETYPE_LABELS
        },
    }
def make_sample(random_state=None):
    """
    Draw a random sample of UPRNs from each archetype of the portfolio 67 asset list.

    The sample sizes (13, 30, 15 and 9) add up to 67 of the 102 properties in the asset list.

    :param random_state: Optional seed forwarded to DataFrame.sample so that a draw can be reproduced. The default
        of None keeps the draw unseeded.
    :return: A dictionary mapping each archetype label to the list of sampled UPRNs.
    """
    # Get the asset list
    asset_list = pd.DataFrame(
        read_csv_from_s3("retrofit-plan-inputs-dev", f"{USER_ID}/67/inputs.csv")
    )

    # From the asset list, we deduce how many properties we need
    sample_sizes = {
        "Archetype 1": 13,
        "Archetype 2": 30,
        "Archetype 3": 15,
        "Archetype 4": 9,
    }

    # We take the sample and we'll keep the uprns static
    return {
        label: asset_list[asset_list["archetype"] == label]
        .sample(sample_size, random_state=random_state)["uprn"]
        .to_list()
        for label, sample_size in sample_sizes.items()
    }
# Metrics summarised for every archetype (columns of the recommendations summary).
_ARCHETYPE_METRICS = (
    "total_cost",
    "total_sap_points",
    "adjusted_heat_demand",
    "energy_percent_change",
    "total_carbon",
    "carbon_percent_change",
    "total_bill_savings",
    "bills_percent_change",
)

# Recommendation "type" values grouped into the measure categories counted in the overview.
_MEASURE_GROUPS = {
    "wall_insulation": ["cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation"],
    "ventilation": ["mechanical_ventilation"],
    "roof_insulation": ["loft_insulation", "flat_roof_insulation"],
    "floor_insulation": ["solid_floor_insulation", "suspended_floor_insulation"],
    "windows": ["windows_glazing"],
    "heating": ["heating"],
    "heating_controls": ["heating_control"],
    "solar": ["solar_pv"],
}


def _summarise_archetype(name, asset_list, recommendations_summary, recommendations_df, property_details_df):
    """Build the min / max / mean / total summary and the "mean: min - max" texts for one archetype."""
    # Compare UPRNs as strings on both sides so int and str representations still match
    uprns = asset_list.loc[asset_list["archetype"] == name, "uprn"].astype(str)

    summary = recommendations_summary[recommendations_summary["uprn"].astype(str).isin(uprns)]
    details = property_details_df[property_details_df["uprn"].astype(str).isin(uprns)]
    archetype_recommendations = recommendations_df[recommendations_df["uprn"].astype(str).isin(uprns)]

    metrics = summary[list(_ARCHETYPE_METRICS)]
    minimum, maximum, means = metrics.min(), metrics.max(), metrics.mean()

    return {
        # Share of the sampled portfolio's current CO2 emissions attributable to this archetype
        "co2_share": details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum(),
        "min": minimum,
        "max": maximum,
        "mean": means,
        "totals": metrics.sum(),
        "measures": archetype_recommendations.groupby("type")["id"].count().reset_index(),
        "text": {
            metric: f"{round(means[metric], 2)}: {minimum[metric]} - {maximum[metric]}"
            for metric in _ARCHETYPE_METRICS
        },
    }


def scenario_2():
    """
    Summarise the default recommendations of portfolio ``PORTFOLIO_ID_2`` for the sampled properties.

    Pulls the asset list from S3 and the properties, property details and plans from the database,
    restricts them to the sampled UPRNs, builds the recommendations summary against ``SAP_TARGET_1``
    and derives overview, per-measure and per-archetype figures.

    Returns:
    - dict with keys ``overview_totals``, ``overview_means``, ``measures`` (count per measure
      category), ``archetypes`` (one summary dict per archetype) and ``plans``.
      Previously every figure was a local variable that was discarded on return.
    """
    # Connect to database
    session = sessionmaker(bind=db_engine)()

    ########################################################################
    # Get the data we need
    ########################################################################
    portfolio_id = PORTFOLIO_ID_2

    # Get the asset list
    asset_list = pd.DataFrame(
        read_csv_from_s3("retrofit-plan-inputs-dev", f"{USER_ID}/67/inputs.csv")
    )

    # NOTE(review): archetype_1_sample .. archetype_4_sample are not defined in this function, so
    # they must exist as module-level names when it runs. In make_sample() they are only locals -
    # confirm where these lists come from.
    archetype_samples = [archetype_1_sample, archetype_2_sample, archetype_3_sample, archetype_4_sample]
    # Normalise to str: every comparison below casts the uprn column to str before isin()
    sample_uprns = [str(uprn) for sample in archetype_samples for uprn in sample]

    # Filter on sample uprns
    asset_list = asset_list[asset_list["uprn"].astype(str).isin(sample_uprns)]

    # Get the properties for the portfolio
    properties_df = pd.DataFrame(get_properties_with_default_recommendations(session, portfolio_id))
    properties_df = properties_df[properties_df["uprn"].astype(str).isin(sample_uprns)]

    # We now pull the data for the property details
    property_details_df = pd.DataFrame(get_property_details_by_portfolio_id(session, portfolio_id))
    property_details_df = property_details_df[
        property_details_df["property_id"].isin(properties_df["id"].values)
    ].copy()

    # We estimate bills based on the adjusted_energy_consumption
    property_details_df["energy_bill"] = property_details_df["adjusted_energy_consumption"].apply(
        AnnualBillSavings.calculate_annual_bill
    )

    # Merge on uprn
    uprn_lookup = properties_df[["uprn", "id"]].rename(columns={"id": "property_id"})
    property_details_df = property_details_df.merge(uprn_lookup, on="property_id")

    plans_df = pd.DataFrame(get_plan_by_portfolio_id(session, portfolio_id))

    # Unnest the recommendations. Each recommendation is a list of dictionaries
    recommendations_exploded = properties_df["recommendations"].explode().tolist()
    recommendations_df = pd.DataFrame([r for r in recommendations_exploded if not pd.isnull(r)])

    # Add uprn on
    recommendations_df = recommendations_df.merge(uprn_lookup, how="left", on="property_id")

    recommendations_summary = create_recommendations_summary(
        recommendations_df,
        properties_df,
        property_details_df,
        SAP_TARGET_1
    )

    # Calculate the changes of energy, co2 and bills relative to the current values.
    # These are ratios (0.25 == 25%), not values multiplied by 100.
    recommendations_summary["carbon_percent_change"] = (
        recommendations_summary["total_carbon"] / recommendations_summary["current_co2"]
    )
    recommendations_summary["energy_percent_change"] = (
        recommendations_summary["adjusted_heat_demand"] / recommendations_summary["current_energy"]
    )
    recommendations_summary["bills_percent_change"] = (
        recommendations_summary["total_bill_savings"] / recommendations_summary["current_energy_bill"]
    )

    ########################
    # Overview
    ########################
    overview_totals = recommendations_summary.sum()
    overview_means = recommendations_summary.mean()

    ########################
    # Measures
    ########################
    measures_count = recommendations_df.groupby("type")["id"].count().reset_index()
    measures = {
        group: measures_count.loc[measures_count["type"].isin(types), "id"].sum()
        for group, types in _MEASURE_GROUPS.items()
    }
    # Everything that does not belong to one of the named groups
    grouped_types = [measure_type for types in _MEASURE_GROUPS.values() for measure_type in types]
    measures["other"] = measures_count.loc[~measures_count["type"].isin(grouped_types), "id"].sum()

    ########################
    # Summary information by each archetype
    ########################
    archetypes = {
        name: _summarise_archetype(
            name, asset_list, recommendations_summary, recommendations_df, property_details_df
        )
        for name in ("Archetype 1", "Archetype 2", "Archetype 3", "Archetype 4")
    }

    return {
        "overview_totals": overview_totals,
        "overview_means": overview_means,
        "measures": measures,
        "archetypes": archetypes,
        "plans": plans_df,
    }

View file

@ -0,0 +1,63 @@
import pandas as pd
from utils.s3 import read_excel_from_s3
from utils.s3 import save_csv_to_s3
# Portfolio and user ids. app() uses them to build the S3 key "{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
# and the "portfolio_id" field of the request body.
PORTFOLIO_ID = 75
USER_ID = 8
def app():
    """
    Upload the pilot asset list to S3 and print the request body that references it.

    The asset list is a fixed set of address / postcode pairs. It is stored as
    ``{USER_ID}/{PORTFOLIO_ID}/pilot.csv`` in the ``retrofit-plan-inputs-dev`` bucket.
    """
    pilot_addresses = [
        ("19 Emily Gardens", "B16 0ED"),
        ("Flat 6 41 Bradford Street", "B5 6HX"),
        ("197 FIELD LANE", "B32 4HL"),
        ("FLAT 4 108 SUMMER ROAD", "B23 6DY"),
        ("1, St. Benedicts Road", "B10 9DP"),
        ("29 COOKSEY LANE", "B44 9QL"),
        ("40 TRITTIFORD ROAD", "B13 0HG"),
    ]
    asset_list = pd.DataFrame(pilot_addresses, columns=["address", "postcode"])

    # Store the asset list in s3
    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
    save_csv_to_s3(
        dataframe=asset_list,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename,
    )

    # Request body for the portfolio.
    # NOTE(review): the original comment called this an "EPC C" portfolio, but goal_value is "B".
    body = {"portfolio_id": str(PORTFOLIO_ID)}
    body["housing_type"] = "Private"
    body["goal"] = "Increase EPC"
    body["goal_value"] = "B"
    body["trigger_file_path"] = filename
    body["already_installed_file_path"] = ""
    body["patches_file_path"] = ""
    body["non_invasive_recommendations_file_path"] = ""
    body["budget"] = None

    print(body)

View file

@ -0,0 +1,25 @@
import pandas as pd
def app():
    """
    Export the Birmingham properties whose newest EPC is rated F or G, for Goldman Sachs.

    Reads the Birmingham domestic certificates CSV, keeps the most recently lodged certificate
    per UPRN and writes the F / G rated ones to "Birmingham EPC F & G Properties.xlsx".
    """
    certificates = pd.read_csv(
        "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
        low_memory=False
    )

    # Certificates without a UPRN cannot be de-duplicated, so they are discarded
    with_uprn = certificates[certificates["UPRN"].notna()].copy()
    with_uprn["UPRN"] = with_uprn["UPRN"].astype(int).astype(str)

    # Newest EPC per UPRN; LODGEMENT_DATETIME is used as the proxy for "newest"
    with_uprn["LODGEMENT_DATETIME"] = pd.to_datetime(with_uprn["LODGEMENT_DATETIME"], format='mixed')
    newest_first = with_uprn.sort_values("LODGEMENT_DATETIME", ascending=False)
    latest_per_uprn = newest_first.drop_duplicates("UPRN")

    # Keep only the G & F rated properties
    is_f_or_g = latest_per_uprn["CURRENT_ENERGY_RATING"].isin(["G", "F"])
    f_g_properties = latest_per_uprn[is_f_or_g]

    # Save as an excel
    f_g_properties.to_excel("Birmingham EPC F & G Properties.xlsx", index=False)

View file

@ -0,0 +1,407 @@
import re
import pandas as pd
from tqdm import tqdm
import Levenshtein
from backend.SearchEpc import SearchEpc
# The average value of a property in the Midlands in 2024 was £238,000. Since these are EPC F & G properties, we
# assume roughly £213,000, because they trade at a discount. This is based on the Rightmove study in which moving
# from EPC F/G -> C has a +15% impact on valuation and D -> C has a +3% impact on valuation.
# The modal EPC rating is D, so we associate the £238k valuation with an EPC D property.
# Both uplifts arrive at the same EPC C value, therefore value_of_F * 1.15 = value_of_D * 1.03
# Therefore value_of_F = value_of_D * 1.03/1.15 = 238k * (1.03/1.15) ≈ 213,165
PROPERTY_VALUE_ESTIMATE = 213_165
def aggregate_matches(matching_lookup, company_ownership, properties, property_value_estimate=None):
    """
    Count matched properties per owning company, broken down by local authority.

    Parameters:
    - matching_lookup: frame with "UPRN" and "Title Number" columns (one row per matched property).
    - company_ownership: frame keyed by "Title Number" that carries "Company Registration No. (1)"
      and "Proprietor Name (1)".
    - properties: frame with "UPRN" and "LOCAL_AUTHORITY_LABEL" columns.
    - property_value_estimate: value assigned to each property when estimating portfolio value.
      Defaults to the module-level PROPERTY_VALUE_ESTIMATE.

    Returns:
    - DataFrame with one row per (registration number, proprietor), one column per local authority,
      plus "total_number_of_properties", "approx_value" and "cumulative_value", sorted by total
      number of properties in descending order.
    """
    if property_value_estimate is None:
        property_value_estimate = PROPERTY_VALUE_ESTIMATE

    company_keys = ["Company Registration No. (1)", "Proprietor Name (1)"]

    df = matching_lookup.merge(
        company_ownership, how="left", on="Title Number"
    ).merge(
        properties[["UPRN", "LOCAL_AUTHORITY_LABEL"]], how="left", on="UPRN"
    )

    # NOTE(review): groupby drops rows whose key is NaN by default, so proprietors without a
    # company registration number do not appear in the result - confirm that is intended.
    counts = (
        df.groupby(company_keys + ["LOCAL_AUTHORITY_LABEL"])["UPRN"]
        .count()
        .reset_index(name="number_of_properties")
    )

    pivot_counts = counts.pivot_table(
        index=company_keys,               # Rows: companies and proprietors
        columns="LOCAL_AUTHORITY_LABEL",  # Columns: each local authority
        values="number_of_properties",    # The counts of properties
        fill_value=0                      # 0 where a company owns nothing in that authority
    ).reset_index()

    total_counts = (
        df.groupby(company_keys)["UPRN"]
        .count()
        .reset_index(name="total_number_of_properties")
    )

    pivot_counts = pivot_counts.merge(total_counts, how="left", on=company_keys)
    pivot_counts = pivot_counts.sort_values("total_number_of_properties", ascending=False)

    # The cumulative value runs from the largest owner downwards
    pivot_counts["approx_value"] = property_value_estimate * pivot_counts["total_number_of_properties"]
    pivot_counts["cumulative_value"] = pivot_counts["approx_value"].cumsum()

    return pivot_counts
def find_f_g_properties(paths):
    """
    Collect the F and G rated properties from several EPC certificate files.

    For each CSV in ``paths`` the most recently lodged certificate per UPRN is kept, and of those
    only the ones rated F or G. The combined result is written to "EPC F & G Properties.xlsx".
    """

    def _latest_f_g(path):
        certificates = pd.read_csv(path, low_memory=False)

        # Rows without a UPRN cannot be de-duplicated
        certificates = certificates[certificates["UPRN"].notna()].copy()
        certificates["UPRN"] = certificates["UPRN"].astype(int).astype(str)

        # Newest EPC per UPRN; LODGEMENT_DATETIME is used as the proxy for "newest"
        certificates["LODGEMENT_DATETIME"] = pd.to_datetime(
            certificates["LODGEMENT_DATETIME"], format='mixed'
        )
        newest_first = certificates.sort_values("LODGEMENT_DATETIME", ascending=False)
        latest = newest_first.drop_duplicates("UPRN")

        # Get G & F properties
        return latest[latest["CURRENT_ENERGY_RATING"].isin(["G", "F"])]

    frames = []
    for path in tqdm(paths):
        frames.append(_latest_f_g(path))

    combined = pd.concat(frames)

    # Save as an excel
    combined.to_excel("EPC F & G Properties.xlsx", index=False)
def remove_text_in_brackets(address: str) -> str:
    """
    Strip every parenthesised fragment from an address.

    The parentheses, their content and any whitespace directly in front of the opening
    parenthesis are removed. An unclosed "(" is left untouched.

    Parameters:
    - address (str): The address string to clean.

    Returns:
    - str: The address without the parenthesised fragments.
    """
    bracketed = re.compile(r'\s*\([^)]*\)')
    return bracketed.sub('', address)
def extract_numeric_part(house_number: str) -> str:
    """
    Keep only the digits of a house number, e.g. "12a" becomes "12".

    Parameters:
    - house_number (str): The house number string possibly containing letters.

    Returns:
    - str: All digits of the input, concatenated in order ("" when there are none).
    """
    # Splitting on every non-digit character leaves only the digit runs; join them back together
    digit_runs = re.split(r'\D', house_number)
    return "".join(digit_runs)
def levenstein_match(matching_string, df, address_col):
    """
    Return the row of ``df`` whose address is closest to ``matching_string``.

    Both the key and the candidate addresses are normalised in the same way before comparing:
    punctuation and spaces are removed and case is folded. Previously only the candidates were
    stripped, so punctuation, spacing and case in the key inflated every distance.

    Parameters:
    - matching_string: the address to match.
    - df: frame holding the candidate addresses.
    - address_col: name of the column in ``df`` that contains the candidate addresses.

    Returns:
    - DataFrame: a one-row slice of ``df`` with the best match (the first one on ties), or an
      empty slice when ``df`` has no rows.
    """
    if df.empty:
        return df.iloc[0:0]

    def _normalise(text):
        # Missing addresses (NaN / None) are compared as an empty string
        text = "" if pd.isnull(text) else str(text)
        return re.sub(r'[^\w\s]', '', text).replace(" ", "").casefold()

    key = _normalise(matching_string)
    candidates = [_normalise(address) for address in df[address_col].tolist()]

    distances = [Levenshtein.distance(key, candidate) for candidate in candidates]
    best_match_index = distances.index(min(distances))

    # We might want to consider a threshold for the distance, however we don't for the moment
    return df.iloc[best_match_index:best_match_index + 1]
def extract_range_from_house_number(house_number_range: str):
    """
    Detects if the house number is a numeric range (formatted as 'x-y') and expands it.

    Non-numeric strings containing hyphens are ignored. Values that are not strings at all
    (None, or the NaN that pandas produces for missing house numbers) are treated as "no range"
    instead of raising a TypeError.

    Parameters:
    - house_number_range (str): The house number string that might contain a range.

    Returns:
    - list of str: every number from x to y inclusive if the input is a range (an empty list when
      x > y); otherwise None.
    """
    if not isinstance(house_number_range, str) or '-' not in house_number_range:
        return None

    parts = house_number_range.split('-')
    if len(parts) != 2 or not (parts[0].isdigit() and parts[1].isdigit()):
        # Not a valid numeric range, e.g. "a-b" or "1-2-3"
        return None

    start, end = int(parts[0]), int(parts[1])
    return [str(number) for number in range(start, end + 1)]
def is_in_range(row, house_no):
    """ Tell whether house_no equals one of the numbers in row (False when row is empty or None). """
    if not row:
        return False
    for number in row:
        if house_no == number:
            return True
    return False
def remove_duplicate_matches(matching_lookup, properties, company_ownership):
    """
    Keep a single UPRN for every title number that was matched to more than one property.

    For each duplicated title, the EPC addresses of the matched UPRNs are compared with the
    title's "Property Address" using levenstein_match; only the closest UPRN is kept.

    Parameters:
    - matching_lookup: frame with "UPRN" and "Title Number" columns.
    - properties: frame with "UPRN" and "ADDRESS" columns.
    - company_ownership: frame with "Title Number" and "Property Address" columns.

    Returns:
    - DataFrame: ``matching_lookup`` without the discarded (UPRN, Title Number) pairs. The input
      is returned unchanged when there is nothing to remove.
    """
    if matching_lookup.empty:
        return matching_lookup

    duplicated_titles = matching_lookup.loc[
        matching_lookup["Title Number"].duplicated(), "Title Number"
    ].unique()

    to_drop = []
    for dupe_title in duplicated_titles:
        dupe_data = matching_lookup[matching_lookup["Title Number"] == dupe_title]

        matched_addresses = dupe_data.merge(
            properties[["UPRN", "ADDRESS"]].rename(columns={"ADDRESS": "epc_address"}),
            how="left", on="UPRN"
        ).merge(
            company_ownership[["Title Number", "Property Address"]],
            how="left", on="Title Number"
        )

        # We perform levenstein to get the best match
        best_match = levenstein_match(
            matching_string=matched_addresses["Property Address"].values[0],
            df=matched_addresses,
            address_col="epc_address"
        )

        matches_to_drop = matched_addresses[
            ~matched_addresses["UPRN"].isin(best_match["UPRN"].values)
        ]
        to_drop.append(matches_to_drop[["UPRN", "Title Number"]])

    # pd.concat raises on an empty list, so bail out when no title was duplicated
    if not to_drop:
        return matching_lookup

    to_drop = pd.concat(to_drop).drop_duplicates()
    if to_drop.empty:
        return matching_lookup

    # Anti-join: rows flagged "left_only" exist in the lookup but not in the drop list.
    # The filtered frame has to be returned - previously it was computed and thrown away, so the
    # duplicates (and the helper "_merge" column) were still present in the result.
    merged = pd.merge(matching_lookup, to_drop, on=['UPRN', 'Title Number'], how='left', indicator=True)
    return merged[merged['_merge'] == 'left_only'].drop(columns=['_merge'])
def app():
    """
    This script is for scoping property ownership for EPC F & G rated properties in Birmingham, for Goldman Sachs.

    It matches the EPC F & G properties (read from "EPC F & G Properties.xlsx") against the Land
    Registry company-ownership extracts (UK and overseas companies) by postcode and house number,
    writes the freehold / leasehold / shared-leasehold lookups to Excel and aggregates the matches
    per owning company.
    """
    # "EPC F & G Properties.xlsx" is produced by find_f_g_properties(); the certificate files used
    # for it are listed below.
    # paths = [
    # "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
    # "local_data/all-domestic-certificates/domestic-E08000031-Wolverhampton/certificates.csv",
    # "local_data/all-domestic-certificates/domestic-E08000026-Coventry/certificates.csv",
    # "local_data/all-domestic-certificates/domestic-E06000016-Leicester/certificates.csv",
    # "local_data/all-domestic-certificates/domestic-E06000015-Derby/certificates.csv",
    # "local_data/all-domestic-certificates/domestic-E06000021-Stoke-on-Trent/certificates.csv",
    # "local_data/all-domestic-certificates/domestic-E06000018-Nottingham/certificates.csv",
    # "local_data/all-domestic-certificates/domestic-E07000154-Northampton/certificates.csv",
    # "local_data/all-domestic-certificates/domestic-E06000061-North-Northamptonshire/certificates.csv",
    # "local_data/all-domestic-certificates/domestic-E06000062-West-Northamptonshire/certificates.csv",
    # "local_data/all-domestic-certificates/domestic-E07000152-East-Northamptonshire/certificates.csv",
    # "local_data/all-domestic-certificates/domestic-E07000155-South-Northamptonshire/certificates.csv",
    # #
    # "local_data/all-domestic-certificates/domestic-E08000027-Dudley/certificates.csv",
    # "local_data/all-domestic-certificates/domestic-E08000029-Solihull/certificates.csv",
    # "local_data/all-domestic-certificates/domestic-E07000234-Bromsgrove/certificates.csv",
    # "local_data/all-domestic-certificates/domestic-E08000030-Walsall/certificates.csv",
    # "local_data/all-domestic-certificates/domestic-E08000028-Sandwell/certificates.csv",
    # "local_data/all-domestic-certificates/domestic-E06000019-Herefordshire-County-of/certificates.csv",
    # "local_data/all-domestic-certificates/domestic-E06000020-Telford-and-Wrekin/certificates.csv",
    # "local_data/all-domestic-certificates/domestic-E07000218-North-Warwickshire/certificates.csv",
    # "local_data/all-domestic-certificates/domestic-E07000222-Warwick/certificates.csv",
    # "local_data/all-domestic-certificates/domestic-E07000237-Worcester/certificates.csv",
    # # East midlands
    # "local_data/all-domestic-certificates/domestic-E07000035-Derbyshire-Dales/certificates.csv",
    # "local_data/all-domestic-certificates/domestic-E07000038-North-East-Derbyshire/certificates.csv",
    # "local_data/all-domestic-certificates/domestic-E07000039-South-Derbyshire/certificates.csv",
    # "local_data/all-domestic-certificates/domestic-E06000012-North-East-Lincolnshire/certificates.csv",
    # "local_data/all-domestic-certificates/domestic-E06000013-North-Lincolnshire/certificates.csv",
    # "local_data/all-domestic-certificates/domestic-E07000138-Lincoln/certificates.csv",
    # "local_data/all-domestic-certificates/domestic-E07000134-North-West-Leicestershire/certificates.csv",
    # "local_data/all-domestic-certificates/domestic-E06000017-Rutland/certificates.csv",
    # ]
    # paths = list(set(paths))
    # find_f_g_properties(paths)

    properties = pd.read_excel("EPC F & G Properties.xlsx")

    # NOTE(review): the ownership extracts are read from a user-specific Downloads folder
    company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_04.csv")
    company_ownership["is_overseas"] = False
    overseas_company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_04 2.csv")
    overseas_company_ownership["is_overseas"] = True
    company_ownership = pd.concat([company_ownership, overseas_company_ownership])

    # Filter on relevant postcodes
    company_ownership = company_ownership[
        company_ownership["Postcode"].str.lower().isin(properties["POSTCODE"].str.lower().unique())]
    # Now we filter properties the other way around
    properties = properties[properties["POSTCODE"].str.lower().isin(company_ownership["Postcode"].str.lower().unique())]

    # We end up with 7.4k entries on a postcode match, however we need to now do a direct address match

    # NOTE(review): the original comment said "Take just private rentals", but owner-occupied
    # properties are kept as well - confirm which is intended.
    properties = properties[
        properties["TENURE"].isin(["rental (private)", "Rented (private)", "owner-occupied", "Owner-occupied"])
    ]

    # Remove entries where the address begins with the term "land adjoining", or other records that
    # don't reference the property itself
    starting_terms = [
        "land adjoining", "land on the", "land to the rear of", "land and buildings on the",
        "garage adjoining", "car park adjoining", "the land adjoining", "land and buildings adjoining",
        "all royal mines"
    ]
    for starting_term in starting_terms:
        company_ownership = company_ownership[
            ~company_ownership["Property Address"].str.lower().str.startswith(starting_term)
        ]

    # Lower-case the ownership postcodes once instead of once per property inside the loop
    ownership_postcodes = company_ownership["Postcode"].str.lower()

    freehold_matching_lookup = []  # 634
    leasehold_matching_lookup = []  # 86
    shared_leasehold_match = []
    shared_freehold_match = []
    for _, address in tqdm(properties.iterrows(), total=len(properties)):
        match_type = "exact"

        filtered = company_ownership[ownership_postcodes == address["POSTCODE"].lower()].copy()
        if filtered.empty:
            continue

        # Drop bracketed text, then extract the house number and remove trailing commas
        filtered["house_number"] = (
            filtered["Property Address"]
            .apply(remove_text_in_brackets)
            .apply(SearchEpc.get_house_number)
            .str.lower()
            .str.replace(",", "")
        )

        house_no = SearchEpc.get_house_number(address["ADDRESS1"])
        if house_no is None:
            # It's hard for us to get a reliable match without a house number
            continue
        # NOTE(review): the ownership house numbers are lower-cased above but house_no is not, so
        # e.g. "12A" vs "12a" only matches approximately - confirm whether that is intended.
        house_no = house_no.replace(",", "")

        if house_no not in filtered["house_number"].values:
            # If this happens, we check house_number for a x-y range of addresses
            filtered["house_number_range"] = filtered["house_number"].apply(extract_range_from_house_number)
            # If we have found a house number range, we check if the house number is in the range
            # and if not, we drop the row
            filtered['is_in_range'] = filtered['house_number_range'].apply(lambda x: is_in_range(x, house_no))

            if filtered['is_in_range'].any():
                # If house_no is found in any range, keep only rows where it is in range.
                # NOTE(review): these rows still hold the range text ("x-y") as house_number, so
                # the equality filter below always removes them again and range matches are never
                # recorded. Left unchanged because fixing it would add new matches to the output.
                filtered = filtered[filtered['is_in_range']]
            else:
                # If house_no is not found in any range, filter out rows where 'house_number_range' is not None
                filtered = filtered[filtered['house_number_range'].isnull()].copy()
                # Strip out letters from house_no and house_number
                house_no = extract_numeric_part(house_no)
                filtered["house_number"] = filtered["house_number"].astype(str).apply(extract_numeric_part)
                match_type = "approximate"

        filtered = filtered[filtered["house_number"] == house_no]

        if filtered.empty:
            continue

        filtered_freehold = filtered[filtered["Tenure"] == "Freehold"]
        filtered_leasehold = filtered[filtered["Tenure"] == "Leasehold"]

        if filtered_freehold.shape[0] > 1:
            # Several freehold titles for one property (previously this copied the leasehold rows)
            matched = filtered_freehold[["Title Number"]].copy()
            matched.insert(0, "UPRN", address["UPRN"])
            shared_freehold_match.append(matched)
        elif not filtered_freehold.empty:
            freehold_matching_lookup.append(
                {
                    "UPRN": address["UPRN"],
                    "Title Number": filtered_freehold["Title Number"].values[0],
                    "match_type": match_type,
                }
            )

        if filtered_leasehold.shape[0] > 1:
            matched = filtered_leasehold[["Title Number"]].copy()
            matched.insert(0, "UPRN", address["UPRN"])
            shared_leasehold_match.append(matched)
        elif not filtered_leasehold.empty:
            leasehold_matching_lookup.append(
                {
                    "UPRN": address["UPRN"],
                    "Title Number": filtered_leasehold["Title Number"].values[0],
                    "match_type": match_type,
                }
            )

    # Explicit columns keep the frames usable (no KeyError below) when nothing was matched
    lookup_columns = ["UPRN", "Title Number", "match_type"]
    freehold_matching_lookup = pd.DataFrame(freehold_matching_lookup, columns=lookup_columns)
    leasehold_matching_lookup = pd.DataFrame(leasehold_matching_lookup, columns=lookup_columns)
    # pd.concat raises on an empty list, so fall back to an empty frame
    shared_leasehold_match = (
        pd.concat(shared_leasehold_match)
        if shared_leasehold_match
        else pd.DataFrame(columns=["UPRN", "Title Number"])
    )

    # The approximate matches aren't very good
    freehold_matching_lookup = freehold_matching_lookup[freehold_matching_lookup["match_type"] == "exact"]
    leasehold_matching_lookup = leasehold_matching_lookup[leasehold_matching_lookup["match_type"] == "exact"]

    # There are some cases where we have duplicates
    freehold_matching_lookup = remove_duplicate_matches(freehold_matching_lookup, properties, company_ownership)
    leasehold_matching_lookup = remove_duplicate_matches(leasehold_matching_lookup, properties, company_ownership)

    # NOTE(review): everything that is not written to Excel below (matched_addresses, the
    # aggregates, the investment slices) is computed but neither stored nor returned.
    matched_addresses = freehold_matching_lookup.merge(
        properties[["UPRN", "ADDRESS"]].rename(columns={"ADDRESS": "epc_address"}),
        how="left", on="UPRN"
    ).merge(
        company_ownership[["Title Number", "Property Address"]],
        how="left", on="Title Number"
    )

    # shared_freehold_match = pd.DataFrame(shared_freehold_match)

    # Store these files
    freehold_matching_lookup.to_excel("freehold_matching_lookup.xlsx")
    leasehold_matching_lookup.to_excel("leasehold_matching_lookup.xlsx")
    shared_leasehold_match.to_excel("shared_leasehold_match.xlsx")
    # shared_freehold_match.to_excel("shared_freehold_match.xlsx")

    freehold_aggregate = aggregate_matches(freehold_matching_lookup, company_ownership, properties)
    leasehold_aggregate = aggregate_matches(leasehold_matching_lookup, company_ownership, properties)

    combined_aggregate = aggregate_matches(
        pd.concat([freehold_matching_lookup, leasehold_matching_lookup]), company_ownership, properties
    )

    df = pd.concat([freehold_matching_lookup, leasehold_matching_lookup])

    # Largest owners whose cumulative estimated value stays within roughly 20m / 50m
    investment_20m = combined_aggregate[combined_aggregate["cumulative_value"] <= 20_500_000]
    investment_50m = combined_aggregate[combined_aggregate["cumulative_value"] <= 51_000_000]

    properties["WALLS_DESCRIPTION"].value_counts(normalize=True)
def company_aggregation():
    """
    Count the (non-null) property addresses held by each proprietor in the CCOD extract and save the ranking,
    largest owner first, to "Company ownership aggregation.xlsx"
    :return:
    """
    ccod = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_04.csv")
    # One group per (proprietor name, company registration number) pair
    owner_keys = ["Proprietor Name (1)", "Company Registration No. (1)"]
    address_counts = ccod.groupby(owner_keys)["Property Address"].count()
    ranking = address_counts.reset_index(name="Number of Properties").sort_values(
        "Number of Properties", ascending=False
    )
    ranking.to_excel("Company ownership aggregation.xlsx")

View file

@ -0,0 +1,98 @@
import os
import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv
from utils.s3 import read_excel_from_s3
from backend.SearchEpc import SearchEpc
from epc_api.client import EpcClient
from utils.s3 import save_csv_to_s3
# Load the variables defined in backend/.env into the environment before the token is read
load_dotenv(dotenv_path="backend/.env")
# Token passed to SearchEpc as auth_token; None when the variable is not set
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
def app():
    """
    Ad hoc job: fetch the newest EPC for each property in the Guiness asset list (provided to us by Ecosurv, to
    help plan the route march) and save the asset list, enriched with a handful of EPC fields, as an Excel file
    :return:
    """
    # Keys carried on every EPC row so that it can be joined back onto the asset list
    join_keys = ["asset_list_address", "asset_list_postcode"]
    # EPC fields we keep, mapped to the column names used in the output
    epc_fields = {
        "property-type": "Property Type",
        "built-form": "Archetype",
        "inspection-date": "Last EPC Inspection Date",
        "current-energy-rating": "Last survey EPC Rating",
        "current-energy-efficiency": "Last survey SAP Score",
        "roof-description": "Roof Construction",
        "walls-description": "Wall Construction",
        "transaction-type": "Last EPC Reason",
    }

    properties = read_excel_from_s3(
        bucket_name="retrofit-datalake-dev",
        file_key="customers/guiness/TGP CW Properties PV.xlsx",
        header_row=0,
    )

    matches = []
    for _, row in tqdm(properties.iterrows(), total=len(properties)):
        lookup = SearchEpc(
            address1=str(row["Address"]),
            postcode=row["POSTCODES"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key="",
            property_type=None,
            fast=True,
        )
        # Force the skipping of estimating the EPC
        lookup.ordnance_survey_client.property_type = None
        lookup.ordnance_survey_client.built_form = None
        lookup.find_property(skip_os=True)
        # Properties without any EPC are left out; the left join below keeps them in the output
        if lookup.newest_epc is not None:
            matches.append(
                {
                    "asset_list_address": row["Address"],
                    "asset_list_postcode": row["POSTCODES"],
                    **lookup.newest_epc.copy(),
                }
            )

    # Retrieve just the data we need
    epc_df = pd.DataFrame(matches)[join_keys + ["uprn"] + list(epc_fields)]

    enriched = (
        properties
        .merge(epc_df, how="left", left_on=["Address", "POSTCODES"], right_on=join_keys)
        # De-dupe on the address and postcode, since 137 Badger Avenue was duplicated
        .drop_duplicates(subset=["Address", "POSTCODES"])
        .drop(columns=join_keys)
        .rename(columns=epc_fields)
    )
    # Store as an excel
    enriched.to_excel("Guiness EPC data.xlsx", index=False)

View file

@ -0,0 +1,157 @@
import pandas as pd
from utils.s3 import read_excel_from_s3
from utils.s3 import save_csv_to_s3
# Used by app() to build the S3 key prefix "{USER_ID}/{PORTFOLIO_ID}/..." of every uploaded file.
# PORTFOLIO_ID (EPC C) and PORTFOLIO_ID + 1 (EPC B) are also the portfolio ids printed in the request bodies
USER_ID = 8
PORTFOLIO_ID = 70
# Council tax band of each pilot property. app() left-joins this onto the asset list on
# ["address", "postcode"], so both values have to match the asset list exactly
council_tax_bands = [
{'address': '8 Corporation Road', 'postcode': 'DY2 7PX', 'band': 'A'},
{'address': '21 Wells Road', 'postcode': 'DY5 3TB', 'band': 'A'},
{'address': '27 Milton Road', 'postcode': 'WV14 8HZ', 'band': 'A'},
{'address': '195 Ashenhurst Road', 'postcode': 'DY1 2JB', 'band': 'A'},
{'address': '53 Bromley', 'postcode': 'DY5 4PJ', 'band': 'A'},
{'address': '91 Osprey Drive', 'postcode': 'DY1 2JS', 'band': 'B'},
{'address': '47 Fairfield Road', 'postcode': 'DY8 5UJ', 'band': 'B'},
{'address': '150 Huntingtree Road', 'postcode': 'B63 4HP', 'band': 'C'},
{'address': '6 Beech Road', 'postcode': 'DY1 4BP', 'band': 'A'},
{'address': '5 Oaklands', 'postcode': 'B62 0JA', 'band': 'A'},
]
council_tax_bands = pd.DataFrame(council_tax_bands)
# EPC fields that need to be overridden for individual properties, for instance because a new survey has
# been conducted and its results have not reached the EPC API yet. Each entry identifies the property by
# address and postcode; the remaining keys are EPC field names with their replacement values.
# For 53 Bromley, the non-invasives found the walls to be partially filled
patches = [
{
'address': '6 Beech Road', 'postcode': 'DY1 4BP',
'walls-description': 'Cavity wall, filled cavity',
'walls-energy-eff': 'Good',
'roof-description': 'Pitched, 12 mm loft insulation',
'roof-energy-eff': 'Very Poor',
'windows-description': 'Fully double glazed',
'windows-energy-eff': 'Good',
'mainheat-description': 'Room heaters, electric',
'mainheat-energy-eff': 'Very Poor',
'mainheatcont-description': 'Appliance thermostats',
'mainheatc-energy-eff': 'Good',
'lighting-description': 'Low energy lighting in 25% of fixed outlets',
'lighting-energy-eff': 'Good',
'floor-description': 'Solid, no insulation (assumed)',
'secondheat-description': 'None',
'current-energy-efficiency': '32',
'energy-consumption-current': '491',
'co2-emissions-current': '5.0',
'potential-energy-efficiency': '87'
},
{
'address': '53 Bromley', 'postcode': 'DY5 4PJ',
'walls-description': 'Cavity wall, partial insulation (assumed)',
},
]
# Measures that the non-invasive surveys found to be installed already. To reflect this in the front end,
# the measure is still included in the recommendation, however the cost is removed and a message is
# presented saying that the measure is already installed.
# Each entry lists the measure names under "already_installed" for one property (address + postcode)
already_installed = [
{
'address': '5 Oaklands',
'postcode': 'B62 0JA',
"already_installed": ["windows_glazing"]
}
]
# Recommendation codes attached to each pilot property, one entry per property (address + postcode); an
# empty list means there is nothing to add for that property. Uploaded by app() alongside the asset list.
# NOTE(review): presumably these codes come from the non-invasive survey findings - confirm with the consumer
non_invasive_recommendations = [
{'address': '8 Corporation Road', 'postcode': 'DY2 7PX', 'recommendations': []},
{'address': '21 Wells Road', 'postcode': 'DY5 3TB', 'recommendations': ['cavity_extract_and_refill']},
{'address': '27 Milton Road', 'postcode': 'WV14 8HZ', 'recommendations': ['cavity_extract_and_refill']},
{'address': '195 Ashenhurst Road', 'postcode': 'DY1 2JB', 'recommendations': ['cavity_extract_and_refill']},
{'address': '53 Bromley', 'postcode': 'DY5 4PJ', 'recommendations': ['cavity_surveyed_as_filled_is_partial']},
{'address': '91 Osprey Drive', 'postcode': 'DY1 2JS', 'recommendations': ['cavity_extract_and_refill']},
{'address': '47 Fairfield Road', 'postcode': 'DY8 5UJ', 'recommendations': ['cavity_extract_and_refill']},
{'address': '150 Huntingtree Road', 'postcode': 'B63 4HP', 'recommendations': ['cavity_extract_and_refill']},
{'address': '6 Beech Road', 'postcode': 'DY1 4BP', 'recommendations': []},
{'address': '5 Oaklands', 'postcode': 'B62 0JA', 'recommendations': ['cavity_extract_and_refill']},
]
def app():
    """
    Prepare the inputs of the Immo Dudley pilot: read the sample asset list from S3, attach the council tax
    bands, upload the asset list and the supporting files (already installed measures, EPC patches, non-invasive
    recommendations) to the plan inputs bucket and print the request bodies of the EPC C and EPC B portfolios
    :return:
    """
    inputs_bucket = "retrofit-plan-inputs-dev"
    s3_prefix = f"{USER_ID}/{PORTFOLIO_ID}"

    sample_assets = read_excel_from_s3(
        bucket_name="retrofit-datalake-dev",
        file_key="customers/Immo/IMMO Sample Assets_Dudley.xlsx",
        header_row=0,
    ).drop(columns=["Unnamed: 0"])

    # Extract address and postcode: the first and the last comma separated part of the full address
    address_parts = sample_assets["Full Address"].str.split(",")
    sample_assets["address"] = address_parts.str[0]
    sample_assets["postcode"] = address_parts.str[-1].str.strip()

    # We're provided with number of bathrooms and number of bedrooms.
    asset_list = sample_assets.merge(council_tax_bands, how="left", on=["address", "postcode"]).rename(
        columns={"No. of Beds": "n_bedrooms", "No. of WC's": "n_bathrooms"}
    )

    # Store the asset list in s3
    filename = f"{s3_prefix}/pilot.csv"
    save_csv_to_s3(dataframe=asset_list, bucket_name=inputs_bucket, file_name=filename)

    # Store the overrides, the patches and the non-invasive recommendations in s3, in that order.
    # NOTE(review): the keys end in .json but the files are written with save_csv_to_s3 - confirm that the
    # consumer expects CSV content
    already_installed_filename = f"{s3_prefix}/already_installed.json"
    patches_filename = f"{s3_prefix}/patches.json"
    non_invasive_recommendations_filename = f"{s3_prefix}/non_invasive_recommendations.json"
    supporting_files = (
        (already_installed, already_installed_filename),
        (patches, patches_filename),
        (non_invasive_recommendations, non_invasive_recommendations_filename),
    )
    for records, key in supporting_files:
        save_csv_to_s3(dataframe=pd.DataFrame(records), bucket_name=inputs_bucket, file_name=key)

    # One request body per portfolio: EPC C uses PORTFOLIO_ID, EPC B uses PORTFOLIO_ID + 1
    for portfolio_offset, epc_target in ((0, "C"), (1, "B")):
        print(
            {
                "portfolio_id": str(PORTFOLIO_ID + portfolio_offset),
                "housing_type": "Private",
                "goal": "Increase EPC",
                "goal_value": epc_target,
                "trigger_file_path": filename,
                "already_installed_file_path": already_installed_filename,
                "patches_file_path": patches_filename,
                "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
                "budget": None,
            }
        )

View file

@ -0,0 +1,152 @@
import pandas as pd
from utils.s3 import read_excel_from_s3
from utils.s3 import save_csv_to_s3
# Used by app() to build the S3 key prefix "{USER_ID}/{PORTFOLIO_ID}/..." of every uploaded file.
# PORTFOLIO_ID (EPC C) and PORTFOLIO_ID + 1 (EPC B) are also the portfolio ids printed in the request bodies
USER_ID = 8
PORTFOLIO_ID = 72
# EPC fields to override for individual properties in this portfolio. The entry identifies the property
# (address, postcode, uprn); the remaining keys are EPC field names with their replacement values.
# Uploaded by app() as the patches file
patches = [
{
'address': '116 Parkes Hall Road',
'postcode': 'DY1 3RJ',
'uprn': '90046817',
'walls-description': 'Cavity wall, filled cavity',
'walls-energy-eff': 'Average',
'roof-description': 'Pitched, 270 mm loft insulation',
'roof-energy-eff': 'Good',
'windows-description': 'Fully double glazed',
'windows-energy-eff': 'Good',
'mainheat-description': 'Boiler and radiators, mains gas',
'mainheat-energy-eff': 'Good',
'mainheatcont-description': 'Programmer, room thermostat and TRVs',
'mainheatc-energy-eff': 'Good',
'lighting-description': 'Low energy lighting in 27% of fixed outlets',
'lighting-energy-eff': 'Average',
'floor-description': 'Solid, no insulation (assumed)',
'secondheat-description': 'None',
'current-energy-efficiency': '73',
'current-energy-rating': 'C',
'energy-consumption-current': '184',
'co2-emissions-current': '2.4',
'potential-energy-efficiency': '88',
'total-floor-area': '73',
'construction-age-band': 'England and Wales: 1930-1949',
'property-type': 'House',
'built-form': 'Mid-Terrace',
}
]
# Measures that the non-invasive surveys found to be installed already. To reflect this in the front end,
# the measure is still included in the recommendation, however the cost is removed and a message is
# presented saying that the measure is already installed.
# Each entry lists the measure names under "already_installed" for one property (address + postcode)
already_installed = [
{
'address': '28 Sangwin Road', 'postcode': 'WV14 9EQ', "already_installed": ["loft_insulation"]
},
{
'address': '51 Hillwood Road', 'postcode': 'B62 8NQ', "already_installed": ["loft_insulation"]
},
{
'address': '47 Watsons Close', 'postcode': 'DY2 7HL', "already_installed": ["loft_insulation"]
},
{
'address': '44 Hatfield Road',
'postcode': 'DY9 7LW',
"already_installed": ["loft_insulation", "cavity_wall_insulation"]
}
]
# No non-invasive recommendations for this portfolio; app() still uploads the (empty) file
non_invasive_recommendations = []
def app():
    """
    Prepare the inputs of the second Immo Dudley pilot: read the asset list from S3, keep the properties that
    are flagged as in_pilot, normalise the column names and built form values, upload the asset list and the
    supporting files to the plan inputs bucket and print the request bodies of the EPC C and EPC B portfolios
    :return:
    """
    inputs_bucket = "retrofit-plan-inputs-dev"
    s3_prefix = f"{USER_ID}/{PORTFOLIO_ID}"

    pilot_assets = read_excel_from_s3(
        bucket_name="retrofit-datalake-dev",
        file_key="customers/Immo/Dudley Asset List - Hestia - pilot2.xlsx",
        header_row=0,
    )
    pilot_assets = pilot_assets[pilot_assets["in_pilot"]].copy()

    # Extract address and postcode: the first and the last comma separated part of the full address
    address_parts = pilot_assets["Full Address"].str.split(",")
    pilot_assets["address"] = address_parts.str[0]
    pilot_assets["postcode"] = address_parts.str[-1].str.strip()

    # We're provided with number of bathrooms and number of bedrooms.
    # The UPRNs are not the official ones
    asset_list = pilot_assets.rename(
        columns={
            "No. of Beds": "n_bedrooms",
            "No. of WC's": "n_bathrooms",
            'Property Type': 'property_type',
            'Architype': 'built_form',
        }
    )
    # Remap the values. Any built form that is not a key of this mapping becomes NaN
    built_form_names = {
        "SEMI DETACHED": "Semi-Detached",
        "MID TERRACE": "Mid-Terrace",
        "END TERRACE": "End-Terrace",
    }
    asset_list["built_form"] = asset_list["built_form"].map(built_form_names)

    # Store the asset list in s3
    filename = f"{s3_prefix}/pilot.csv"
    save_csv_to_s3(dataframe=asset_list, bucket_name=inputs_bucket, file_name=filename)

    # Store the overrides, the patches and the non-invasive recommendations in s3, in that order.
    # NOTE(review): the keys end in .json but the files are written with save_csv_to_s3 - confirm that the
    # consumer expects CSV content
    already_installed_filename = f"{s3_prefix}/already_installed.json"
    patches_filename = f"{s3_prefix}/patches.json"
    non_invasive_recommendations_filename = f"{s3_prefix}/non_invasive_recommendations.json"
    supporting_files = (
        (already_installed, already_installed_filename),
        (patches, patches_filename),
        (non_invasive_recommendations, non_invasive_recommendations_filename),
    )
    for records, key in supporting_files:
        save_csv_to_s3(dataframe=pd.DataFrame(records), bucket_name=inputs_bucket, file_name=key)

    # One request body per portfolio: EPC C uses PORTFOLIO_ID, EPC B uses PORTFOLIO_ID + 1
    for portfolio_offset, epc_target in ((0, "C"), (1, "B")):
        print(
            {
                "portfolio_id": str(PORTFOLIO_ID + portfolio_offset),
                "housing_type": "Private",
                "goal": "Increase EPC",
                "goal_value": epc_target,
                "trigger_file_path": filename,
                "already_installed_file_path": already_installed_filename,
                "patches_file_path": patches_filename,
                "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
                "budget": None,
            }
        )

View file

@ -0,0 +1,210 @@
# import extract_msg
from datetime import datetime
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from backend.app.db.functions.non_intrusive_surveys import upload_non_intrusive_survey_notes
def parse_msg_body(text):
    """
    Flatten the body of an email message into a dictionary of its non-empty lines.

    Asterisks and surrounding whitespace are removed from every line, lines that are empty afterwards are
    dropped and " = " separators are rewritten as ": ". The remaining lines are stored under the sequential
    keys "Info1", "Info2", ... in the order in which they appear.

    :param text: the raw message body; lines may end in CRLF, LF or CR
    :return: dictionary mapping "Info<n>" to the n-th cleaned line
    """
    data = {}
    # splitlines handles every line ending convention; splitting on CRLF only would collapse a body that
    # uses bare LF line endings into one single entry
    for line in text.splitlines():
        # Remove all asterisks and extra whitespace
        clean_line = line.replace('*', '').strip()
        if not clean_line:
            continue
        # replace leaves the line untouched when " = " is absent, so no membership test is needed
        data[f"Info{len(data) + 1}"] = clean_line.replace(' = ', ': ')
    return data
def app():
    """
    Upload the hand-transcribed results of the non-invasive surveys of the Immo Dudley pilot.

    The notes are defined inline, one dictionary per property keyed by uprn, and handed to
    upload_non_intrusive_survey_notes together with a database session. The session is always closed
    afterwards, also when the upload raises.
    :return:
    """
    # filepath = ("/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/5 Oaklands B62 "
    # "0JA/Immo - 5 Oaklands Halesowen B62 0JA.msg")
    # filepath = ("/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/6 Beech Rd DY1 "
    # "4BP/IMMO - 6 Beech Road Dudley DY1 4BP.msg")
    # filepath = (
    # "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/8 Corporation Rd DY2 "
    # "7PX/IMMO - 8 Corporation Road Dudley DY2 7PX.msg"
    # )
    # filepath = (
    # "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/21 Wells Rd DY5 3TB/"
    # "IMMO - 21 Wells Road Brierley Hill DY5 3TB.msg"
    # )
    # filepath = (
    # "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/47 Fairfield Rd DY8 "
    # "5UJ/IMMO - 47 Fairfield Road Wordsley Stourbridge DY8 5UJ.msg"
    # )
    # filepath = (
    # "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/91 Osprey Drive DY1 "
    # "2JS/IMMO - 91 Osprey Drive Dudley DY1 2JS.msg"
    # )
    # filepath = (
    # "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/195 Ashenhurst Rd DY1 "
    # "2JB/IMMO - 195 Ashenhurst Road Dudley DY1 2JB.msg"
    # )
    # filepath = (
    # "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/27 Milton Rd DY1 2JB/IMMO "
    # "- 27 Milton Road Coseley Bilston WV14 8HZ.msg"
    # )
    #
    # with extract_msg.Message(filepath) as msg:
    # body = msg.body
    #
    # from pprint import pprint
    # pprint(parse_msg_body(body))

    # Values shared by the notes below: the same surveyor, survey date and recurring cavity finding
    surveyor = 'Carl Fitzgerald - The Warmfront Team'
    survey_date = datetime.strptime('2024-04-11', '%Y-%m-%d')
    loose_fibre_cavity = (
        'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
        'CIGA check and extracting the cavity, replacing with bead insulation.'
    )

    # We manually create the non-invasive notes for the pilot
    non_invasive_notes = [
        {
            'uprn': 90028499,
            # 'address': '5 Oaklands',
            # 'postcode': 'B62 0JA',
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Wall Insulation': loose_fibre_cavity + ' '
                               'There is a shared alleyway with the neighbour, that is a solid brick wall.',
            'Wall Render': 'Partial render between top of ground floor window and bottom of 1st floor window',
            'Existing solar PV': 'No existing solar',
            'Orientation': 'Front house direction: North East, Back house direction: South West',
            'Access to mains?': 'Property has access to the mains',
        },
        {
            'uprn': 90055152,
            # 'address': '6 Beech Road',
            # 'postcode': 'DY1 4BP',
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Wall Insulation': '1st floor is solid brick with external wall insulation. 2nd floor is cavity, '
                               'retro drilled, containing loose fibre insulation. Consider getting a '
                               'CIGA check and extracting the cavity, replacing with bead insulation.',
            'Wall Render': None,
            'Existing solar PV': 'No existing solar',
            'Orientation': 'Side house direction: North East',
            'Access to mains?': 'Property has access to the mains',
        },
        {
            'uprn': 90070461,
            # 'address': '8 Corporation Road',
            # 'postcode': 'DY2 7PX',
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Wall Insulation': "External wall insulation",
            'Wall Render': "Render finish throughout",
            'Existing solar PV': 'No existing solar',
            'Orientation': 'Front house direction: North East, Back house direction: South West',
            'Access to mains?': None,
        },
        {
            'uprn': 90022227,
            # 'address': '21 Wells Road',
            # 'postcode': 'DY5 3TB',
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Wall Insulation': loose_fibre_cavity,
            'Wall Render': None,
            'Existing solar PV': 'No existing solar',
            'Orientation': 'Front house direction: East, Back house direction: West',
            'Access to mains?': 'Property has access to the mains',
        },
        {
            'uprn': 90077535,
            # 'address': '47 Fairfield Road',
            # 'postcode': 'DY8 5UJ',
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Wall Insulation': loose_fibre_cavity,
            'Wall Render': None,
            'Existing solar PV': 'No existing solar',
            'Orientation': 'Front house direction: East, Back house direction: West',
            'Access to mains?': 'Property has access to the mains',
        },
        {
            'uprn': 90060989,
            # 'address': '53 Bromley',
            # 'postcode': 'DY5 4PJ',
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Wall Insulation': "Filled at build, partially filled - celotex/king board, 50mm cavity remaining - "
                               "recommends a cavity wall fill",
            "Roof": "Hipped roof",
            'Existing solar PV': 'No existing solar',
            'Orientation': "Front house direction: North, Back house direction: South, Side house direction: West",
            'Access to mains?': 'Property has access to the mains',
        },
        {
            'uprn': 90048026,
            # 'address': '91 Osprey Drive',
            # 'postcode': 'DY1 2JS',
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Wall Insulation': loose_fibre_cavity,
            'Wall Render': 'Tile hung front and rear of property',
            'Existing solar PV': 'No existing solar',
            'Orientation': 'Side house direction: East',
            'Access to mains?': 'Property has access to the mains',
        },
        {
            'uprn': 90093693,
            # 'address': '150 Huntingtree Road',
            # 'postcode': 'B63 4HP',
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Heating': 'Electric (storage heaters)',
            'Wall Insulation': loose_fibre_cavity,
            "Roof": "Hipped roof",
            'Existing solar PV': 'No existing solar',
            'Orientation': "Front house direction: North West, Back house direction: South East, Side house direction: "
                           "North East",
        },
        {
            'uprn': 90051858,
            # 'address': '195 Ashenhurst Road',
            # 'postcode': 'DY1 2JB',
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Wall Insulation': loose_fibre_cavity,
            'Wall Render': "Solid render front and rear of property",
            'Existing solar PV': 'No existing solar',
            'Orientation': 'Front house direction: South, Back house direction: North',
            'Access to mains?': 'Property has access to the mains',
        },
        {
            'uprn': 90106884,
            # 'address': '27 Milton Road',
            # 'postcode': 'WV14 8HZ',
            'surveyor': surveyor,
            'survey_date': survey_date,
            'Wall Insulation': loose_fibre_cavity,
            'Wall Render': "Solid render front and rear of property",
            'Existing solar PV': 'No existing solar',
            'Orientation': 'Front house direction: South East, Back house direction: North West',
            'Access to mains?': 'Property has access to the mains',
        },
    ]

    session = sessionmaker(bind=db_engine)()
    try:
        upload_non_intrusive_survey_notes(session=session, non_invasive_notes=non_invasive_notes, batch_size=500)
    finally:
        # Release the database connection whether or not the upload succeeded
        session.close()

View file

@ -0,0 +1 @@
extract-msg

View file

@ -0,0 +1,134 @@
import os
import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv
from utils.s3 import read_excel_from_s3
from backend.SearchEpc import SearchEpc
from epc_api.client import EpcClient
from utils.s3 import save_csv_to_s3
# Load the variables defined in backend/.env into the environment before the token is read
load_dotenv(dotenv_path="backend/.env")
# Token passed to SearchEpc as auth_token; None when the variable is not set
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
def route_march_may_2024():
    """
    Pull supplementary EPC data for the Livewest route march that is expected to happen in May 2024 (this code
    was authored on the 30th April 2024). The newest EPC of every unit in the proposed route march list is
    looked up and the list, enriched with a handful of EPC fields, is saved as "Livewest EPC data.xlsx"
    """
    def _search(address1, postcode, full_address):
        # Run one EPC search and hand back the searcher so that the caller can inspect newest_epc
        searcher = SearchEpc(
            address1=address1,
            postcode=postcode,
            auth_token=EPC_AUTH_TOKEN,
            os_api_key="",
            property_type=None,
            fast=True,
            full_address=full_address,
        )
        # Force the skipping of estimating the EPC
        searcher.ordnance_survey_client.property_type = None
        searcher.ordnance_survey_client.built_form = None
        searcher.find_property(skip_os=True)
        return searcher

    # Keys carried on every EPC row so that it can be joined back onto the asset list
    join_keys = ["asset_list_house_no", "asset_list_address1", "asset_list_postcode"]
    asset_keys = ["NO", "ADDRESS 1", "POSTCODE"]
    # EPC fields we keep, mapped to the column names used in the output
    epc_fields = {
        "property-type": "Property Type",
        "built-form": "Archetype",
        "inspection-date": "Last EPC Inspection Date",
        "current-energy-rating": "Last survey EPC Rating",
        "current-energy-efficiency": "Last survey SAP Score",
        "roof-description": "Roof Construction",
        "walls-description": "Wall Construction",
        "transaction-type": "Last EPC Reason",
    }

    units = read_excel_from_s3(
        bucket_name="retrofit-datalake-dev",
        file_key="customers/Livewest/Livewest proposed route march Apr-May 2024.xlsx",
        header_row=0,
    )

    matches = []
    for _, unit in tqdm(units.iterrows(), total=len(units)):
        address_parts = [unit["NO"], unit["ADDRESS 1"], unit["ADDRESS 2"], unit["ADDRESS 3"], unit["POSTCODE"]]
        full_address = ", ".join(str(part).strip() for part in address_parts if not pd.isnull(part))

        searcher = _search(str(unit["NO"]), unit["POSTCODE"], full_address)
        if searcher.newest_epc is None:
            # We try with a different address 1: lower cased, with "flat", "ft" and then every remaining "t"
            # removed
            alternative = str(unit["NO"]).lower()
            for token in ("flat", "ft", "t"):
                alternative = alternative.replace(token, "")
            searcher = _search(alternative.strip(), unit["POSTCODE"], full_address)
        if searcher.newest_epc is None:
            continue

        matches.append(
            {
                "asset_list_house_no": unit["NO"],
                "asset_list_address1": unit["ADDRESS 1"],
                "asset_list_postcode": unit["POSTCODE"],
                **searcher.newest_epc.copy(),
            }
        )

    # Retrieve just the data we need
    epc_df = pd.DataFrame(matches)[join_keys + ["uprn", "address"] + list(epc_fields)].rename(
        columns={"address": "Matched EPC Address"}
    )

    enriched = (
        units
        .merge(epc_df, how="left", left_on=asset_keys, right_on=join_keys)
        .drop_duplicates(subset=asset_keys)
        .drop(columns=join_keys)
        .rename(columns=epc_fields)
    )
    # Store as an excel
    enriched.to_excel("Livewest EPC data.xlsx", index=False)

View file

@ -0,0 +1,137 @@
import os
import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv
from utils.s3 import read_excel_from_s3
from backend.SearchEpc import SearchEpc
from epc_api.client import EpcClient
from utils.s3 import save_csv_to_s3
# Load the variables defined in backend/.env into the environment before the token is read
load_dotenv(dotenv_path="backend/.env")
# Token passed to SearchEpc as auth_token; None when the variable is not set
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
def app():
    """
    Ad hoc job: fetch the newest EPC for each property in the Places For People "route march phase 1" asset
    list and save the asset list, enriched with a handful of EPC fields, as "Places For People EPC data.xlsx"
    :return:
    """
    def _search(address1, postcode, full_address):
        # Run one EPC search and hand back the searcher so that the caller can inspect newest_epc
        searcher = SearchEpc(
            address1=address1,
            postcode=postcode,
            auth_token=EPC_AUTH_TOKEN,
            os_api_key="",
            property_type=None,
            fast=True,
            full_address=full_address,
        )
        # Force the skipping of estimating the EPC
        searcher.ordnance_survey_client.property_type = None
        searcher.ordnance_survey_client.built_form = None
        searcher.find_property(skip_os=True)
        return searcher

    # Keys carried on every EPC row so that it can be joined back onto the asset list
    join_keys = ["asset_list_address", "asset_list_address1", "asset_list_postcode"]
    asset_keys = ["ADDRESS", "ADDRESS.1", "POSTCODE"]
    # EPC fields we keep, mapped to the column names used in the output
    epc_fields = {
        "property-type": "Property Type",
        "built-form": "Archetype",
        "inspection-date": "Last EPC Inspection Date",
        "current-energy-rating": "Last survey EPC Rating",
        "current-energy-efficiency": "Last survey SAP Score",
        "roof-description": "Roof Construction",
        "walls-description": "Wall Construction",
        "transaction-type": "Last EPC Reason",
    }

    properties = read_excel_from_s3(
        bucket_name="retrofit-datalake-dev",
        file_key="customers/Places For People/PFP ROUTE MARCH PHASE 1.xlsx",
        header_row=1,
    )

    matches = []
    for _, pfp_property in tqdm(properties.iterrows(), total=len(properties)):
        address_parts = [
            pfp_property["ADDRESS"],
            pfp_property["ADDRESS.1"],
            pfp_property["ADDRESS.2"],
            pfp_property["POSTCODE"],
        ]
        full_address = ", ".join(str(part).strip() for part in address_parts if not pd.isnull(part))

        searcher = _search(str(pfp_property["ADDRESS"]), pfp_property["POSTCODE"], full_address)
        if searcher.newest_epc is None:
            # We try with a different address 1: lower cased, with "ft" and then every remaining "t" removed
            alternative = str(pfp_property["ADDRESS"]).lower()
            for token in ("ft", "t"):
                alternative = alternative.replace(token, "")
            searcher = _search(alternative.strip(), pfp_property["POSTCODE"], full_address)
        if searcher.newest_epc is None:
            continue

        matches.append(
            {
                "asset_list_address": pfp_property["ADDRESS"],
                "asset_list_address1": pfp_property["ADDRESS.1"],
                "asset_list_postcode": pfp_property["POSTCODE"],
                **searcher.newest_epc.copy(),
            }
        )

    # NOTE(review): the original carried a bare "702" note here - presumably the number of matched EPCs at
    # the time of writing
    # Retrieve just the data we need
    epc_df = pd.DataFrame(matches)[join_keys + ["uprn", "address"] + list(epc_fields)].rename(
        columns={"address": "Matched EPC Address"}
    )

    enriched = (
        properties
        .merge(epc_df, how="left", left_on=asset_keys, right_on=join_keys)
        # Keep a single row per asset list property (address, address line 1 and postcode)
        .drop_duplicates(subset=asset_keys)
        .drop(columns=join_keys)
        .rename(columns=epc_fields)
    )
    # Store as an excel
    enriched.to_excel("Places For People EPC data.xlsx", index=False)

View file

@ -246,7 +246,7 @@ def create_powerpoint(data, save_location):
prs.save(save_location)
def create_recommendations_summary(recommendations_df, properties_df, sap_target):
def create_recommendations_summary(recommendations_df, properties_df, property_details_df, sap_target):
# Aggregate the impact of the recommendations
# We want:
# Total number of sap points
@ -259,13 +259,15 @@ def create_recommendations_summary(recommendations_df, properties_df, sap_target
total_valuation_impact=("property_valuation_increase", "sum"),
total_bill_savings=("energy_cost_savings", "sum"),
total_cost=("estimated_cost", "sum"),
total_carbon=("co2_equivalent_savings", "sum")
total_carbon=("co2_equivalent_savings", "sum"),
adjusted_heat_demand=("adjusted_heat_demand", "sum")
).reset_index()
# Merge on current sap points
# Merge on current sap points, current CO2, current adjusted_heat_demand, current annual bill
recommendations_summary = recommendations_summary.merge(
properties_df[["id", "uprn", "current_sap_points"]].rename(columns={"id": "property_id"}), on="property_id",
how="left"
)
recommendations_summary["expected_sap_points"] = (
recommendations_summary["current_sap_points"] + recommendations_summary["total_sap_points"]
)
@ -274,4 +276,18 @@ def create_recommendations_summary(recommendations_df, properties_df, sap_target
)
recommendations_summary["sap_difference"] = sap_target - recommendations_summary["expected_sap_points"]
if property_details_df is not None:
recommendations_summary = recommendations_summary.merge(
property_details_df[["uprn", "co2_emissions", "adjusted_energy_consumption", "energy_bill"]].rename(
columns={
"id": "property_id",
"co2_emissions": "current_co2",
"adjusted_energy_consumption": "current_energy",
"energy_bill": "current_energy_bill"
}
),
on="uprn",
how="left"
)
return recommendations_summary

View file

View file

@ -0,0 +1,56 @@
import pandas as pd
from utils.s3 import read_excel_from_s3
from utils.s3 import save_csv_to_s3
# Used by app() to build the S3 key prefix "{USER_ID}/{PORTFOLIO_ID}/..." of the uploaded files;
# PORTFOLIO_ID is also the portfolio_id printed in the request body
PORTFOLIO_ID = 77
USER_ID = 8
# EPC fields to override for individual properties: the entry identifies the property by address and
# postcode, the remaining keys are EPC field names with their replacement values
patches = [
{
"address": "79 Perryn Road",
"postcode": "W3 7LT",
"roof-description": "Pitched, no insulation (assumed)"
}
]
def app():
    """
    Prepare the inputs of the single property portfolio (79 Perryn Road): upload the asset list and the EPC
    patches to the plan inputs bucket and print the request body of the EPC B portfolio
    :return:
    """
    inputs_bucket = "retrofit-plan-inputs-dev"
    s3_prefix = f"{USER_ID}/{PORTFOLIO_ID}"

    single_property = {
        'uprn': 12103117,
        "address": "79 Perryn Road",
        "postcode": "W3 7LT",
    }
    asset_list = pd.DataFrame([single_property])

    # Store the asset list in s3
    filename = f"{s3_prefix}/pilot.csv"
    save_csv_to_s3(dataframe=asset_list, bucket_name=inputs_bucket, file_name=filename)

    # Store patches in s3
    # NOTE(review): the key ends in .json but the file is written with save_csv_to_s3 - confirm that the
    # consumer expects CSV content
    patches_filename = f"{s3_prefix}/patches.json"
    save_csv_to_s3(dataframe=pd.DataFrame(patches), bucket_name=inputs_bucket, file_name=patches_filename)

    # There are no already installed measures or non-invasive recommendations, hence the empty paths
    print(
        {
            "portfolio_id": str(PORTFOLIO_ID),
            "housing_type": "Private",
            "goal": "Increase EPC",
            "goal_value": "B",
            "trigger_file_path": filename,
            "already_installed_file_path": "",
            "patches_file_path": patches_filename,
            "non_invasive_recommendations_file_path": "",
            "budget": None,
        }
    )

View file

@ -145,6 +145,7 @@ class Eligibility:
"reason": None,
"thickness_classification": thickness_classification
}
return
# Insulation is already thick enough
self.loft = {
@ -164,8 +165,10 @@ class Eligibility:
"""
is_cavity = self.walls["is_cavity_wall"]
is_empty = (not self.walls["is_filled_cavity"]) or (
is_empty = (not self.walls["is_filled_cavity"])
is_as_built = (
self.walls["is_as_built"] and self.walls["insulation_thickness"] not in ["average", "above average"]
and self.walls["is_assumed"]
)
is_partial_filled = "partial" in self.walls["clean_description"].lower()
# We look for potentially under performing cavities - anything that is assumed, as built and insulated
@ -175,6 +178,7 @@ class Eligibility:
is_unfilled_cavity = is_cavity and (is_empty and not is_partial_filled)
is_partial_filled_cavity = is_cavity and is_partial_filled
is_assumed_filled_cavity = is_cavity and is_as_built
is_underperforming_cavity = is_cavity and is_underperforming
# Check if it has internal or external wall insulation
@ -195,6 +199,13 @@ class Eligibility:
}
return
if is_assumed_filled_cavity:
self.cavity = {
"suitability": True,
"type": "as built assumed",
}
return
if is_partial_filled_cavity:
self.cavity = {
"suitability": True,
@ -340,13 +351,35 @@ class Eligibility:
# Check if the property is suitable for cavity wall
self.cavity_insulation()
self.loft_insulation()
self.gbis_warmfront = (self.cavity["suitability"]) and (
int(self.epc["current-energy-efficiency"]) <= 68
)
current_sap = int(self.epc["current-energy-efficiency"])
# We have a strict suitability check and a non-strict check
def check_eco4_warmfront(self, post_retrofit_sap=None):
# Perfect strictness
if (self.cavity["type"] == "empty") and (current_sap < 69):
self.gbis_warmfront = {
"eligible": True,
"strict": True,
"message": "Perfect suitability",
}
return
# Near perfect
if self.cavity["suitability"] and (current_sap < 69):
self.gbis_warmfront = {
"eligible": True,
"strict": True,
"message": "Near perfect suitability",
}
return
self.gbis_warmfront = {
"eligible": False,
"strict": False,
"message": "All conditions fail",
}
def check_eco4_warmfront(self):
"""
This funciton will check if the property is eligible for funding under the ECO4 scheme
@ -378,49 +411,121 @@ class Eligibility:
self.cavity_insulation()
self.loft_insulation()
# make sure conditions 2 and 3 are true
is_eligible = self.cavity["suitability"] & self.loft["suitability"]
# We put in a placeholder when the roof is not a loft
if self.loft["reason"] == "roof not loft":
self.loft["thickness"] = 999
if current_sap >= 69:
# Case 1: No conditions meet
if not self.cavity["suitability"] and (self.loft["thickness"] > 100) and current_sap >= 55:
self.eco4_warmfront = {
"eligible": False,
"message": "sap too high",
"strict": False,
"message": "All conditions fail",
"cavity_type": self.cavity["type"],
"loft_type": self.loft["thickness_classification"]
}
return
if post_retrofit_sap is None:
if current_sap >= 55:
message = "Possibly eligible but property currently EPC D"
else:
message = "subject to post retrofit sap" if is_eligible else "not eligible"
# Update the message to flag properties that failed just because of a full cavity.
# We need to double check that the wall is a cavity, that the loft is suitable and that the
# sap is within reason
# We can then estimate the age of the cavity fill
if not is_eligible and (current_sap < 69) and self.loft["suitability"] and self.walls["is_cavity_wall"]:
message = "Failed due to full cavity - check cavity age"
# Case 2 - perfect match
if (self.cavity["type"] == "empty") and (self.loft["thickness"] <= 100) and (current_sap < 55):
self.eco4_warmfront = {
"eligible": is_eligible,
"message": message,
"eligible": True,
"strict": True,
"message": "Perfect suitability",
"cavity_type": self.cavity["type"],
"loft_type": self.loft["thickness_classification"]
}
return
is_eligible = is_eligible & (post_retrofit_sap >= 69)
# Case 2.5 - near perfect match - but we would not recommend this using the model
if self.cavity["suitability"] and (self.loft["thickness"] <= 100) and (current_sap < 55):
self.eco4_warmfront = {
"eligible": True,
"strict": True,
"message": "Near perfect suitability",
"cavity_type": self.cavity["type"],
"loft_type": self.loft["thickness_classification"]
}
return
self.eco4_warmfront = {
"eligible": is_eligible,
"message": None,
"cavity_type": self.cavity["type"],
"loft_type": self.loft["thickness_classification"]
}
return
# Case 3 - cavity is suitable, loft is within 150mm, sap is good
if self.cavity["suitability"] and (self.loft["thickness"] <= 150) and (current_sap < 55):
self.eco4_warmfront = {
"eligible": True,
"strict": False,
"message": "Meets cavity, loft borderline, meets sap",
"cavity_type": self.cavity["type"],
"loft_type": self.loft["thickness_classification"]
}
return
# Case 3 - cavity is suitable, loft is not, sap is good
if self.cavity["suitability"] and (self.loft["thickness"] > 150) and (current_sap < 55):
self.eco4_warmfront = {
"eligible": True,
"strict": False,
"message": "Meets cavity and sap",
"cavity_type": self.cavity["type"],
"loft_type": self.loft["thickness_classification"]
}
return
# Case 4 - cavity is not suitable, loft is, sap is good - we say this is not eligible
if not self.cavity["suitability"] and (self.loft["thickness"] <= 100) and (current_sap < 55):
self.eco4_warmfront = {
"eligible": False,
"strict": False,
"message": "failed fabric check",
"cavity_type": self.cavity["type"],
"loft_type": self.loft["thickness_classification"]
}
return
# Case 5 - cavity and loft suitable, sap too high
if self.cavity["suitability"] and (self.loft["thickness"] <= 150) and (current_sap >= 55):
self.eco4_warmfront = {
"eligible": True,
"strict": False,
"message": "Meets fabric, fails SAP check",
"cavity_type": self.cavity["type"],
"loft_type": self.loft["thickness_classification"]
}
return
# Case 6 - meets just cavity
if self.cavity["suitability"] and (self.loft["thickness"] > 100) and (current_sap >= 55):
self.eco4_warmfront = {
"eligible": True,
"strict": False,
"message": "Meets just cavity",
"cavity_type": self.cavity["type"],
"loft_type": self.loft["thickness_classification"]
}
return
# Case 7 - fails cavity, loft but meets sap
if not self.cavity["suitability"] and (self.loft["thickness"] > 100) and (current_sap < 55):
self.eco4_warmfront = {
"eligible": False,
"strict": False,
"message": "Fails cavity and loft, meets SAP",
"cavity_type": self.cavity["type"],
"loft_type": self.loft["thickness_classification"]
}
return
# Case 8 - fails cavity, meets loft, fails sap
if not self.cavity["suitability"] and (self.loft["thickness"] <= 100) and (current_sap >= 55):
self.eco4_warmfront = {
"eligible": False,
"strict": False,
"message": "Fails cavity, meets loft, fails SAP",
"cavity_type": self.cavity["type"],
"loft_type": self.loft["thickness_classification"]
}
return
raise ValueError("Implement me")
def check_gbis(self):

View file

@ -387,17 +387,19 @@ def prepare_model_data_row(
}
simulations = [
[cavity_simulation],
[loft_simulation]
cavity_simulation,
loft_simulation
]
p.adjust_difference_record_with_recommendations(simulations)
recommendation_record = p.base_difference_record.df.to_dict("records")[0].copy()
scoring_dict = p.create_recommendation_scoring_data(
property_id=p.id,
recommendation_record=recommendation_record,
recommendations=simulations,
primary_recommendation_id=cavity_simulation["recommendation_id"]
)
# Make sure we definitely have the correct data
cavity_scoring = [x for x in p.recommendations_scoring_data if "cavity" in x["id"]][0]
loft_scoring = [x for x in p.recommendations_scoring_data if "loft" in x["id"]][0]
return [cavity_scoring, loft_scoring]
return [scoring_dict]
def get_ha_32data(ha_data, cleaned, cleaning_data, created_at):

File diff suppressed because it is too large Load diff

View file

@ -203,11 +203,11 @@ class TrainingDataset(BaseDataset):
common_cols = [[col + "_starting", col + "_ending"] for col in common_cols]
self.df = self.df.loc[
:,
no_suffix_cols
+ only_ending_cols
+ [col for cols in common_cols for col in cols],
]
:,
no_suffix_cols
+ only_ending_cols
+ [col for cols in common_cols for col in cols],
]
def _remove_abnormal_change_in_floor_area(self):
"""
@ -509,7 +509,7 @@ class TrainingDataset(BaseDataset):
expanded_df["is_sandstone_or_limestone"]
== expanded_df["is_sandstone_or_limestone_ending"]
)
]
]
elif component == "floor":
expanded_df = expanded_df[
(expanded_df["is_suspended"] == expanded_df["is_suspended_ending"])
@ -526,7 +526,7 @@ class TrainingDataset(BaseDataset):
expanded_df["is_to_external_air"]
== expanded_df["is_to_external_air_ending"]
)
]
]
elif component == "roof":
expanded_df = expanded_df[
(expanded_df["is_pitched"] == expanded_df["is_pitched_ending"])
@ -539,7 +539,7 @@ class TrainingDataset(BaseDataset):
expanded_df["has_dwelling_above"]
== expanded_df["has_dwelling_above_ending"]
)
]
]
return expanded_df

View file

@ -1,9 +1,11 @@
import msgpack
import pandas as pd
from datetime import datetime
from typing import List
from pathlib import Path
from tqdm import tqdm
import multiprocessing as mp
from etl.epc.DataProcessor import EPCDataProcessor
from etl.epc.Record import EPCRecord, EPCDifferenceRecord
@ -87,9 +89,10 @@ class EPCPipeline:
run_mode="training",
epc_local_file="certificates.csv",
epc_bucket_name="retrofit-data-dev",
epc_cleaning_dataset_key="sap_change_model/cleaning_dataset_record.parquet",
epc_all_equal_rows_key="sap_change_model/all_equal_rows_record.parquet",
epc_compiled_dataset_key="sap_change_model/dataset_record.parquet",
epc_cleaning_dataset_key="sap_change_model/{}/cleaning_dataset_rooms.parquet",
epc_all_equal_rows_key="sap_change_model/{}/all_equal_rows_rooms.parquet",
epc_compiled_dataset_key="sap_change_model/{}/dataset_rooms.parquet",
use_parallel=False,
):
"""
:param directories: List of directories to process
@ -111,9 +114,13 @@ class EPCPipeline:
self.run_mode = run_mode
self.epc_local_file = epc_local_file
self.epc_bucket_name = epc_bucket_name
self.epc_cleaning_dataset_key = epc_cleaning_dataset_key
self.epc_all_equal_rows_key = epc_all_equal_rows_key
self.epc_compiled_dataset_key = epc_compiled_dataset_key
self.use_parallel = use_parallel
self.timeprefix = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
self.epc_cleaning_dataset_key = epc_cleaning_dataset_key.format(self.timeprefix)
self.epc_all_equal_rows_key = epc_all_equal_rows_key.format(self.timeprefix)
self.epc_compiled_dataset_key = epc_compiled_dataset_key.format(self.timeprefix)
def run(self):
"""
@ -209,8 +216,11 @@ class EPCPipeline:
"Directories not specified - Unable to run Training pipeline"
)
for directory in tqdm(self.directories):
self.process_directory(directory)
if self.use_parallel:
self.run_training_dataset_parallel_pipeline()
else:
for directory in tqdm(self.directories):
self.process_directory(directory)
save_dataframe_to_s3_parquet(
df=self.compiled_dataset,
@ -230,6 +240,41 @@ class EPCPipeline:
file_key=self.epc_cleaning_dataset_key,
)
def run_training_dataset_parallel_pipeline(self):
    """
    Run the training pipeline in parallel.

    Each directory in ``self.directories`` is handed to
    ``self.process_directory_task`` in a ``multiprocessing`` pool. The
    per-directory results are then merged back into this instance:
    datasets are appended to ``self.compiled_dataset``, cleaning averages to
    ``self.compiled_cleaning_averages`` and all-equal rows to
    ``self.compiled_all_equal_rows``.
    """
    with mp.Pool() as pool:
        # imap preserves the order of self.directories; list() drains the
        # iterator while the pool is still alive.
        results = list(
            tqdm(
                pool.imap(self.process_directory_task, self.directories),
                total=len(self.directories),
            ),
        )

    for result in tqdm(results):
        self.compiled_cleaning_averages.append(result["cleaning_averages"])
        self.compiled_all_equal_rows.extend(result["all_equal_rows"])

    # Concatenate once: calling pd.concat inside the loop re-copies the
    # accumulated frame for every directory (quadratic in total rows).
    if results:
        self.compiled_dataset = pd.concat(
            [self.compiled_dataset, *(result["dataset"] for result in results)]
        )
def process_directory_task(self, directory: str) -> dict:
    """
    Task to enable parallel processing.

    Processes a single directory and returns the pieces the parent process
    needs to merge.

    :param directory: Directory to process
    :return: dict with keys "dataset" (``self.compiled_dataset``),
        "cleaning_averages" (``self.epc_data_processor.cleaning_averages``)
        and "all_equal_rows" (``self.compiled_all_equal_rows``), all read
        after ``process_directory`` has run
    """
    self.process_directory(directory=directory)

    # The return annotation was previously pd.DataFrame, but a dict is returned.
    return {
        "dataset": self.compiled_dataset,
        "cleaning_averages": self.epc_data_processor.cleaning_averages,
        "all_equal_rows": self.compiled_all_equal_rows,
    }
def process_directory(self, directory: Path):
"""
Process a single directory
@ -241,12 +286,13 @@ class EPCPipeline:
self.epc_data_processor.prepare_data(filepath=filepath)
constituency_data = self.epc_data_processor.data
self.compiled_cleaning_averages.append(
self.epc_data_processor.cleaning_averages
)
constituency_difference_records = []
# self.check_records = []
for uprn, property_data in constituency_data.groupby("uprn", observed=True):
difference_records = self.process_uprn(
uprn=str(uprn), property_data=property_data, directory=directory
@ -254,12 +300,6 @@ class EPCPipeline:
if difference_records is not None:
constituency_difference_records.extend(difference_records)
# check_list = []
# for check_record in self.check_records:
# check_list.append(check_record["difference_record"])
# td = TrainingDataset(datasets=check_list, cleaned_lookup=clean_lookup)
constituency_dataset = TrainingDataset(
datasets=constituency_difference_records, cleaned_lookup=clean_lookup
)

View file

@ -191,7 +191,7 @@ class EPCRecord:
This method will clean the records using the data processor
"""
epc_data_processor = EPCDataProcessor(
data=self.epc_record_as_dataframe("prepared_epc"),
data=self.epc_record_as_dataframe("prepared_epc").copy(),
run_mode="newdata",
cleaning_averages=self.cleaning_data,
)
@ -725,26 +725,26 @@ class EPCRecord:
if self.prepared_epc["construction-age-band"] in DATA_ANOMALY_MATCHES:
if self.old_data:
# Take the most recent
max_datetime = max(
[
old_record["lodgement-datetime"]
for old_record in self.old_data
if old_record["construction-age-band"]
not in DATA_ANOMALY_MATCHES
]
)
most_recent = [
old_record
old_age_bands = [
old_record["lodgement-datetime"]
for old_record in self.old_data
if old_record["lodgement-datetime"] == max_datetime
if old_record["construction-age-band"] not in DATA_ANOMALY_MATCHES
]
self.prepared_epc["construction-age-band"] = (
EPCDataProcessor.clean_construction_age_band(
most_recent[0]["construction-age-band"]
if old_age_bands:
max_datetime = max(old_age_bands)
most_recent = [
old_record
for old_record in self.old_data
if old_record["lodgement-datetime"] == max_datetime
]
self.prepared_epc["construction-age-band"] = (
EPCDataProcessor.clean_construction_age_band(
most_recent[0]["construction-age-band"]
)
)
)
self.construction_age_band = self.prepared_epc["construction-age-band"]
self.age_band = england_wales_age_band_lookup.get(self.construction_age_band)

View file

@ -20,6 +20,10 @@ from recommendations.Recommendations import Recommendations
from utils.logger import setup_logger
from utils.s3 import read_dataframe_from_s3_parquet, save_dataframe_to_s3_parquet
from datetime import datetime
now = datetime.now().strftime("%d-%m-%Y-%H-%M-%S")
logger = setup_logger()
logger.info("Connecting to db")
@ -50,9 +54,19 @@ scenario_properties = [
"postcode": "NN1 5JY",
"lmk-key": "1459796789102016070507274146560098",
"measures": [
[["internal_wall_insulation"], "11", None, [0]],
[["external_wall_insulation"], "10", None, [0]],
[["solar", "windows"], "12-15", {"photo_supply_ending": 50}, [0, 1]],
[
["internal_wall_insulation"],
"11",
{"walls_insulation_thickness_ending": "average"},
[0],
],
[
["external_wall_insulation"],
"10",
{"walls_insulation_thickness_ending": "average"},
[0],
],
[["solar", "windows"], "15", {"photo_supply_ending": 50}, [0, 1]],
],
},
{
@ -60,7 +74,12 @@ scenario_properties = [
"postcode": "HP1 2HA",
"lmk-key": "c14029235739827d5f627dc8aa9bb567d026b267e851e0db0001db24638667b1",
"measures": [
[["cavity_wall_insulation", "loft_insulation"], "15", None, [0, 1]],
[
["cavity_wall_insulation", "loft_insulation"],
"15",
{"walls_insulation_thickness_ending": "average"},
[0, 1],
],
],
},
{
@ -68,7 +87,12 @@ scenario_properties = [
"postcode": "HP1 2HE",
"lmk-key": "99296a6dda21314fef3a61cda59e441e9a2aacf115eb96f4a0fa85696bf7b117",
"measures": [
[["cavity_wall_insulation", "loft_insulation"], "15", None, [0, 1]],
[
["cavity_wall_insulation", "loft_insulation"],
"15",
{"walls_insulation_thickness_ending": "average"},
[0, 1],
],
],
},
{
@ -76,7 +100,12 @@ scenario_properties = [
"postcode": "HP1 2AN",
"lmk-key": "d1e0534be3a44c33003323b21d0e322e3daddc65b5ee71936f89c59ddab96b50",
"measures": [
[["cavity_wall_insulation", "loft_insulation"], "15", None, [0, 1]],
[
["cavity_wall_insulation", "loft_insulation"],
"15",
{"walls_insulation_thickness_ending": "average"},
[0, 1],
],
],
},
{
@ -84,11 +113,17 @@ scenario_properties = [
"postcode": "HP1 2HX",
"lmk-key": "1eae354db522a95188018d9cd0502ed8c609910b6c88f8797d3a25f59b11770a",
"measures": [
[["cavity_wall_insulation", "loft_insulation"], "15", None, [0, 1]],
[
["cavity_wall_insulation", "loft_insulation"],
"15",
{"walls_insulation_thickness_ending": "average"},
[0, 1],
],
],
},
]
recommendations_scoring_data = []
for scenario_property in scenario_properties:
@ -132,7 +167,7 @@ for scenario_property in scenario_properties:
p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
recommender = Recommendations(property_instance=p, materials=materials)
property_recommendations = recommender.recommend()
property_recommendations = recommender.recommend("0")
wall_recommendations = recommender.wall_recomender.recommendations
loft_recommendations = recommender.roof_recommender.recommendations
@ -213,6 +248,9 @@ for scenario_property in scenario_properties:
recommendations_scoring_data.extend(scoring_list)
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
recommendations_scoring_data["impact"] = recommendations_scoring_data["impact"].astype(
int
)
recommendations_scoring_data = recommendations_scoring_data.drop(
columns=[
"rdsap_change",
@ -247,5 +285,5 @@ all_predictions = model_api.predict_all(
save_dataframe_to_s3_parquet(
recommendations_scoring_data,
"retrofit-data-dev",
"scenario_data/recommendations_scoring_data.parquet",
f"scenario_data/{now}/recommendations_scoring_data.parquet",
)

View file

@ -16,7 +16,7 @@ def main():
epc_pipeline = EPCPipeline(
directories=directories,
run_mode="record",
use_parallel=True,
epc_data_processor=EPCDataProcessor(run_mode="training"),
)

View file

@ -1,4 +1,5 @@
pandas==2.1.3
tqdm==4.66.1
msgpack==1.0.7
boto3==1.29.6
boto3==1.29.6
pyarrow==15.0.2

View file

@ -36,8 +36,11 @@ def app():
cleaned_data = {}
epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
WALLS = []
for directory in tqdm(epc_directories):
data = pd.read_csv(directory / "certificates.csv", low_memory=False)
z = data["WALLS_DESCRIPTION"].unique().tolist()
WALLS.extend(z)
# Rename the columns to the same format as the api returns
data.columns = [c.replace("_", "-").lower() for c in data.columns]
# Take just date before the date threshold

View file

@ -122,6 +122,13 @@ class RoofAttributes(Definitions):
result["is_valid"] = "invalid" not in description
description = description.replace("invalid", "")
# We handle an edge case where the description is "pitched, 150 loft insulation" and is missing the mm
if result["is_pitched"] or result["is_loft"]:
# Search for a regular expression that matches 150 insulation
match = re.search(r"(\d+\+?)\s*insulation", description)
if match:
result['insulation_thickness'] = match.group(1)
# insulation thickness
thickness_map = {
"ceiling insulated": "average",
@ -137,11 +144,11 @@ class RoofAttributes(Definitions):
# Remove the match from the description
# description = description.replace(key, "")
break
else:
# Extract insulation thickness in mm, if present
match = re.search(r'(\d+\+?)\s*mm', description)
if match:
result['insulation_thickness'] = match.group(1)
# Extract insulation thickness in mm, if present
match = re.search(r'(\d+\+?)\s*mm', description)
if match:
result['insulation_thickness'] = match.group(1)
if "insulation_thickness" not in result:
result['insulation_thickness'] = None

View file

@ -0,0 +1,19 @@
# Non Intrusive Surveys - photo upload
This folder contains photos taken during non-intrusive surveys. Photos are stored in folders named after the survey ID.
## Getting started
Install the required packages by running the following command:
```bash
pip install -r requirements.txt
```
## Usage
The main application is found in the app.py file. To run the application, use the following command:
```bash
python app.py
```

View file

@ -0,0 +1,149 @@
import boto3
import os
from PIL import Image
from pathlib import Path
from dotenv import load_dotenv
# Inputs
# Path of the dotenv file loaded below via load_dotenv().
ENV_FILEPATH = "etl/non_intrusive_surveys/photos/.env"
# Local directory whose immediate subdirectories are scanned by app().
# NOTE(review): machine-specific absolute path - must be edited per user.
PHOTO_DIRECTORY = "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data"
# Maps each subdirectory name of PHOTO_DIRECTORY to the UPRN used when
# naming the generated image folder.
FOLDER_UPRN_LOOKUP = {
"91 Osprey Drive DY1 2JS": 90048026,
"195 Ashenhurst Rd DY1 2JB": 90051858,
"6 Beech Rd DY1 4BP": 90055152,
"53 Bromley DY5 4PJ": 90060989,
"5 Oaklands B62 0JA": 90028499,
"47 Fairfield Rd DY8 5UJ": 90077535,
"150 Huntingtree Rd B63 4HP": 90093693,
"27 Milton Rd DY1 2JB": 90106884,
"21 Wells Rd DY5 3TB": 90022227,
"8 Corporation Rd DY2 7PX": 90070461
}
# Load the dotenv file first so the os.getenv() lookups below can see its values.
load_dotenv(ENV_FILEPATH)
# Both values are None when the corresponding variable is not set.
CLOUDFRONT_DISTRIBUTION_DOMAIN_NAME = os.getenv("CLOUDFRONT_DISTRIBUTION_DOMAIN_NAME", None)
CDN_BUCKET_NAME = os.getenv("CDN_BUCKET_NAME", None)
def list_subdirectories(directory_path):
    """
    Return the immediate child directories of a directory.

    :param directory_path: Path to the directory to inspect.
    :return: A list of ``Path`` objects, one per child directory (files are
        excluded; nested levels are not descended into).
    """
    found = []
    for entry in Path(directory_path).iterdir():
        if entry.is_dir():
            found.append(entry)
    return found
def list_files_in_directory(directory_path, file_extension=".jpg"):
    """
    Recursively collect every path under a directory whose name ends with the
    given extension.

    :param directory_path: Directory to scan, as a string or ``Path``.
    :param file_extension: Suffix appended to ``*`` to form the glob pattern.
    :return: A list of matching ``Path`` objects.
    """
    # Accept both plain strings and ready-made Path objects
    root = directory_path if isinstance(directory_path, Path) else Path(directory_path)

    # rglob searches the directory itself and all nested subdirectories
    pattern = f'*{file_extension}'
    return list(root.rglob(pattern))
def create_images(input_path, uprn):
    """
    Render three web variants of a photo into ``non_intrusive_photos/{uprn}``.

    The output file names are prefixed with the source file's stem. Previously
    they were fixed ("thumbnail.jpg", "1080p.jpg", "webp.webp"), so every
    photo of a property overwrote the variants of the previous one.

    :param input_path: Path (string or ``Path``) of the source image.
    :param uprn: Property identifier used as the output folder name.
    :return: Tuple ``(thumbnail_path, full_hd_path, webp_path)`` of the files
        written.
    """
    # Define the base directory path
    base_directory = f"non_intrusive_photos/{uprn}"
    print(f"Creating directory: {base_directory}")  # Debug: print the directory to be created
    # Need to create local directory if it doesn't exist
    os.makedirs(base_directory, exist_ok=True)

    # One set of outputs per source photo.
    # NOTE(review): photos with the same stem in different subfolders of one
    # property would still collide - confirm whether that can occur.
    stem = Path(input_path).stem
    thumbnail_path = os.path.join(base_directory, f"{stem}_thumbnail.jpg")
    full_hd_path = os.path.join(base_directory, f"{stem}_1080p.jpg")
    webp_path = os.path.join(base_directory, f"{stem}.webp")  # Save as WebP format

    # Load the image
    with Image.open(input_path) as img:
        # Create a thumbnail, resized in place to fit within 128x128
        thumbnail = img.copy()
        thumbnail.thumbnail((128, 128), Image.Resampling.LANCZOS)
        thumbnail.save(thumbnail_path, 'JPEG', quality=85)

        # Create a 1080p version, resized in place to fit within 1920x1080
        full_hd = img.copy()
        full_hd.thumbnail((1920, 1080), Image.Resampling.LANCZOS)
        full_hd.save(full_hd_path, 'JPEG', quality=90)

        # Convert to WebP for better compression (original dimensions kept)
        webp = img.copy()
        webp.save(webp_path, 'WEBP', quality=90)

    # Return paths to the processed images
    return thumbnail_path, full_hd_path, webp_path
def upload_to_s3(bucket_name, file_path, object_name):
    """
    Upload a single local file to S3 and print a confirmation line.

    :param bucket_name: Destination bucket.
    :param file_path: Local path of the file to upload.
    :param object_name: Key to store the object under.
    """
    # A new client is created for every call
    boto3.client('s3').upload_file(file_path, bucket_name, object_name)
    print(f"Uploaded {object_name} to S3 bucket {bucket_name}")
def upload_photos_to_s3(bucket_name, photo_paths):
    """
    Upload each photo to S3, using the file's base name as the object key.

    :param bucket_name: Destination bucket.
    :param photo_paths: Iterable of local file paths, as strings or
        ``os.PathLike`` objects such as ``pathlib.Path``.
    """
    # Upload each photo
    for path in photo_paths:
        # os.fspath + basename accept Path objects and the platform's own
        # separator; the previous path.split('/') raised AttributeError on
        # Path inputs and kept the whole path as the key on Windows.
        local_path = os.fspath(path)
        object_name = os.path.basename(local_path)
        upload_to_s3(bucket_name, local_path, object_name)
def generate_cdn_url(distribution_domain, object_name):
    """
    Build the HTTPS URL of an object served from the given distribution domain.

    :param distribution_domain: Host name of the distribution.
    :param object_name: Object key, appended after a single "/".
    :return: The URL as a string.
    """
    return "https://{}/{}".format(distribution_domain, object_name)
def process_and_upload_images(uprn, input_image_path, bucket_name, distribution_domain):
    """
    Render the web variants of one photo, upload them to S3 and return their
    CDN URLs.

    Each object key is the variant's relative local path (which includes the
    UPRN folder produced by ``create_images``). Previously only the base name
    was used as the key, so uploads for every property overwrote the same
    objects and all returned links pointed at them.

    :param uprn: Property identifier; passed to ``create_images`` as a string.
    :param input_image_path: Path of the source photo.
    :param bucket_name: Destination S3 bucket.
    :param distribution_domain: Host name used to build the returned URLs.
    :return: List of CDN URLs, in the order thumbnail, 1080p, WebP.
    """
    # Create images
    local_paths = create_images(input_image_path, uprn=str(uprn))

    cdn_links = []
    try:
        for local_path in local_paths:
            # Forward slashes regardless of platform, so the key is a valid URL path
            object_name = Path(local_path).as_posix()
            upload_to_s3(bucket_name, local_path, object_name)
            cdn_links.append(generate_cdn_url(distribution_domain, object_name))
    finally:
        # Delete local files, also when an upload fails part-way through
        for local_path in local_paths:
            if os.path.exists(local_path):
                os.remove(local_path)

    return cdn_links
def app():
    """
    Upload the photos recorded during the non-invasive surveys to S3.

    Every immediate subdirectory of ``PHOTO_DIRECTORY`` is treated as one
    property. Its name is mapped to a UPRN through ``FOLDER_UPRN_LOOKUP`` and
    each photo found below it is converted and uploaded by
    ``process_and_upload_images``.

    To begin with, this app simply reads the files from the local machine.
    NOTE(review): the CDN links returned per photo are currently discarded;
    nothing is written to a database here.

    :raises ValueError: if the bucket name or distribution domain is not
        configured in the environment.
    :return:
    """
    # Fail before any image is rendered rather than on the first upload
    if not CDN_BUCKET_NAME or not CLOUDFRONT_DISTRIBUTION_DOMAIN_NAME:
        raise ValueError(
            "CDN_BUCKET_NAME and CLOUDFRONT_DISTRIBUTION_DOMAIN_NAME must be set in the environment"
        )

    # List all property folders in the directory using pathlib
    property_directories = list_subdirectories(PHOTO_DIRECTORY)

    # For each property, we want to list all of the photos in the directory
    for property_dir in property_directories:
        uprn = FOLDER_UPRN_LOOKUP.get(property_dir.name)
        if uprn is None:
            # An unmapped folder used to abort the whole run with a KeyError
            print(f"Skipping {property_dir.name}: no UPRN mapping in FOLDER_UPRN_LOOKUP")
            continue

        # We now want to convert each file, and upload it to s3
        for photo_filepath in list_files_in_directory(property_dir):
            process_and_upload_images(
                uprn=uprn,
                input_image_path=photo_filepath,
                bucket_name=CDN_BUCKET_NAME,
                distribution_domain=CLOUDFRONT_DISTRIBUTION_DOMAIN_NAME,
            )

View file

@ -0,0 +1,3 @@
Pillow
boto3
python-dotenv

View file

@ -66,7 +66,7 @@ resource "aws_security_group" "allow_db" {
resource "aws_db_instance" "default" {
allocated_storage = var.allocated_storage
engine = "postgres"
engine_version = "14.7"
engine_version = "14.10"
instance_class = var.instance_class
db_name = var.database_name
username = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)["db_assessment_model_username"]
@ -181,4 +181,16 @@ module "lambda_carbon_prediction_ecr" {
module "lambda_heat_prediction_ecr" {
ecr_name = "lambda-heat-prediction-${var.stage}"
source = "./modules/ecr"
}
##############################################
# CDN - Cloudfront
##############################################
module "cloudfront_distribution" {
source = "./modules/cloudfront"
bucket_name = module.s3.bucket_name
bucket_id = module.s3.bucket_id
bucket_arn = module.s3.bucket_arn
bucket_domain_name = module.s3.bucket_domain_name
stage = var.stage
}

View file

@ -0,0 +1,65 @@
# CloudFront distribution that serves the contents of the S3 bucket passed in
# through var.bucket_domain_name / var.bucket_name.
resource "aws_cloudfront_distribution" "s3_distribution" {
# Single origin: the S3 bucket, read through the origin access identity
# declared in this module.
origin {
domain_name = var.bucket_domain_name
origin_id = "S3-${var.bucket_name}"
s3_origin_config {
origin_access_identity = aws_cloudfront_origin_access_identity.oai.cloudfront_access_identity_path
}
}
enabled = true
# Read-only behaviour: only GET/HEAD are allowed and cached, HTTP requests are
# redirected to HTTPS, and responses are compressed.
default_cache_behavior {
allowed_methods = ["GET", "HEAD"]
cached_methods = ["GET", "HEAD"]
target_origin_id = "S3-${var.bucket_name}"
viewer_protocol_policy = "redirect-to-https"
compress = true
# Neither query strings nor cookies are forwarded to the origin.
forwarded_values {
query_string = false
cookies {
forward = "none"
}
}
# TTLs in seconds: default 86400 (1 day), maximum 31536000 (365 days).
min_ttl = 0
default_ttl = 86400
max_ttl = 31536000
}
price_class = "PriceClass_All"
# No geographic restriction on viewers.
restrictions {
geo_restriction {
restriction_type = "none"
}
}
# Uses the default CloudFront certificate (no custom certificate is configured here).
viewer_certificate {
cloudfront_default_certificate = true
}
}
resource "aws_cloudfront_origin_access_identity" "oai" {
comment = "OAI for ${var.bucket_name}"
}
# Bucket policy granting the module's CloudFront origin access identity
# read access (s3:GetObject) to every object in the bucket.
resource "aws_s3_bucket_policy" "bucket_policy" {
bucket = var.bucket_id
policy = jsonencode({
Version = "2012-10-17"
Statement = [
{
Effect = "Allow"
# Principal is the OAI created by aws_cloudfront_origin_access_identity.oai
Principal = {
AWS = "arn:aws:iam::cloudfront:user/CloudFront Origin Access Identity ${aws_cloudfront_origin_access_identity.oai.id}"
}
Action = "s3:GetObject"
# "/*" scopes the grant to objects inside the bucket, not the bucket itself
Resource = "${var.bucket_arn}/*"
},
]
})
}

View file

@ -0,0 +1,24 @@
variable "bucket_name" {
description = "The name of the bucket"
type = string
}
variable "stage" {
description = "The deployment stage"
type = string
}
variable "bucket_id" {
description = "The ID of the S3 bucket"
type = string
}
variable "bucket_arn" {
description = "The ARN of the S3 bucket"
type = string
}
variable "bucket_domain_name" {
description = "The regional domain name of the S3 bucket"
type = string
}

View file

@ -2,3 +2,15 @@ output "bucket_name" {
description = "The name of the S3 bucket"
value = aws_s3_bucket.bucket.bucket
}
output "bucket_id" {
value = aws_s3_bucket.bucket.id
}
output "bucket_arn" {
value = aws_s3_bucket.bucket.arn
}
output "bucket_domain_name" {
value = aws_s3_bucket.bucket.bucket_regional_domain_name
}

View file

@ -37,12 +37,76 @@ MCS_SOLAR_PV_COST_DATA = {
"average_cost_per_kwh-Northern Ireland": 2126.09,
}
# This data is based on the MCS database, We use the larger figure between the 2023 and 2024 average,
# to be conservative
MCS_AIR_SOURCE_HEAT_PUMP_COST_DATA = {
"Outer London": 13220,
"Inner London": 13220,
"South East England": 13547,
"South West England": 12776,
"East of England": 12585,
"East Midlands": 12239,
"West Midlands": 13182,
"North East England": 11829,
"North West England": 11714,
"Yorkshire and the Humber": 11919,
"Wales": 13701,
"Scotland": 12586,
"Northern Ireland": 12000, # There are hardly any air source heat pump installs going on in Northern Ireland
}
BOILER_UPGRADE_SCHEME_ASHP_VALUE = 7500
# This is based on quotes from installers
BATTERY_COST = 3500
# This is based on https://www.checkatrade.com/blog/cost-guides/cost-smart-thermostat/
SMART_APPLIANCE_THERMOSTAT_COST = 400
PROGRAMMER_COST = 200
PROGRAMMER_COST = 120
ROOM_THERMOSTAT_COST = 150
TRVS_COST = 35
# Cost for TTZC
# Smart thermostat based on checkatrade https://www.checkatrade.com/blog/cost-guides/cost-smart-thermostat/
# Based on the Nest system
TTZC_SMART_THERMOSTAT_COST = 205
TTZC_SMART_THERMOSTAT_LABOUR_HOURS = 2
TTZC_ELECTRICIAN_HOURLY_RATE = 45
# Based on cost of a Nest temperature sensor
TTZC_ROOM_TEMPERATURE_SENSOR_COST = 50
TTZC_ROOM_TEMPERATURE_SENSOR_LABOUR_HOURS = 0.17 # (Assume ~ 10 mins install per sensor)
# Based on an average cost of smart radiator valves
TTZC_SMART_RADIATOR_VALUES = 50
TTZC_SMART_RADIATOR_VALUES_LABOUR_HOURS = 0.37 # (Assume ~ 15-30 mins install per valve)
# Low carbon combi boiler - median value based on £2200 - £3000 range
LOW_CARBON_COMBI_BOILER = 2200
# boiler prices based on
# https://www.greenmatch.co.uk/boilers/30kw-boiler
# https://www.greenmatch.co.uk/boilers/35kw-boiler
# https://www.greenmatch.co.uk/boilers/40kw-boiler
# These are exclusive of installation costs
CONDENSING_BOILER_COSTS = {
"30kw": 1550,
"35kw": 1610,
"40kw": 1625
}
# Assumes 3 hours to remove each heater (including re-decorating)
ROOM_HEATER_REMOVAL_COST = 120
ROOM_HEATER_REMOVAL_LABOUR_HOURS = 3
# This is a cost quoted by Jim for a system flush - existing system will run more efficiently
SYSTEM_FLUSH_COST = 250
SINGLE_RADIATOR_COST = 150
DOUBLE_RADIATOR_COST = 300
FLUE_COST = 600
PIPEWORK_COST = 750 # Min cost is £500
# This is the cost per meter squared for cavity extraction
# https://www.checkatrade.com/blog/cost-guides/cavity-wall-insulation-removal-cost/
CAVITY_EXTRACTION_COST = 21.5
class Costs:
@ -126,7 +190,7 @@ class Costs:
if not self.labour_adjustment_factor:
raise ValueError("Labour adjustment factor not found")
def cavity_wall_insulation(self, wall_area, material):
def cavity_wall_insulation(self, wall_area, material, is_extraction_and_refill=False):
"""
Calculates the total cost for cavity wall insulation based on material and labor costs,
including contingency, preliminaries, profit, and VAT.
@ -161,6 +225,13 @@ class Costs:
# Assume a team of 2
labour_days = (labour_hours / 8) / 2
if is_extraction_and_refill:
# bump up the cost of the work
total_cost = total_cost + CAVITY_EXTRACTION_COST * wall_area
# Additional 2 days work
labour_hours = labour_hours + (2 * 8)
labour_days = labour_days + 2
return {
"total": total_cost,
"subtotal": subtotal_before_vat,
@ -998,3 +1069,219 @@ class Costs:
"labour_hours": 0,
"labour_days": 0,
}
def roomstat_programmer_trvs(
    self, number_heated_rooms, has_programmer, has_trvs, has_room_thermostat
):
    """
    Cost the heating controls that are missing: programmer, TRVs and room
    thermostat.

    Only controls flagged as absent contribute. The summed figure is returned
    as "total", and "subtotal"/"vat" are derived from it by dividing out
    ``self.VAT_RATE``.

    :param number_heated_rooms: Multiplier for the per-room TRV cost and hours
    :param has_programmer: True when a programmer is already present
    :param has_trvs: True when TRVs are already present
    :param has_room_thermostat: True when a room thermostat is already present
    :return: dict with "total", "subtotal", "vat", "labour_hours" and
        "labour_days" (always 1)
    """
    # (cost, labour hours) for each control that has to be installed
    missing_items = []
    if not has_programmer:
        missing_items.append((PROGRAMMER_COST, 1))
    if not has_trvs:
        missing_items.append(
            (TRVS_COST * number_heated_rooms, 0.25 * number_heated_rooms)
        )
    if not has_room_thermostat:
        missing_items.append((ROOM_THERMOSTAT_COST, 0.5))

    total_cost = sum(cost for cost, _ in missing_items)
    labour_hours = sum(hours for _, hours in missing_items)

    # Back the VAT out of the total
    subtotal_before_vat = total_cost / (1 + self.VAT_RATE)

    return {
        "total": total_cost,
        "subtotal": subtotal_before_vat,
        "vat": total_cost - subtotal_before_vat,
        "labour_hours": labour_hours,
        "labour_days": 1,
    }
def time_and_temperature_zone_control(self, number_heated_rooms):
    """
    Estimates the cost of installing time and temperature zone controls: a single smart thermostat plus
    a room temperature sensor and a smart radiator valve for every heated room.
    :param number_heated_rooms: number of heated rooms in the property
    :return: dict with total, subtotal, vat, labour_hours and labour_days (hours rounded up to whole days)
    """
    # The product costs are inclusive of VAT
    sensors_cost = TTZC_ROOM_TEMPERATURE_SENSOR_COST * number_heated_rooms
    valves_cost = TTZC_SMART_RADIATOR_VALUES * number_heated_rooms
    hardware_cost = TTZC_SMART_THERMOSTAT_COST + sensors_cost + valves_cost

    sensors_hours = TTZC_ROOM_TEMPERATURE_SENSOR_LABOUR_HOURS * number_heated_rooms
    valves_hours = TTZC_SMART_RADIATOR_VALUES_LABOUR_HOURS * number_heated_rooms
    hours = TTZC_SMART_THERMOSTAT_LABOUR_HOURS + sensors_hours + valves_hours

    # Contingency and preliminaries are added to the labour to account for the complexity of the job
    electrician_cost = TTZC_ELECTRICIAN_HOURLY_RATE * hours
    electrician_cost = electrician_cost * (1 + self.CONTINGENCY + self.PRELIMINARIES)

    # VAT is only charged on the labour, since the products already include it
    labour_vat = electrician_cost * self.VAT_RATE
    net_cost = hardware_cost + electrician_cost

    return {
        "total": net_cost + labour_vat,
        "subtotal": net_cost,
        "vat": labour_vat,
        "labour_hours": hours,
        "labour_days": np.ceil(hours / 8),
    }
def heater_removal(self, n_rooms):
    """
    Estimates the cost of removing room heaters, including redecorating the space behind each heater.
    The per-heater cost is treated as net of VAT, so VAT is added on top.
    :param n_rooms: number of rooms that have a heater to remove
    :return: dict with total, subtotal, vat, labour_hours and labour_days (hours rounded up to whole days)
    """
    net_cost = ROOM_HEATER_REMOVAL_COST * n_rooms
    hours = ROOM_HEATER_REMOVAL_LABOUR_HOURS * n_rooms
    vat_amount = net_cost * self.VAT_RATE

    return {
        "total": net_cost + vat_amount,
        "subtotal": net_cost,
        "vat": vat_amount,
        "labour_hours": hours,
        "labour_days": np.ceil(hours / 8),
    }
@staticmethod
def _estimate_n_radiators(number_habitable_rooms, total_floor_area, property_type, built_form):
    """
    Estimates how many radiators a property needs for a first-time central heating installation.
    Two estimates are produced and the larger one is returned:
    1) a room count: one radiator per habitable room plus an allowance for non-habitable areas
    2) a heating power estimate: 80W per square metre of floor area, scaled by built form and divided
       between radiators of 1000W each
    :param number_habitable_rooms: number of habitable rooms in the property
    :param total_floor_area: total floor area in square metres
    :param property_type: one of 'Flat', 'House', 'Bungalow' or 'Maisonette'
    :param built_form: one of 'Mid-Terrace', 'Semi-Detached', 'Detached' or 'End-Terrace'
    :return: estimated number of radiators, rounded to the nearest integer
    :raises ValueError: if the property type is not one of the supported values
    :raises KeyError: if the built form is not one of the supported values
    """
    # Allowance for non-habitable essential areas (e.g., kitchens, hallways), starting from 3
    if property_type == 'Flat':
        # Flats may need fewer radiators due to less exposure
        additional_radiators = 3 - 1
    elif property_type in ('House', 'Bungalow', 'Maisonette'):
        # Houses and bungalows might need more due to greater exposure, and the multiple floors of a
        # maisonette may require additional heating points
        additional_radiators = 3 + 2
    else:
        # ValueError is still an Exception, so existing handlers keep working, but the message now
        # identifies the value that was rejected
        raise ValueError(f"Invalid property type: {property_type!r}")

    # Multiplier on the heating power requirement for each built form
    form_factor = {
        'Mid-Terrace': 0.95,
        'Semi-Detached': 1.05,
        'Detached': 1.25,
        'End-Terrace': 1.05
    }

    # Heating power needed for the whole property, and the number of radiators needed to deliver it
    total_heating_power_required = total_floor_area * 80  # Watts per square meter
    radiator_output = 1000  # Average wattage per radiator
    radiators_based_on_power = (total_heating_power_required / radiator_output) * form_factor[built_form]

    # Final estimation takes the higher of the power-based figure and the room-based figure
    radiators_based_on_rooms = number_habitable_rooms + additional_radiators
    return round(max(radiators_based_on_power, radiators_based_on_rooms))
def boiler(self, size, exising_room_heaters, system_change, n_heated_rooms, n_rooms):
    """
    Estimates the cost of installing a condensing boiler.
    Based on a basic estimate of median value £2600 to install a low carbon combi boiler.
    First time central heating costs can also be found here:
    https://www.checkatrade.com/blog/cost-guides/central-heating-installation-cost/
    :param size: key into CONDENSING_BOILER_COSTS identifying the boiler size
    :param exising_room_heaters: if truthy, the cost of removing the existing room heaters is added
    :param system_change: if truthy, radiators, flue, pipework and an extra day of labour are added
    :param n_heated_rooms: number of heated rooms, used as the number of heaters to remove
    :param n_rooms: number of rooms, used as the habitable room count for the radiator estimate
    :return: dict with total, subtotal, vat, labour_hours and labour_days
    """
    # The unit cost is the cost without VAT
    # NOTE(review): VAT is only added to the labour below, never to the unit cost - confirm this is intended
    boiler_price = CONDENSING_BOILER_COSTS[size]

    # Average cost of installation is 1 (maybe 2 days) at £300 per day
    # https://www.checkatrade.com/blog/cost-guides/new-boiler-cost/
    # To be pessimistic, assume 2 days work
    day_rate = 300
    install_days = 2

    install_labour = day_rate * self.labour_adjustment_factor * install_days
    # Add contingency and preliminaries
    install_labour = install_labour * (1 + self.CONTINGENCY + self.PRELIMINARIES)

    labour_vat = install_labour * self.VAT_RATE
    net_cost = boiler_price + install_labour

    costing = {
        "total": net_cost + labour_vat,
        "subtotal": net_cost,
        "vat": labour_vat,
        "labour_hours": install_days * 8,
        "labour_days": install_days,
    }

    # If there are existing room heaters, their removal is added onto every figure
    if exising_room_heaters:
        removal = self.heater_removal(n_rooms=n_heated_rooms)
        for key in costing:
            costing[key] += removal[key]

    if system_change:
        # A system change needs radiators, a flue and pipework, plus an extra day of labour
        radiator_count = self._estimate_n_radiators(
            number_habitable_rooms=n_rooms,
            total_floor_area=self.property.floor_area,
            property_type=self.property.data["property-type"],
            built_form=self.property.data["built-form"]
        )
        extra_labour = day_rate * self.labour_adjustment_factor
        radiators = DOUBLE_RADIATOR_COST * radiator_count

        # These extras are treated as VAT-inclusive, so the net figure is derived from the gross one
        gross_extras = radiators + FLUE_COST + PIPEWORK_COST + extra_labour
        net_extras = gross_extras / (1 + self.VAT_RATE)

        costing["labour_days"] += 1
        costing["labour_hours"] += 8
        costing["total"] += gross_extras
        costing["subtotal"] += net_extras
        costing["vat"] += gross_extras - net_extras

    return costing
def air_source_heat_pump(self):
    """
    Produces a cost estimate for an air source heat pump from the MCS cost data for the property's region.
    Contingency is added to the regional figure and the boiler upgrade scheme grant is then deducted.
    The result is treated as VAT-inclusive, so the VAT element is backed out of the total.
    :return: dict with total, subtotal, vat, labour_hours and labour_days (a 3 day installation is assumed)
    """
    # This is the average cost of a project, so some additional contingency is added before the grant
    average_project_cost = MCS_AIR_SOURCE_HEAT_PUMP_COST_DATA[self.region]
    gross_cost = average_project_cost * (1 + self.CONTINGENCY) - BOILER_UPGRADE_SCHEME_ASHP_VALUE
    net_cost = gross_cost / (1 + self.VAT_RATE)

    # We assume 3 days installation
    install_days = 3

    return {
        "total": gross_cost,
        "subtotal": net_cost,
        "vat": gross_cost - net_cost,
        "labour_hours": install_days * 8,
        "labour_days": install_days,
    }

View file

@ -32,7 +32,8 @@ class FireplaceRecommendations(Definitions):
if number_open_fireplaces == 0:
return
estimated_cost = number_open_fireplaces * self.COST_OF_WORK
already_installed = "sealing_open_fireplace" in self.property.already_installed
estimated_cost = number_open_fireplaces * self.COST_OF_WORK if not already_installed else 0
# We recommend installing two mechanical ventilation systems
self.recommendation = [
@ -44,6 +45,7 @@ class FireplaceRecommendations(Definitions):
"starting_u_value": None,
"new_u_value": None,
"sap_points": None,
"already_installed": already_installed,
"total": estimated_cost,
# Take a very basic estimate of 6 hours, multiplied by the number of open fireplaces to seal
"labour_hours": 6 * number_open_fireplaces,

View file

@ -8,7 +8,7 @@ from datatypes.enums import QuantityUnits
from backend.Property import Property
from recommendations.recommendation_utils import (
r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value,
get_recommended_part, get_floor_u_value
get_recommended_part, get_floor_u_value, override_costs
)
from recommendations.Costs import Costs
@ -192,12 +192,21 @@ class FloorRecommendations(Definitions):
material=material.to_dict(),
non_insulation_materials=non_insulation_materials
)
already_installed = "suspended_floor_insulation" in self.property.already_installed
if already_installed:
cost_result = override_costs(cost_result)
elif material["type"] == "solid_floor_insulation":
cost_result = self.costs.solid_floor_insulation(
insulation_floor_area=self.property.insulation_floor_area,
material=material.to_dict(),
non_insulation_materials=non_insulation_materials
)
already_installed = "solid_floor_insulation" in self.property.already_installed
if already_installed:
cost_result = override_costs(cost_result)
else:
raise NotImplementedError("Implement me!")
@ -217,6 +226,7 @@ class FloorRecommendations(Definitions):
"starting_u_value": u_value,
"new_u_value": new_u_value,
"sap_points": None,
"already_installed": already_installed,
**cost_result
}
)

View file

@ -1,5 +1,5 @@
from recommendations.Costs import Costs
from recommendations.recommendation_utils import check_simulation_difference
from recommendations.recommendation_utils import check_simulation_difference, override_costs
from backend.Property import Property
from etl.epc_clean.epc_attributes.MainheatControlAttributes import MainheatControlAttributes
@ -27,6 +27,17 @@ class HeatingControlRecommender:
self.recommend_high_heat_retention_controls()
return
if heating_description in ["Boiler and radiators, mains gas"]:
# We can recommend roomstat programmer trvs
self.recommend_roomstat_programmer_trvs()
# We can also recommend time and temperature zone controls
self.recommend_time_temperature_zone_controls()
return
if heating_description in ["Air source heat pump, radiators, electric"]:
self.recommend_time_temperature_zone_controls()
def recommend_room_heaters_electric_controls(self):
"""
If the home has Room heaters, electric, we start by identifying potential heating controls that could
@ -105,3 +116,136 @@ class HeatingControlRecommender:
# We don't implement any other recommendations right now
return
def recommend_roomstat_programmer_trvs(self):
    """
    Recommends upgrading the heating controls to a programmer, room thermostat and TRVs.
    The upgrade is recommended when at least one of the following is true:
    1) There are no controls
    2) No programmer
    3) No room thermostat
    4) No TRVs
    The recommendation is appended to self.recommendation; nothing is appended if none of the above apply.
    :return:
    """
    needs_programmer = self.property.main_heating_controls["switch_system"] is None
    needs_room_thermostat = self.property.main_heating_controls["thermostatic_control"] is None
    needs_trvs = self.property.main_heating_controls["trvs"] is None
    flagged_no_control = self.property.main_heating_controls["no_control"] is not None

    # Nothing to upgrade when every component is already in place
    if not (flagged_no_control or needs_programmer or needs_room_thermostat or needs_trvs):
        return

    target_config = MainheatControlAttributes("Programmer, room thermostat and TRVS").process()
    # The difference against the current controls determines how the config should be updated
    simulation_config = check_simulation_difference(
        new_config=target_config, old_config=self.property.main_heating_controls
    )

    # This upgrade takes the heating controls to 'Good' energy efficiency when they are currently below that
    if self.property.data["mainheatc-energy-eff"] in ["Poor", "Very Poor", "Average"]:
        simulation_config["mainheatc_energy_eff_ending"] = "Good"

    # Only the components that are missing get costed
    cost_result = self.costs.roomstat_programmer_trvs(
        number_heated_rooms=int(self.property.data["number-heated-rooms"]),
        has_programmer=not needs_programmer,
        has_room_thermostat=not needs_room_thermostat,
        has_trvs=not needs_trvs
    )

    already_installed = "heating_control" in self.property.already_installed
    if already_installed:
        cost_result = override_costs(cost_result)
        description = "Heating controls have already been upgraded, no further action needed."
    else:
        description = "upgrade heating controls to Room thermostat, programmer and TRVs"

    self.recommendation.append(
        {
            "type": "heating_control",
            "parts": [],
            "description": description,
            **cost_result,
            "starting_u_value": None,
            "new_u_value": None,
            "sap_points": None,
            "already_installed": already_installed,
            "simulation_config": simulation_config
        }
    )
def recommend_time_temperature_zone_controls(self):
    """
    Recommends time and temperature zone controls. This is a more advanced and more efficient control
    system than the standard controls that come with a boiler. However, it may come with a higher cost
    and more involved usage.
    Nothing is recommended if the property already has time and temperature zone control, or if its
    heating controls are already rated 'Very Good'.
    :return:
    """
    current_thermostatic_control = self.property.main_heating_controls["thermostatic_control"]
    if current_thermostatic_control == "time and temperature zone control":
        # Already has the controls we would be recommending
        return
    if self.property.data["mainheatc-energy-eff"] in ["Very Good"]:
        # The controls cannot be improved any further
        return

    target_config = MainheatControlAttributes("Time and temperature zone control").process()
    # The difference against the current controls determines how the config should be updated
    simulation_config = check_simulation_difference(
        new_config=target_config, old_config=self.property.main_heating_controls
    )

    # If the current system is below very good, we make it very good
    if self.property.data["mainheatc-energy-eff"] in ["Poor", "Very Poor", "Average", "Good"]:
        simulation_config["mainheatc_energy_eff_ending"] = "Very Good"

    cost_result = self.costs.time_and_temperature_zone_control(
        number_heated_rooms=int(self.property.data["number-heated-rooms"])
    )

    already_installed = "heating_control" in self.property.already_installed
    if already_installed:
        cost_result = override_costs(cost_result)
        description = "Heating controls have already been upgraded, no further action needed."
    else:
        description = ("Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & "
                       "temperature zone control)")

    self.recommendation.append(
        {
            "type": "heating_control",
            "parts": [],
            "description": description,
            **cost_result,
            "starting_u_value": None,
            "new_u_value": None,
            "sap_points": None,
            "already_installed": already_installed,
            "simulation_config": simulation_config
        }
    )

View file

@ -1,9 +1,9 @@
import pandas as pd
from recommendations.Costs import Costs
from recommendations.recommendation_utils import check_simulation_difference
from recommendations.Costs import Costs, BOILER_UPGRADE_SCHEME_ASHP_VALUE
from recommendations.recommendation_utils import check_simulation_difference, override_costs
from backend.Property import Property
from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes
from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes
from recommendations.HeatingControlRecommender import HeatingControlRecommender
@ -13,18 +13,194 @@ class HeatingRecommender:
self.property = property_instance
self.costs = Costs(self.property)
self.recommendations = []
self.heating_recommendations = []
self.heating_control_recommendations = []
def recommend(self, phase=0):
self.recommendations = []
def recommend(self, has_cavity_or_loft_recommendations, phase=0):
"""
Produces heating recommendations
:param has_cavity_or_loft_recommendations: boolean indicating if we have produced a cavity or loft insulation
recommendation. If there are cavity or loft recommendations, the property would need to complete those measures
before being able to get the boiler upgrade scheme benefits. The messaging in the front end would be to
:param phase: indicates the phase of the retrofit programme
"""
# TODO: We could have a system flush recommendation for an existing boiler, where there is no need to replace
# the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this
# in the Costs class, stored as SYSTEM_FLUSH_COST
self.heating_recommendations = []
self.heating_control_recommendations = []
# This first iteration of the recommender will provide very basic recommendation
# We recommend heating controls based on the main heating system
if self.property.main_heating["clean_description"] in [
has_electric_heating_description = self.property.main_heating["clean_description"] in [
"Room heaters, electric", "Electric storage heaters", "Electric storage heaters, radiators"
]:
]
no_heating_no_mains = (
self.property.main_heating["clean_description"] in ["No system present, electric heaters assumed"] and
not self.property.data["mains-gas-flag"]
)
if has_electric_heating_description or no_heating_no_mains:
# Recommend high heat retention storage heaters
self.recommend_electric_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)
return
self.recommend_hhr_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)
# if the property has mains heating with boiler and radiators, we recommend optimal heating controls
has_boiler = self.property.main_heating["clean_description"] in ["Boiler and radiators, mains gas"]
# We also check that the property doesn't have a heating system, but it has access to the mains gas
no_heating_has_mains = self.property.main_heating["clean_description"] in [
'No system present, electric heaters assumed'
] and self.property.data["mains-gas-flag"]
has_gas_heaters = (
self.property.main_heating["clean_description"] in ["Room heaters, mains gas"] and
self.property.data["mains-gas-flag"]
)
# We also check if the property has electric heating, but it has access to the mains gas
electic_heating_has_mains = has_electric_heating_description and self.property.data["mains-gas-flag"]
portable_heaters_has_mains = (
self.property.main_heating["clean_description"] in ["Portable electric heaters assumed for most rooms"] and
self.property.data["mains-gas-flag"]
)
if (
has_boiler or
no_heating_has_mains or
electic_heating_has_mains or
has_gas_heaters or
portable_heaters_has_mains
):
# This indicates that the home previously did not have a boiler in place and so would require
# an overhaul to the system - right now, this is all reasons, apart from if there is an existing boiler
system_change = not has_boiler
exising_room_heaters = self.property.main_heating["clean_description"] in [
"Room heaters, electric", "Room heaters, mains gas"
]
self.recommend_boiler_upgrades(
phase=phase, system_change=system_change, exising_room_heaters=exising_room_heaters
)
# We recommend air source heat pumps
# Heat pumps are suitable for all property types:
# https://energysavingtrust.org.uk/from-flats-to-terraced-houses-heat-pumps-are-suitable-for-all-property-types/
# Just seems least probable for flats, so we'll allow houses and bungalows
# In the future, we'll allow overrides, so that non-intrusive surveys can contradict these conditions
# and either allow or prevent the recommendation of an air source heat pump
suitable_property_type = self.property.data["property-type"] in ["House", "Bungalow"]
has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"]
if suitable_property_type and not has_air_source_heat_pump:
self.recommend_air_source_heat_pump(
phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations
)
return
def recommend_air_source_heat_pump(self, phase, has_cavity_or_loft_recommendations):
    """
    Recommends an air source heat pump.
    This is ultimately an overhaul to the heating system and so is recommended as an alternative to other
    heating system recommendations. Any heating controls recommendation produced for a heat pump is folded
    into the same recommendation (costs, description and simulation config).
    :param phase: the phase of the retrofit programme this recommendation belongs to
    :param has_cavity_or_loft_recommendations: if truthy, the description warns that the cavity and loft
    need to be insulated to qualify for the boiler upgrade scheme grant
    :return:
    """
    controls_recommender = HeatingControlRecommender(self.property)
    controls_recommender.recommend(heating_description="Air source heat pump, radiators, electric")
    controls_recommendations = controls_recommender.recommendation

    ashp_costs = self.costs.air_source_heat_pump()
    if controls_recommendations:
        # The heating controls costs are added onto each key of the heat pump costs
        controls_costs = controls_recommendations[0]
        for key in ashp_costs:
            ashp_costs[key] += controls_costs[key]

    already_installed = "air_source_heat_pump" in self.property.already_installed
    if already_installed:
        ashp_costs = override_costs(ashp_costs)
        description = "The property already has an air source heat pump, no further action needed."
    else:
        if controls_recommendations:
            description = ("Install an air source heat pump, and upgrade heating controls to Smart Thermostats, "
                           "room sensors and smart radiator valves (time & temperature zone control).")
        else:
            description = "Install an air source heat pump."

        # The cost always includes the boiler upgrade scheme grant. If cavity or loft insulation has been
        # recommended, those need to be treated to ensure access to the funding, so a note is added
        grant_note = f" The cost includes the £{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant"
        if has_cavity_or_loft_recommendations:
            grant_note += (". You must ensure that the property has an insulated cavity and "
                           "270mm+ loft insulation to qualify for the grant")
        description = description + grant_note

    # Installation of a heat pump changes the heating and hot water systems, so both need to be reflected
    # in the outcome of the recommendation
    heating_ending_config = MainHeatAttributes("Air source heat pump, radiators, electric").process()
    hotwater_ending_config = HotWaterAttributes("From main system").process()

    # If the property does not currently have electric main fuel, we'll simulate the change
    if self.property.main_fuel["fuel_type"] != "electricity":
        fuel_ending_config = MainFuelAttributes("electricity (not community)").process()
    else:
        fuel_ending_config = {}

    heating_simulation_config = check_simulation_difference(
        new_config=heating_ending_config, old_config=self.property.main_heating
    )
    hotwater_simulation_config = check_simulation_difference(
        new_config=hotwater_ending_config, old_config=self.property.hotwater
    )
    fuel_simulation_config = check_simulation_difference(
        new_config=fuel_ending_config, old_config=self.property.main_fuel
    )

    # Later updates take precedence over earlier ones where keys overlap
    simulation_config = {
        "mainheat_energy_eff_ending": "Good",
        "hot_water_energy_eff_ending": "Good"
    }
    simulation_config.update(heating_simulation_config)
    simulation_config.update(hotwater_simulation_config)
    simulation_config.update(fuel_simulation_config)

    if controls_recommendations:
        # We should have just the single recommendation for heat controls, which is time
        # and temperature zone controls
        if len(controls_recommendations) != 1:
            raise NotImplementedError("More than one heat controls recommendation for air source heat pump")
        simulation_config.update(controls_recommendations[0]["simulation_config"])

    self.heating_recommendations.append(
        {
            "phase": phase,
            "parts": [
                # TODO
            ],
            "type": "heating",
            "description": description,
            "starting_u_value": None,
            "new_u_value": None,
            "sap_points": None,
            "already_installed": already_installed,
            "simulation_config": simulation_config,
            **ashp_costs
        }
    )
@staticmethod
def check_simulation_difference(old_config, new_config):
@ -39,9 +215,8 @@ class HeatingRecommender:
return differences
@staticmethod
def combine_heating_and_controls(
controls_recommendations, heating_simulation_config, costs, description, phase, heating_controls_only,
self, controls_recommendations, heating_simulation_config, costs, description, phase, heating_controls_only,
system_change
):
"""
@ -82,8 +257,18 @@ class HeatingRecommender:
**recommendation_simulation_config,
**controls_recommendations[0]["simulation_config"]
}
controls_description = controls_recommendations[0]['description']
# Make the first letter of the description lowercase
controls_description = (
controls_description[0].lower() + controls_description[1:]
)
recommendation_description = f"{description} and {controls_recommendations[0]['description']}"
recommendation_description = f"{description} and {controls_description}"
already_installed = "heating_controls" in self.property.already_installed
if already_installed:
total_costs = override_costs(total_costs)
recommendation_description = "Heating system has already been upgraded, no further action needed."
recommendation = {
"phase": phase,
@ -95,6 +280,7 @@ class HeatingRecommender:
"starting_u_value": None,
"new_u_value": None,
"sap_points": None,
"already_installed": already_installed,
**total_costs,
"simulation_config": recommendation_simulation_config
}
@ -126,9 +312,8 @@ class HeatingRecommender:
return output
def recommend_electric_storage_heaters(self, phase, system_change, heating_controls_only):
def recommend_hhr_storage_heaters(self, phase, system_change, heating_controls_only):
"""
We recommend electric storage heaters as an upgrade to the heating system.
We will recommend upgrading to a high heat retention storage system, if the current system is not already
high heat retention storage
@ -165,9 +350,18 @@ class HeatingRecommender:
# This upgrade will only take the heating system to average energy efficiency
heating_simulation_config["mainheat_energy_eff_ending"] = "Average"
# If the property is off-gas and has no heating system in place, the number of heated rooms will actually
# be 0, so we use the number of rooms as the figure
number_heated_rooms = (
self.property.data["number-heated-rooms"] if self.property.data["number-heated-rooms"] > 0
else (
self.property.number_of_rooms - 1 if self.property.number_of_rooms > 1 else
self.property.number_of_rooms
)
)
# Upgrade to electric storage heaters
costs = self.costs.high_heat_electric_storage_heaters(
number_heated_rooms=self.property.data["number-heated-rooms"]
number_heated_rooms=number_heated_rooms
)
description = "Install high heat retention electric storage heaters"
@ -181,4 +375,182 @@ class HeatingRecommender:
system_change=system_change
)
self.recommendations.extend(recommendations)
self.heating_recommendations.extend(recommendations)
@staticmethod
def estimate_boiler_size(property_type, built_form, floor_area, floor_height, num_heated_rooms):
    """
    Estimates the boiler size (in kW) for a property.
    A raw estimate is built from the property type, built form, volume and number of heated rooms. The
    raw estimate is clamped to between 30kW and 40kW and then snapped to the closest available size, so
    the result is always 30, 35 or 40.
    :param property_type: one of 'Flat', 'House', 'Maisonette' or 'Bungalow'
    :param built_form: one of 'Mid-Terrace', 'End-Terrace', 'Semi-Detached' or 'Detached'
    :param floor_area: floor area of the property
    :param floor_height: floor height of the property; multiplied by the floor area to give the volume
    :param num_heated_rooms: number of heated rooms
    :return: the boiler size in kW, as an integer
    :raises KeyError: if the property type or built form is not one of the supported values
    """
    # The raw estimate is clamped to this range before a size is chosen
    min_size_kw = 30
    max_size_kw = 40
    # 45 and 50 can never be selected while the upper clamp is 40; they are kept so the list still covers
    # larger boilers if the clamp is ever raised
    available_sizes = [30, 35, 40, 45, 50]

    # Step 1: Base size estimation based on property type (as a starting point)
    base_size = {
        'Flat': 25,
        'House': 30,
        'Maisonette': 28,
        'Bungalow': 27
    }

    # Step 2: Adjustment for built form (to account for heat retention)
    form_adjustment = {
        'Mid-Terrace': 0,
        'End-Terrace': 2,
        'Semi-Detached': 4,
        'Detached': 6
    }

    # Step 3: Adjustments for the total volume and the number of heated rooms
    volume = floor_area * floor_height
    volume_adjustment = volume / 100  # Simplified adjustment factor for volume
    rooms_adjustment = (num_heated_rooms - 5) * 0.5  # Assuming base case of 5 rooms

    estimated_size = base_size[property_type] + form_adjustment[built_form] + volume_adjustment + rooms_adjustment

    # Step 4: Keep the estimate within the supported range
    estimated_size = min(max(estimated_size, min_size_kw), max_size_kw)

    # Step 5: Snap to the closest available size. When the estimate is exactly half way between two
    # sizes, the smaller one is chosen, since min returns the first of equally close candidates
    return min(available_sizes, key=lambda candidate: abs(candidate - estimated_size))
def recommend_boiler_upgrades(self, phase, system_change, exising_room_heaters):
    """
    Recommends a condensing boiler upgrade, together with the heating controls that accompany a boiler.
    A boiler upgrade is only produced when the space heating is rated 'Average' or below, or when the hot
    water comes from the main system and is rated 'Average' or below.

    Without a system change, the boiler upgrade goes into self.heating_recommendations and the heating
    controls go into self.heating_control_recommendations as separate recommendations.
    With a system change, the boiler upgrade is combined with each heating controls recommendation, giving
    one heating recommendation per controls option. If there are no controls to combine with, the boiler
    upgrade is recommended on its own, and if there is no boiler upgrade then nothing is recommended.

    :param phase: the phase of the retrofit programme
    :param system_change: Indicates if the property would be undergoing a heating system change. This could be true
                          if the home didn't have a heating system in place, or if the home had electric heating
                          previously
    :param exising_room_heaters: Indicates if the property had room heaters previously - if so, a boiler
                                 recommendation will need to be accompanied by removal of the room heaters
    :return:
    """
    recommendation_phase = phase

    # These stay empty if no boiler upgrade is applicable
    simulation_config = {}
    boiler_costs = {}
    boiler_recommendation = {}

    has_inefficient_space_heating = self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"]
    has_inefficient_mains_water = (
        self.property.hotwater["clean_description"] in ["From main system"] and
        self.property.data["hot-water-energy-eff"] in ["Very Poor", "Poor", "Average"]
    )

    if has_inefficient_space_heating or has_inefficient_mains_water:
        boiler_size = self.estimate_boiler_size(
            property_type=self.property.data["property-type"],
            built_form=self.property.data["built-form"],
            floor_area=self.property.floor_area,
            floor_height=self.property.floor_height,
            num_heated_rooms=self.property.data["number-heated-rooms"],
        )

        description = "Upgrade to a new condensing boiler"
        simulation_config = {
            "mainheat_energy_eff_ending": "Good",
            "hot_water_energy_eff_ending": "Good"
        }

        if system_change:
            # Installation of a boiler improves the hot water system so we need to reflect this in
            # the outcome of the recommendation
            heating_ending_config = MainHeatAttributes("Boiler and radiators, mains gas").process()
            hotwater_ending_config = HotWaterAttributes("From main system").process()
            fuel_ending_config = MainFuelAttributes("mains gas (not community)").process()

            heating_simulation_config = check_simulation_difference(
                new_config=heating_ending_config, old_config=self.property.main_heating
            )
            hotwater_simulation_config = check_simulation_difference(
                new_config=hotwater_ending_config, old_config=self.property.hotwater
            )
            fuel_simulation_config = check_simulation_difference(
                new_config=fuel_ending_config, old_config=self.property.main_fuel
            )
            simulation_config = {
                **simulation_config,
                **heating_simulation_config,
                **hotwater_simulation_config,
                **fuel_simulation_config,
            }

        boiler_costs = self.costs.boiler(
            size=f"{boiler_size}kw",
            exising_room_heaters=exising_room_heaters,
            system_change=system_change,
            n_heated_rooms=self.property.data["number-heated-rooms"],
            n_rooms=self.property.number_of_rooms
        )

        already_installed = "heating" in self.property.already_installed
        if already_installed:
            boiler_costs = override_costs(boiler_costs)
            description = "Heating system has already been upgraded, no further action needed."

        boiler_recommendation = {
            "phase": recommendation_phase,
            "parts": [
                # TODO
            ],
            "type": "heating",
            "description": description,
            "starting_u_value": None,
            "new_u_value": None,
            "sap_points": None,
            "already_installed": already_installed,
            "simulation_config": simulation_config,
            **boiler_costs
        }

    # We recommend the heating controls that go with a boiler - we may get 2 recommendations back
    controls_recommender = HeatingControlRecommender(self.property)
    controls_recommender.recommend(heating_description="Boiler and radiators, mains gas")
    controls_recommendations = controls_recommender.recommendation

    if not controls_recommendations and not boiler_recommendation:
        return

    if system_change:
        if not boiler_recommendation:
            # The controls are for a boiler the property does not have and is not being recommended, so
            # there is nothing to combine them with. Previously this raised a KeyError when reading the
            # description of the empty boiler recommendation
            return

        if not controls_recommendations:
            # There are no controls to combine with, so the boiler is recommended on its own. Previously
            # the boiler recommendation was silently dropped in this case
            self.heating_recommendations.append(boiler_recommendation)
            return

        # We combine the boiler with each of the controls recommendations, giving one heating
        # recommendation per controls option
        combined_recommendations = []
        for controls_recommendation in controls_recommendations:
            combined_recommendation = self.combine_heating_and_controls(
                controls_recommendations=[controls_recommendation],
                heating_simulation_config=simulation_config,
                costs=boiler_costs,
                description=boiler_recommendation["description"],
                phase=recommendation_phase,
                heating_controls_only=False,
                system_change=True
            )
            combined_recommendations.extend(combined_recommendation)

        self.heating_recommendations.extend(combined_recommendations)
        return

    # Without a system change, the boiler upgrade stands as its own recommendation
    if boiler_recommendation:
        self.heating_recommendations.append(boiler_recommendation)

    # The heating controls are separate from the boiler upgrade, so they move to the next phase, but only
    # if there is a heating recommendation for them to follow
    has_heating_recommendation = any(
        rec["type"] == "heating" for rec in self.heating_recommendations
    )
    if has_heating_recommendation:
        recommendation_phase += 1

    # The heating controls recommendation is distinct from the boiler upgrade recommendation
    # We insert phase into the recommendations for heating controls
    for recommendation in controls_recommendations:
        recommendation["phase"] = recommendation_phase

    self.heating_control_recommendations.extend(controls_recommendations)

View file

@ -1,5 +1,6 @@
from backend.Property import Property
from recommendations.Costs import Costs
from recommendations.recommendation_utils import override_costs
class HotwaterRecommendations:
@ -22,8 +23,14 @@ class HotwaterRecommendations:
# This first iteration of the recommender will provide very basic recommendation
# We recommend heating controls based on the main heating system
if (self.property.hotwater["heater_type"] in ["electric immersion"]) & \
(self.property.data["hot-water-energy-eff"] == "Very Poor"):
# If there is no system present, but access to the mains, we
if (
(self.property.hotwater["heater_type"] in ["electric immersion"]) &
(self.property.data["hot-water-energy-eff"] == "Very Poor") &
(self.property.hotwater["no_system_present"] is None)
):
self.recommend_tank_insulation(phase=phase)
return
@ -35,6 +42,13 @@ class HotwaterRecommendations:
recommendation_cost = self.costs.hot_water_tank_insulation()
already_installed = "hot_water_tank_insulation" in self.property.already_installed
if already_installed:
recommendation_cost = override_costs(recommendation_cost)
description = "Insulation tank has already been insulated, no further action required"
else:
description = "Insulate hot water tank"
self.recommendations.append(
{
"phase": phase,
@ -42,10 +56,11 @@ class HotwaterRecommendations:
# TODO
],
"type": "hot_water_tank_insulation",
"description": "Insulate the hot water tank with an insulation jacket",
"description": description,
"starting_u_value": None,
"new_u_value": None,
"sap_points": None,
"already_installed": already_installed,
**recommendation_cost,
"simulation_config": {"hot_water_energy_eff_ending": "Average"}
}

View file

@ -1,6 +1,7 @@
from backend.Property import Property
from typing import List
from recommendations.Costs import Costs
from recommendations.recommendation_utils import override_costs
class LightingRecommendations:
@ -91,6 +92,11 @@ class LightingRecommendations:
heat_demand_change, carbon_change = self.estimate_lighting_impact(number_non_lel_outlets)
already_installed = "low_energy_lighting" in self.property.already_installed
if already_installed:
cost_result = override_costs(cost_result)
description = "Low energy lighting has already been installed, no further action required"
self.recommendation = [
{
"phase": phase,
@ -99,6 +105,7 @@ class LightingRecommendations:
"description": description,
"starting_u_value": None,
"new_u_value": None,
"already_installed": already_installed,
# For SAP points, we use the fact that lighting is usually worth 2 points and we scale this to
# the proportion of lights that will be set to low energy
"sap_points": round(2 * (number_non_lel_outlets / number_lighting_outlets), 2),

View file

@ -11,6 +11,7 @@ from recommendations.SolarPvRecommendations import SolarPvRecommendations
from recommendations.WindowsRecommendations import WindowsRecommendations
from recommendations.HeatingRecommender import HeatingRecommender
from recommendations.HotwaterRecommendations import HotwaterRecommendations
from recommendations.SecondaryHeating import SecondaryHeating
from backend.ml_models.AnnualBillSavings import AnnualBillSavings
@ -22,7 +23,8 @@ class Recommendations:
def __init__(
self,
property_instance: Property,
materials: List
materials: List,
exclusions: List[str] = None,
):
"""
:param property_instance: Instance of the Property class, for the home associated to property_id
@ -31,6 +33,7 @@ class Recommendations:
self.property_instance = property_instance
self.materials = materials
self.exclusions = exclusions if exclusions else []
self.floor_recommender = FloorRecommendations(property_instance=property_instance, materials=materials)
self.wall_recomender = WallRecommendations(property_instance=property_instance, materials=materials)
@ -44,8 +47,9 @@ class Recommendations:
self.solar_recommender = SolarPvRecommendations(property_instance=property_instance)
self.heating_recommender = HeatingRecommender(property_instance=property_instance)
self.hotwater_recommender = HotwaterRecommendations(property_instance=property_instance)
self.secondary_heating_recommender = SecondaryHeating(property_instance=property_instance)
def recommend(self, portfolio_id):
def recommend(self):
"""
This method runs the recommendations for the individual measures and then appends them to a list for output
@ -58,78 +62,139 @@ class Recommendations:
property_recommendations = []
phase = 0
print("WALL RECOMMENDATIONS HAVE BEEN COMMENTED OUT TEMPORARILY - ADD ME BACK IN")
if portfolio_id != 66:
# Building Fabric
# Building Fabric
if "wall_insulation" not in self.exclusions:
self.wall_recomender.recommend(phase=phase)
if self.wall_recomender.recommendations:
property_recommendations.append(self.wall_recomender.recommendations)
phase += 1
# Ventilation recommendations
# We only produce a ventilation recommendation if the property is recommended to have wall or roof
# insulation
# We will not attribute a SAP impact to the ventilation recommendation, since we've seen that this has no
# real impact on the SAP score. Therefore, we don't need to include phasing for ventilation. If we have any
# wall or roof recommendations, we will ensure that ventilation is included in the simulation
if "roof_insulation" not in self.exclusions:
self.roof_recommender.recommend(phase=phase)
if self.roof_recommender.recommendations:
property_recommendations.append(self.roof_recommender.recommendations)
phase += 1
# Ventilation recommendations
# We only produce a ventilation recommendation if the property is recommended to have wall or roof
# insulation
# We will not attribute a SAP impact to the ventilation recommendation, since we've seen that this
# has no
# real impact on the SAP score. Therefore, we don't need to include phasing for ventilation. If we
# have any
# wall or roof recommendations, we will ensure that ventilation is included in the simulation
if "ventilation" not in self.exclusions:
if self.wall_recomender.recommendations or self.roof_recommender.recommendations:
self.ventilation_recomender.recommend()
if self.ventilation_recomender.recommendation:
property_recommendations.append(self.ventilation_recomender.recommendation)
self.roof_recommender.recommend(phase=phase)
if self.roof_recommender.recommendations:
property_recommendations.append(self.roof_recommender.recommendations)
phase += 1
if "floor_insulation" not in self.exclusions:
self.floor_recommender.recommend(phase=phase)
if self.floor_recommender.recommendations:
property_recommendations.append(self.floor_recommender.recommendations)
phase += 1
self.floor_recommender.recommend(phase=phase)
if self.floor_recommender.recommendations:
property_recommendations.append(self.floor_recommender.recommendations)
phase += 1
if "windows" not in self.exclusions:
self.windows_recommender.recommend(phase=phase)
if self.windows_recommender.recommendation:
property_recommendations.append(self.windows_recommender.recommendation)
phase += 1
self.windows_recommender.recommend(phase=phase)
if self.windows_recommender.recommendation:
property_recommendations.append(self.windows_recommender.recommendation)
phase += 1
self.fireplace_recommender.recommend(phase=phase)
if self.fireplace_recommender.recommendation:
property_recommendations.append(self.fireplace_recommender.recommendation)
phase += 1
if "fireplace" not in self.exclusions:
self.fireplace_recommender.recommend(phase=phase)
if self.fireplace_recommender.recommendation:
property_recommendations.append(self.fireplace_recommender.recommendation)
phase += 1
# Heating and Electrical systems
self.heating_recommender.recommend(phase=phase)
if self.heating_recommender.recommendations:
property_recommendations.append(self.heating_recommender.recommendations)
phase += 1
if "heating" not in self.exclusions:
cavity_or_loft_recommendations = [
r for r in self.wall_recomender.recommendations + self.roof_recommender.recommendations
if r["type"] in ["cavity_wall_insulation", "loft_insulation"]
]
has_cavity_or_loft_recommendations = len(cavity_or_loft_recommendations) > 0
self.heating_recommender.recommend(
phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations
)
if (
self.heating_recommender.heating_recommendations or
self.heating_recommender.heating_control_recommendations
):
# We split into first and second phase recommendations
first_phase_recommendations = [
r for r in (
self.heating_recommender.heating_recommendations +
self.heating_recommender.heating_control_recommendations
)
if r["phase"] == phase
]
second_phase_recommendations = [
r for r in (
self.heating_recommender.heating_recommendations +
self.heating_recommender.heating_control_recommendations
)
if r["phase"] == phase + 1
]
if first_phase_recommendations:
property_recommendations.append(first_phase_recommendations)
if second_phase_recommendations:
property_recommendations.append(second_phase_recommendations)
# We check if we have distinct heating and heating controls recommendations
# If so, we increment by 2 (one of the heating system, one for the heating controls)
# otherwise we increment by 1
max_used_phase = max(
[rec["phase"] for rec in
self.heating_recommender.heating_recommendations +
self.heating_recommender.heating_control_recommendations]
)
amount_to_increment = max_used_phase - phase + 1
phase += amount_to_increment
# Hot water
self.hotwater_recommender.recommend(phase=phase)
if self.hotwater_recommender.recommendations:
property_recommendations.append(self.hotwater_recommender.recommendations)
phase += 1
if "hot_water" not in self.exclusions:
self.hotwater_recommender.recommend(phase=phase)
if self.hotwater_recommender.recommendations:
property_recommendations.append(self.hotwater_recommender.recommendations)
phase += 1
self.lighting_recommender.recommend(phase=phase)
if self.lighting_recommender.recommendation:
property_recommendations.append(self.lighting_recommender.recommendation)
phase += 1
if "lighting" not in self.exclusions:
self.lighting_recommender.recommend(phase=phase)
if self.lighting_recommender.recommendation:
property_recommendations.append(self.lighting_recommender.recommendation)
phase += 1
if "secondary_heating" not in self.exclusions:
self.secondary_heating_recommender.recommend(phase=phase)
if self.secondary_heating_recommender.recommendation:
property_recommendations.append(self.secondary_heating_recommender.recommendation)
phase += 1
# Renewables
self.solar_recommender.recommend(phase=phase)
if self.solar_recommender.recommendation:
property_recommendations.append(self.solar_recommender.recommendation)
phase += 1
if "solar_pv" not in self.exclusions:
self.solar_recommender.recommend(phase=phase)
if self.solar_recommender.recommendation:
property_recommendations.append(self.solar_recommender.recommendation)
phase += 1
# We insert temporary ids into the recommendations which is important for the optimiser later
property_recommendations = self.insert_temp_recommendation_id(property_recommendations)
# We also need to create the representative recommendations for each recommendation type
property_representative_recommendations = self.create_representative_recommendations(property_recommendations)
property_representative_recommendations = self.create_representative_recommendations(
property_recommendations, non_invasive_recommendations=self.property_instance.non_invasive_recommendations
)
return property_recommendations, property_representative_recommendations
@staticmethod
def create_representative_recommendations(property_recommendations):
def create_representative_recommendations(property_recommendations, non_invasive_recommendations):
"""
This method will create a representative recommendation for each recommendation type
In order to create a representative recommendation, we choose the recommendation that has:
@ -144,6 +209,13 @@ class Recommendations:
for recommendations_by_type in property_recommendations:
# If the property was initially surveyed as filled, but the cavity was only partially filled, we don't
# want to include the cavity wall insulation recommendation in the defaults
# if (recommendations_by_type[0].get("type") == "cavity_wall_insulation") and (
# "cavity_surveyed_as_filled_is_partial" in non_invasive_recommendations
# ):
# continue
if recommendations_by_type[0].get("type") == "mechanical_ventilation":
continue
@ -213,13 +285,13 @@ class Recommendations:
property_sap_predictions = all_predictions["sap_change_predictions"][
all_predictions["sap_change_predictions"]["property_id"] == str(property_instance.id)
]
].copy()
property_heat_predictions = all_predictions["heat_demand_predictions"][
all_predictions["heat_demand_predictions"]["property_id"] == str(property_instance.id)
]
].copy()
property_carbon_predictions = all_predictions["carbon_change_predictions"][
all_predictions["carbon_change_predictions"]["property_id"] == str(property_instance.id)
]
].copy()
property_recommendations = recommendations[property_instance.id].copy()
@ -247,6 +319,8 @@ class Recommendations:
current_epc_rating=property_instance.data["current-energy-rating"],
)
# TODO: This isn't quite right as this is based on EVERY possible measure, not just the ones that are
# actually implemented
expected_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
epc_energy_consumption=expected_heat_demand,
current_epc_rating=property_instance.data["current-energy-rating"],
@ -256,6 +330,10 @@ class Recommendations:
current_adjusted_energy - expected_adjusted_energy
)
# TODO: We should determine if the home is gas & electricity or just electricity
current_energy_bill = AnnualBillSavings.calculate_annual_bill(current_adjusted_energy)
expected_energy_bill = AnnualBillSavings.calculate_annual_bill(expected_adjusted_energy)
for recommendations_by_type in property_recommendations:
for rec in recommendations_by_type:
@ -330,4 +408,10 @@ class Recommendations:
rec["heat_demand"] is None) or (rec["energy_cost_savings"] is None):
raise ValueError("sap points, co2 or heat demand is missing")
return property_recommendations, current_adjusted_energy, expected_adjusted_energy
return (
property_recommendations,
current_adjusted_energy,
expected_adjusted_energy,
current_energy_bill,
expected_energy_bill
)

View file

@ -5,7 +5,7 @@ from typing import List
from datatypes.enums import QuantityUnits
from recommendations.recommendation_utils import (
get_roof_u_value, r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns,
update_lowest_selected_u_value, get_recommended_part, convert_thickness_to_numeric
update_lowest_selected_u_value, get_recommended_part, convert_thickness_to_numeric, override_costs
)
from recommendations.Costs import Costs
@ -20,8 +20,9 @@ class RoofRecommendations:
DIMINISHING_RETURNS_U_VALUE = 0.14
# It is recommended that lofts should have at least 270mm of insulation
MINIMUM_LOFT_ISULATION_MM = 270
# It is recommended that lofts should have at least 270mm of insulation. If the property has more than 200mm of
# loft insulation in place already, we do not recommend anything for the moment
MINIMUM_LOFT_ISULATION_MM = 200
# Flat roof should have at least 100mm of insulation
MINIMUM_FLAT_ROOF_ISULATION_MM = 100
@ -71,7 +72,7 @@ class RoofRecommendations:
# Building regulations part L recommend installing at least 270mm of insulation, however generally we
# experience diminishing returns in terms of SAP once we go beyond around 150mm of insulation
# This only holds true for pitched roofs.
if (insulation_thickness >= self.MINIMUM_LOFT_ISULATION_MM) and self.property.roof["is_pitched"]:
if (insulation_thickness > self.MINIMUM_LOFT_ISULATION_MM) and self.property.roof["is_pitched"]:
return
if (insulation_thickness >= self.MINIMUM_FLAT_ROOF_ISULATION_MM) and self.property.roof["is_flat"]:
@ -206,12 +207,18 @@ class RoofRecommendations:
floor_area=self.property.insulation_floor_area,
material=material
)
already_installed = "loft_insulation" in self.property.already_installed
if already_installed:
cost_result = override_costs(cost_result)
elif material["type"] == "flat_roof_insulation":
cost_result = self.costs.flat_roof_insulation(
floor_area=self.property.insulation_floor_area,
material=material,
non_insulation_materials=non_insulation_materials
)
already_installed = "flat_roof_insulation" in self.property.already_installed
if already_installed:
cost_result = override_costs(cost_result)
else:
raise ValueError("Invalid material type")
@ -231,6 +238,7 @@ class RoofRecommendations:
"starting_u_value": u_value,
"new_u_value": new_u_value,
"sap_points": None,
"already_installed": already_installed,
**cost_result
}
)

View file

@ -0,0 +1,65 @@
from recommendations.Costs import Costs
from recommendations.recommendation_utils import override_costs
from backend.Property import Property
class SecondaryHeating:
    """
    Recommender for removing a secondary heating system from a property whose main heating system is one of
    the accepted types.
    """

    # Main heating descriptions for which a secondary heating removal is considered
    ACCEPTED_MAINHEAT_DESCRIPTIONS = ["Boiler and radiators, mains gas"]
    # Secondary heating descriptions this recommender handles
    ACCEPTED_SECONDHEAT_DESCRIPTIONS = ["Room heaters, electric"]

    # Heaters for which works are required to remove them (these carry a removal cost)
    FIXED_HEATER_DESCRIPTIONS = ["Room heaters, electric"]

    def __init__(self, property_instance: Property):
        """
        :param property_instance: Instance of the Property class, for the home being assessed
        """
        self.property = property_instance
        self.costs = Costs(self.property)
        self.recommendation = []

    def recommend(self, phase: int):
        """
        Populate self.recommendation with at most one secondary heating removal recommendation.

        Nothing is recommended unless the main heating description and the secondary heating description are
        both in their respective accepted lists.

        :param phase: Phase number recorded on the recommendation
        :return: None, the result is stored on self.recommendation
        """
        # Clear anything left over from a previous call
        self.recommendation = []

        main_heating_description = self.property.main_heating["clean_description"]
        if main_heating_description not in self.ACCEPTED_MAINHEAT_DESCRIPTIONS:
            return

        # TODO: We need to clean secondary data
        second_heating_description = self.property.data['secondheat-description']
        if second_heating_description not in self.ACCEPTED_SECONDHEAT_DESCRIPTIONS:
            return

        # Fixed heaters are costed per heated room; for anything else we cost zero rooms
        requires_removal_works = second_heating_description in self.FIXED_HEATER_DESCRIPTIONS
        n_rooms = self.property.data['number-heated-rooms'] if requires_removal_works else 0
        costs = self.costs.heater_removal(n_rooms=n_rooms)

        already_installed = "secondary_heating" in self.property.already_installed
        if already_installed:
            # Work that has already been carried out should not contribute any cost
            costs = override_costs(costs)
        description = (
            "Secondary heating system has already been removed, no further action required"
            if already_installed
            else "Remove the secondary heating system"
        )

        removal_recommendation = {
            "phase": phase,
            "parts": [],
            "type": "secondary_heating",
            "description": description,
            "starting_u_value": None,
            "new_u_value": None,
            "sap_points": None,
            "already_installed": already_installed,
            **costs,
            "simulation_config": {
                "secondheat_description_ending": "None"
            },
        }
        self.recommendation.append(removal_recommendation)

View file

@ -1,5 +1,6 @@
import numpy as np
from recommendations.Costs import Costs
from recommendations.recommendation_utils import override_costs
class SolarPvRecommendations:
@ -8,6 +9,9 @@ class SolarPvRecommendations:
# Wattage per panel - this is based on the average wattage of a solar panel being between 250w and 420w
SOLAR_PANEL_WATTAGE = 250
MAX_SYSTEM_WATTAGE = 6000
MIN_SYSTEM_WATTAGE = 1000
def __init__(self, property_instance):
"""
:param property_instance: Instance of the Property class, for the home associated to property_id
@ -18,6 +22,19 @@ class SolarPvRecommendations:
self.recommendation = []
@staticmethod
def trim_solar_wattage_options(scenarios_with_wattage):
    """
    Drop scenarios that do not increase the wattage over the last scenario that was kept, so that the
    resulting options have strictly increasing wattages (and therefore no duplicates).

    :param scenarios_with_wattage: List of tuples whose second element (index 1) is the system wattage.
        Presumably ordered by ascending roof coverage - confirm against the caller.
    :return: New list containing the first scenario and every later scenario whose wattage is greater than
        that of the previously kept scenario. An empty input produces an empty list.
    """
    # No scenario may have met the minimum system size, in which case there is nothing to trim
    if not scenarios_with_wattage:
        return []

    # The first scenario is always kept and acts as the initial comparison point
    trimmed_list = [scenarios_with_wattage[0]]

    for scenario in scenarios_with_wattage[1:]:
        # Keep the scenario only if it offers more wattage than the last one we kept
        if scenario[1] > trimmed_list[-1][1]:
            trimmed_list.append(scenario)

    return trimmed_list
def recommend(self, phase):
"""
We check if a property is potentially suitable for solar PV based on the following criteria:
@ -27,7 +44,7 @@ class SolarPvRecommendations:
:return:
"""
is_valid_property_type = self.property.data["property-type"] in ["House", "Bungalow"]
is_valid_property_type = self.property.data["property-type"] in ["House", "Bungalow", "Maisonette"]
is_valid_roof_type = (
self.property.roof["is_flat"] or self.property.roof["is_pitched"] or self.property.roof["is_roof_room"]
)
@ -39,33 +56,56 @@ class SolarPvRecommendations:
if not is_valid_property_type or not is_valid_roof_type or not has_no_existing_solar_pv:
return
solar_pv_percentage = self.property.solar_pv_percentage
# We round up to the nearest 10%
solar_pv_percentage = np.ceil(solar_pv_percentage * 10) / 10
# For the solar recommendations, we produce the following scenarios:
# 1) Solar panels only, we present a high, medium and low coverage
# 2) With and without battery
roof_coverage_scenarios = [
self.property.solar_pv_percentage - 0.1, self.property.solar_pv_percentage,
self.property.solar_pv_percentage + 0.1
solar_pv_percentage - 0.1, solar_pv_percentage,
]
# We make sure we haven't gone too low or high
roof_coverage_scenarios = [v for v in roof_coverage_scenarios if 0 <= v <= 1]
if solar_pv_percentage <= 0.4:
roof_coverage_scenarios.append(solar_pv_percentage + 0.1)
# We make sure we haven't gone too low or high - we allow no more than 60% coverage
roof_coverage_scenarios = [v for v in roof_coverage_scenarios if 0 <= v <= 0.6]
# If we only have two scenarios, we add a coverage scenario 10% less than the smallest
if len(roof_coverage_scenarios) == 2:
roof_coverage_scenarios.insert(0, roof_coverage_scenarios[0] - 0.1)
battery_scenarios = [False, True]
# I now produce the cross product of the scenarios
scenarios = [(roof, battery) for roof in roof_coverage_scenarios for battery in battery_scenarios]
for roof_coverage, has_battery in scenarios:
scenarios_with_wattage = []
for roof_coverage in roof_coverage_scenarios:
# We now have a property which is potentially suitable for solar PV
solar_pv_roof_area = self.property.get_solar_pv_roof_area(roof_coverage)
number_solar_panels = np.floor(solar_pv_roof_area / self.SOLAR_PANEL_AREA)
solar_panel_wattage = number_solar_panels * self.SOLAR_PANEL_WATTAGE
roof_coverage_percent = round(roof_coverage * 100)
if solar_panel_wattage < self.MIN_SYSTEM_WATTAGE:
continue
solar_panel_wattage = np.clip(
a=solar_panel_wattage, a_min=self.MIN_SYSTEM_WATTAGE, a_max=self.MAX_SYSTEM_WATTAGE
)
scenarios_with_wattage.append((roof_coverage, solar_panel_wattage))
# We trim the scenarios, so that we don't have duplicate wattages
scenarios_with_wattage = self.trim_solar_wattage_options(scenarios_with_wattage)
# Produce the cross product of the scenarios
scenarios = [
(roof, wattage, battery) for roof, wattage in scenarios_with_wattage for battery in battery_scenarios
]
# We deduce the wattage of the solar panels based on the roof coverage
for roof_coverage, solar_panel_wattage, has_battery in scenarios:
# We now have a property which is potentially suitable for solar PV
roof_coverage_percent = round(roof_coverage * 100)
# Given the wattage, we estimate the cost of the solar PV system. This is based on the MCS database
# of solar PV installations
cost_result = self.costs.solar_pv(wattage=solar_panel_wattage, has_battery=has_battery)
kw = np.floor(solar_panel_wattage / 100) / 10
if has_battery:
@ -75,6 +115,10 @@ class SolarPvRecommendations:
description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) p"
f"anel system on {round(roof_coverage_percent)}% the roof.")
already_installed = "solar_pv" in self.property.already_installed
if already_installed:
cost_result = override_costs(cost_result)
self.recommendation.append(
{
"phase": phase,
@ -84,9 +128,11 @@ class SolarPvRecommendations:
"starting_u_value": None,
"new_u_value": None,
"sap_points": None,
"already_installed": already_installed,
**cost_result,
# This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale
# back up here
"photo_supply": 100 * roof_coverage
"photo_supply": 100 * roof_coverage,
"has_battery": has_battery
}
)

View file

@ -50,7 +50,11 @@ class VentilationRecommendations(Definitions):
part = self.materials.copy()
estimated_cost = n_units * part[0]["cost"]
already_installed = "cavity_wall_insulation" in self.property.already_installed
estimated_cost = n_units * part[0]["cost"] if not already_installed else 0
labour_hours = 4 * n_units if not already_installed else 0
labour_days = 4 * n_units / 8.0 if not already_installed else 0
part[0]["total"] = estimated_cost
part[0]["quantity"] = n_units
@ -65,6 +69,7 @@ class VentilationRecommendations(Definitions):
"description": f"Install {n_units} {part[0]['description']} units",
"starting_u_value": None,
"new_u_value": None,
"already_installed": already_installed,
"sap_points": 0,
"heat_demand": 0,
"adjusted_heat_demand": 0,
@ -72,7 +77,7 @@ class VentilationRecommendations(Definitions):
"energy_cost_savings": 0,
"total": estimated_cost,
# We use a very simple and rough estimate of 4 hours per unit
"labour_hours": 4 * n_units,
"labour_days": 4 * n_units / 8.0 # Assume 8 hour day
"labour_hours": labour_hours,
"labour_days": labour_days # Assume 8 hour day
}
]

View file

@ -8,7 +8,7 @@ from backend.Property import Property
from BaseUtility import Definitions
from recommendations.recommendation_utils import (
r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value,
get_recommended_part, get_wall_u_value
get_recommended_part, get_wall_u_value, override_costs
)
from recommendations.config import PARTIALLY_FILLED_PERCENTAGE_ASSUMPTION
from recommendations.Costs import Costs
@ -113,7 +113,9 @@ class WallRecommendations(Definitions):
insulation_thickness = self.property.walls["insulation_thickness"]
# We check if the wall is already insulated and if so, we exit
if (insulation_thickness in ["average", "above average"]) or self.property.walls["is_filled_cavity"]:
if ((insulation_thickness in ["average", "above average"]) or self.property.walls["is_filled_cavity"]) and (
"cavity_extract_and_refill" not in self.property.non_invasive_recommendations
):
return
if u_value:
@ -216,11 +218,26 @@ class WallRecommendations(Definitions):
if new_u_value <= self.BUILDING_REGULATIONS_PART_L_CAVITY_WALL_MAX_U_VALUE:
lowest_selected_u_value = update_lowest_selected_u_value(lowest_selected_u_value, new_u_value)
is_extraction_and_refill = "cavity_extract_and_refill" in self.property.non_invasive_recommendations
cost_result = self.costs.cavity_wall_insulation(
wall_area=self.property.insulation_wall_area,
material=material.to_dict(),
is_extraction_and_refill=is_extraction_and_refill
)
already_installed = "cavity_wall_insulation" in self.property.already_installed
if already_installed:
cost_result = override_costs(cost_result)
if is_extraction_and_refill:
description = f"Extract and refill cavity wall insulation with {material['description']}"
else:
description = self._make_description(material)
# updated the new u-value with the best possible our installers have
new_u_value = max(0.31, new_u_value)
recommendations.append(
{
"phase": phase,
@ -233,10 +250,11 @@ class WallRecommendations(Definitions):
)
],
"type": "cavity_wall_insulation",
"description": self._make_description(material),
"description": description,
"starting_u_value": u_value,
"new_u_value": new_u_value,
"sap_points": None,
"already_installed": already_installed,
**cost_result
}
)
@ -277,12 +295,19 @@ class WallRecommendations(Definitions):
material=material.to_dict(),
non_insulation_materials=non_insulation_materials
)
already_installed = "internal_wall_insulation" in self.property.already_installed
if already_installed:
cost_result = override_costs(cost_result)
elif material["type"] == "external_wall_insulation":
cost_result = self.costs.external_wall_insulation(
wall_area=self.property.insulation_wall_area,
material=material.to_dict(),
non_insulation_materials=non_insulation_materials
)
already_installed = "external_wall_insulation" in self.property.already_installed
if already_installed:
cost_result = override_costs(cost_result)
else:
raise ValueError("Invalid material type")
@ -301,6 +326,7 @@ class WallRecommendations(Definitions):
"description": self._make_description(material),
"starting_u_value": u_value,
"new_u_value": new_u_value,
"already_installed": already_installed,
"sap_points": None,
**cost_result
}

View file

@ -4,6 +4,7 @@ import numpy as np
from backend.Property import Property
from recommendations.Costs import Costs
from recommendation_utils import override_costs
class WindowsRecommendations:
@ -70,18 +71,23 @@ class WindowsRecommendations:
is_secondary_glazing=is_secondary_glazing
)
glazing_type = "secondary glazing" if is_secondary_glazing else "double glazing"
if self.property.windows["glazing_coverage"] in ["partial", "most"]:
description = f"Install {glazing_type} to the remaining windows"
already_installed = "windows_glazing" in self.property.already_installed
if already_installed:
cost_result = override_costs(cost_result)
description = "The property already has double glazing installed. No further action is required."
else:
description = f"Install {glazing_type} to all windows"
glazing_type = "secondary glazing" if is_secondary_glazing else "double glazing"
if self.property.windows["glazing_coverage"] in ["partial", "most"]:
description = f"Install {glazing_type} to the remaining windows"
else:
description = f"Install {glazing_type} to all windows"
if self.property.is_listed:
description += ". Secondary glazing recommended due to listed building status"
elif self.property.is_heritage:
description += ". Secondary glazing recommended due to herigate building status"
elif self.property.in_conservation_area:
description += ". Secondary glazing recommended due to conservation area status"
if self.property.is_listed:
description += ". Secondary glazing recommended due to listed building status"
elif self.property.is_heritage:
description += ". Secondary glazing recommended due to herigate building status"
elif self.property.in_conservation_area:
description += ". Secondary glazing recommended due to conservation area status"
self.recommendation = [
{
@ -92,6 +98,7 @@ class WindowsRecommendations:
"starting_u_value": None,
"new_u_value": None,
"sap_points": None,
"already_installed": already_installed,
**cost_result,
"is_secondary_glazing": is_secondary_glazing
}

View file

@ -1,17 +1,13 @@
def prepare_input_measures(property_recommendations, goal, housing_type):
def prepare_input_measures(property_recommendations, goal):
"""
Basic function to convert recommendations_to_upload to a format that is
suitable for the optimiser - large
:param property_recommendations: object containing the recommendations, created in the plan trigger api
:param goal: goal to be optimised for, should be one of the keys in gain_map. E.g. if the gain is SAP points,
the goal should reflect that desired gain
:param housing_type: type of housing the recommendations are for - should be one of "Social" or "Private"
:return: Nested list of input measures
"""
if housing_type not in ["Social", "Private"]:
raise ValueError("Invalid housing type - investigate me")
goal_map = {
"Increase EPC": "sap_points"
}
@ -20,12 +16,14 @@ def prepare_input_measures(property_recommendations, goal, housing_type):
if not goal_key:
raise NotImplementedError("Not implemented this gain type - investigate me")
# We don't include suspended and solid floor insulation as possible measures in private housing, because
# of the need to decant the tenant
ignored_measures = ["suspended_floor_insulation", "solid_floor_insulation"] if housing_type == "Private" else []
input_measures = []
for recs in property_recommendations:
if recs[0]["type"] == "solar_pv":
# if the recommendation is a solar recommendation without a battery, we exclude it from the optimisation.
# That will ensure that the optimiser only considers solar recommendations with batteries, so we don't
# under-report the potential cost
recs = [r for r in recs if r["has_battery"]]
input_measures.append(
[
{
@ -34,7 +32,7 @@ def prepare_input_measures(property_recommendations, goal, housing_type):
"gain": rec[goal_key],
"type": rec["type"]
}
for rec in recs if rec["type"] not in ignored_measures
for rec in recs
]
)

View file

@ -767,3 +767,15 @@ def check_simulation_difference(old_config, new_config):
differences = {key + "_ending": new_config[key] for key in new_config if old_config[key] != new_config[key]}
return differences
def override_costs(costs):
    """
    Zero out every entry of a costs dictionary. Used when the method is overridden, in which case no cost
    should be reported. The dictionary is modified in place.
    :param costs: Dictionary of costing, as returned by the Costs class
    :return: The same dictionary object, with every value replaced by 0
    """
    zeroed = dict.fromkeys(costs, 0)
    costs.update(zeroed)
    return costs

View file

@ -0,0 +1,944 @@
import pandas as pd
import msgpack
from datetime import datetime
from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3
from backend.Property import Property
from recommendations.HeatingRecommender import HeatingRecommender
from recommendations.Recommendations import Recommendations
from etl.epc.Record import EPCRecord
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from backend.ml_models.api import ModelApi
def find_examples():
    """
    Some scrappy helper code to find EPC examples.

    Reads the un-cleaned SAP change dataset from the dev bucket and keeps rows that started with a boiler and
    no air source heat pump, but ended with an air source heat pump. It then drops every row where one of the
    `static_columns` differs between its starting and ending value, printing the row count before and after
    each column filter.

    :return: Tuple of (the filtered examples, the subset of those examples that started with a mains gas
        boiler and radiators and no longer have a boiler afterwards)
    """
    # Let's look for some testing data, where the only thing different pre and post is the installation of an
    # air source heat pump
    data = read_dataframe_from_s3_parquet(
        bucket_name="retrofit-data-dev",
        file_key="sap_change_model/2024-03-24-15-51-13/dataset_no_cleaning.parquet"
    )
    # Firstly, take records where before there was no air source heat pump and afterwards there was
    data = data[
        data["has_air_source_heat_pump_ending"] & ~data["has_air_source_heat_pump"]
    ]
    # Start with a property that has a boiler
    data = data[data["has_boiler"]]
    static_columns = [
        # Walls
        'walls_thermal_transmittance_ending',
        'is_filled_cavity_ending',
        'is_park_home_ending',
        'walls_insulation_thickness_ending',
        'external_insulation_ending',
        'internal_insulation_ending',
        # Floors
        # 'floor_thermal_transmittance_ending',  # Don't subset on this, because it changes based on floor area
        'floor_insulation_thickness_ending',
        # Roof
        'roof_thermal_transmittance_ending',
        'is_at_rafters_ending',
        'roof_insulation_thickness_ending',
        # Hot water - air source heat pump will change the hot water system (probably from whatever it was -> main)
        # 'heater_type_ending',
        # 'system_type_ending',
        # 'thermostat_characteristics_ending',
        # 'heating_scope_ending',
        # 'energy_recovery_ending',
        # 'hotwater_tariff_type_ending',
        # 'extra_features_ending',
        # 'chp_systems_ending',
        # 'distribution_system_ending',
        # 'no_system_present_ending',
        # 'appliance_ending',
        # Heating - Will change when installing an ASHP
        # 'has_radiators_ending',
        # 'has_fan_coil_units_ending',
        # 'has_pipes_in_screed_above_insulation_ending',
        # 'has_pipes_in_insulated_timber_floor_ending',
        # 'has_pipes_in_concrete_slab_ending',
        # 'has_boiler_ending',
        # 'has_air_source_heat_pump_ending',  # We want the air source heat pump to change
        # 'has_room_heaters_ending',
        # 'has_electric_storage_heaters_ending',
        # 'has_warm_air_ending',
        # 'has_electric_underfloor_heating_ending',
        # 'has_electric_ceiling_heating_ending',
        # 'has_community_scheme_ending',
        # 'has_ground_source_heat_pump_ending',
        # 'has_no_system_present_ending',
        # 'has_portable_electric_heaters_ending',
        # 'has_water_source_heat_pump_ending',
        # 'has_electric_heat_pump_ending',
        # 'has_micro-cogeneration_ending',
        # 'has_solar_assisted_heat_pump_ending',
        # 'has_exhaust_source_heat_pump_ending',
        # 'has_community_heat_pump_ending',
        # 'has_electric_ending',
        # 'has_mains_gas_ending',
        # 'has_wood_logs_ending', 'has_coal_ending', 'has_oil_ending',
        # 'has_wood_pellets_ending', 'has_anthracite_ending', 'has_dual_fuel_mineral_and_wood_ending',
        # 'has_smokeless_fuel_ending', 'has_lpg_ending', 'has_b30k_ending', 'has_electricaire_ending',
        # 'has_assumed_for_most_rooms_ending', 'has_underfloor_heating_ending',
        # 'thermostatic_control_ending',
        # 'charging_system_ending',
        # 'switch_system_ending',
        # 'no_control_ending',
        # 'dhw_control_ending',
        # 'community_heating_ending',
        # 'multiple_room_thermostats_ending',
        # 'auxiliary_systems_ending',
        # 'trvs_ending',
        # 'rate_control_ending',
        # Window
        'glazing_type_ending',
        # Fuel - could change with ASHP
        # 'fuel_type_ending',
        # 'main-fuel_tariff_type_ending',
        # 'is_community_ending',
        # 'no_individual_heating_or_community_network_ending',
        # 'complex_fuel_type_ending',
        'mechanical_ventilation_ending', 'secondheat_description_ending', 'glazed_type_ending',
        'multi_glaze_proportion_ending', 'low_energy_lighting_ending', 'number_open_fireplaces_ending',
        'solar_water_heating_flag_ending',
        'photo_supply_ending',
        'energy_tariff_ending',
        'extension_count_ending',
        'total_floor_area_ending',
        # 'hot_water_energy_eff_ending',
        'floor_energy_eff_ending',
        'windows_energy_eff_ending',
        'walls_energy_eff_ending',
        'sheating_energy_eff_ending',
        'roof_energy_eff_ending',
        # 'mainheat_energy_eff_ending',
        # 'mainheatc_energy_eff_ending',
        'lighting_energy_eff_ending',
        'number_habitable_rooms_ending',
        'number_heated_rooms_ending',
    ]
    for col in static_columns:
        # The starting value lives either in "<base>_starting" or, when that column does not exist, in "<base>"
        base_starting = col.split("_ending")[0]
        if base_starting + "_starting" in data.columns:
            starting_col = base_starting + "_starting"
        else:
            starting_col = base_starting
        # Filter - keep only the rows where this feature did not change
        print(f"Column: {col}")
        print(f"Starting size: {data.shape[0]}")
        data = data[data[starting_col] == data[col]]
        print(f"Ending size: {data.shape[0]}")

    # Great example UPRNs
    # 100030969273
    # 10034685399 - Completely transforms the heating and hot water systems in the home (goes from oil -> electricity)
    # 100091200828 - goes from a liquid petroleum gas boiler to ashp

    # Look for starting with a gas boiler. Previously this selection was computed and thrown away; it is now
    # kept and returned so the helper is usable outside of a debugger
    gas_boiler_examples = data[
        data["has_boiler"] & data["has_radiators"] & data["has_mains_gas"] & ~data["has_boiler_ending"]
    ]
    # UPRN: 100011776843
    return data, gas_boiler_examples
class TestAirSourceHeatPump:
def test_eligible(self):
    # Scenario: a semi-detached house heated by a mains gas boiler with radiators, where the main heating,
    # heating controls and hot water are all rated "Good".
    # NOTE(review): the original comment described this house as one that "will be suitable for an air
    # source heat pump", yet the final assertion expects no recommendation - confirm which is intended
    record = EPCRecord()
    record.prepared_epc = {
        "county": "Broxbourne",
        "mainheat-energy-eff": "Good",
        "hot-water-energy-eff": "Good",
        "mainheatc-energy-eff": "Good",
        "number-heated-rooms": 5,
        "property-type": "House",
        "built-form": "Semi-Detached",
    }

    home = Property(id=0, address="fake", postcode="fake", epc_record=record)

    # Component dictionaries are set by hand on the property, one per component
    home.main_heating = {
        "original_description": "Boiler and radiators, mains gas",
        "clean_description": "Boiler and radiators, mains gas",
        "has_radiators": True,
        "has_fan_coil_units": False,
        "has_pipes_in_screed_above_insulation": False,
        "has_pipes_in_insulated_timber_floor": False,
        "has_pipes_in_concrete_slab": False,
        "has_boiler": True,
        "has_air_source_heat_pump": False,
        "has_room_heaters": False,
        "has_electric_storage_heaters": False,
        "has_warm_air": False,
        "has_electric_underfloor_heating": False,
        "has_electric_ceiling_heating": False,
        "has_community_scheme": False,
        "has_ground_source_heat_pump": False,
        "has_no_system_present": False,
        "has_portable_electric_heaters": False,
        "has_water_source_heat_pump": False,
        "has_electric": False,
        "has_mains_gas": True,
        "has_wood_logs": False,
        "has_coal": False,
        "has_oil": False,
        "has_wood_pellets": False,
        "has_anthracite": False,
        "has_dual_fuel_mineral_and_wood": False,
        "has_smokeless_fuel": False,
        "has_lpg": False,
        "has_assumed": False,
        "has_electricaire": False,
        "has_assumed_for_most_rooms": False,
        "has_underfloor_heating": False,
        "has_electric_heat_pumps": False,
        "has_micro-cogeneration": False,
    }
    home.main_fuel = {
        "original_description": "mains gas (not community)",
        "fuel_type": "mains gas",
        "tariff_type": None,
        "is_community": False,
        "no_individual_heating_or_community_network": False,
        "complex_fuel_type": None,
    }
    home.hotwater = {
        "original_description": "From main system",
        "clean_description": "From main system",
        "heater_type": None,
        "system_type": "from main system",
        "thermostat_characteristics": None,
        "heating_scope": None,
        "energy_recovery": None,
        "tariff_type": None,
        "extra_features": None,
        "chp_systems": None,
        "distribution_system": None,
        "no_system_present": None,
        "assumed": False,
        "appliance": None,
    }
    home.main_heating_controls = {
        "original_description": "Programmer, room thermostat and TRVs",
        "thermostatic_control": "room thermostat",
        "charging_system": None,
        "switch_system": "programmer",
        "no_control": None,
        "dhw_control": None,
        "community_heating": None,
        "multiple_room_thermostats": False,
        "auxiliary_systems": None,
        "trvs": "trvs",
        "rate_control": None,
    }

    heating_recommender = HeatingRecommender(property_instance=home)
    # A freshly built recommender must not hold any heating recommendations yet
    assert not heating_recommender.heating_recommendations
    heating_recommender.recommend(phase=0)
    assert heating_recommender.recommendation is None
def test_air_source_heat_pump_gas_boiler_starting(self):
# Builds a Property from a real "before" EPC (UPRN 100011776843, mains gas boiler and radiators), asks the
# HeatingRecommender for an air source heat pump recommendation, and pins the value returned by the
# "sap_change_predictions" model for that recommendation.
# NOTE(review): reads lookup data from the "retrofit-data-dev" bucket and calls ModelApi.predict_all, so this
# presumably needs access to the dev environment to run - confirm
# EPC lodged 2022-02-23: boiler and radiators on mains gas, current-energy-efficiency 45
starting_epc = {
'low-energy-fixed-light-count': '', 'address': '430 Gidlow Lane', 'uprn-source': 'Energy Assessor',
'floor-height': '2.62', 'heating-cost-potential': '599', 'unheated-corridor-length': '',
'hot-water-cost-potential': '67', 'construction-age-band': 'England and Wales: 1950-1966',
'potential-energy-rating': 'C', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Good',
'lighting-energy-eff': 'Very Good', 'environment-impact-potential': '72',
'glazed-type': 'double glazing installed during or after 2002', 'heating-cost-current': '913',
'address3': '', 'mainheatcont-description': 'Programmer, no room thermostat', 'sheating-energy-eff': 'N/A',
'property-type': 'House', 'local-authority-label': 'Wigan', 'fixed-lighting-outlets-count': '9',
'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '210',
'county': '', 'postcode': 'WN6 8RG', 'solar-water-heating-flag': 'N', 'constituency': 'E14001039',
'co2-emissions-potential': '2.6', 'number-heated-rooms': '4',
'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '180',
'local-authority': 'E08000010', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0',
'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2022-02-15',
'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '78', 'address1': '430 Gidlow Lane',
'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Wigan',
'roof-energy-eff': 'Very Poor', 'total-floor-area': '80.0', 'building-reference-number': '10002334112',
'environment-impact-current': '38', 'co2-emissions-current': '6.2',
'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A',
'number-habitable-rooms': '4', 'address2': '', 'hot-water-env-eff': 'Poor', 'posttown': 'WIGAN',
'mainheatc-energy-eff': 'Very Poor', 'main-fuel': 'mains gas (not community)',
'lighting-env-eff': 'Very Good', 'windows-energy-eff': 'Good', 'floor-env-eff': 'N/A',
'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in all fixed outlets',
'roof-env-eff': 'Very Poor', 'walls-energy-eff': 'Average', 'photo-supply': '0.0',
'lighting-cost-potential': '67', 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100',
'main-heating-controls': '', 'lodgement-datetime': '2022-02-23 16:39:41', 'flat-top-storey': '',
'current-energy-rating': 'E', 'secondheat-description': 'Room heaters, mains gas',
'walls-env-eff': 'Average', 'transaction-type': 'ECO assessment', 'uprn': '100011776843',
'current-energy-efficiency': '45', 'energy-consumption-current': '441',
'mainheat-description': 'Boiler and radiators, mains gas', 'lighting-cost-current': '67',
'lodgement-date': '2022-02-23', 'extension-count': '1', 'mainheatc-env-eff': 'Very Poor',
'lmk-key': '46cb404438a6d88ddff8965cab8b3027ec15c32d93e0b6a5f0381a5109b9bb0d', 'wind-turbine-count': '0',
'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '77',
'hot-water-energy-eff': 'Poor', 'low-energy-lighting': '100',
'walls-description': 'Cavity wall, filled cavity',
'hotwater-description': 'From main system, no cylinder thermostat'
}
# EPC lodged 2022-06-06 for the same UPRN: air source heat pump with radiators, current-energy-efficiency 53.
# In this test it is only referenced by the commented-out comparison code below, not by any assertion
ending_epc = {
'low-energy-fixed-light-count': '', 'address': '430 Gidlow Lane', 'uprn-source': 'Energy Assessor',
'floor-height': '2.62', 'heating-cost-potential': '803', 'unheated-corridor-length': '',
'hot-water-cost-potential': '292', 'construction-age-band': 'England and Wales: 1950-1966',
'potential-energy-rating': 'C', 'mainheat-energy-eff': 'Very Good', 'windows-env-eff': 'Good',
'lighting-energy-eff': 'Very Good', 'environment-impact-potential': '78',
'glazed-type': 'double glazing installed during or after 2002', 'heating-cost-current': '861',
'address3': '', 'mainheatcont-description': 'Time and temperature zone control',
'sheating-energy-eff': 'N/A', 'property-type': 'House', 'local-authority-label': 'Wigan',
'fixed-lighting-outlets-count': '9', 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural',
'hot-water-cost-current': '434', 'county': '', 'postcode': 'WN6 8RG', 'solar-water-heating-flag': 'N',
'constituency': 'E14001039', 'co2-emissions-potential': '2.0', 'number-heated-rooms': '4',
'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '147',
'local-authority': 'E08000010', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0',
'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2022-05-11',
'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '43', 'address1': '430 Gidlow Lane',
'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Wigan',
'roof-energy-eff': 'Very Poor', 'total-floor-area': '80.0', 'building-reference-number': '10002334112',
'environment-impact-current': '63', 'co2-emissions-current': '3.4',
'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A',
'number-habitable-rooms': '4', 'address2': '', 'hot-water-env-eff': 'Poor', 'posttown': 'WIGAN',
'mainheatc-energy-eff': 'Very Good', 'main-fuel': 'electricity (not community)',
'lighting-env-eff': 'Very Good', 'windows-energy-eff': 'Good', 'floor-env-eff': 'N/A',
'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in all fixed outlets',
'roof-env-eff': 'Very Poor', 'walls-energy-eff': 'Average', 'photo-supply': '0.0',
'lighting-cost-potential': '67', 'mainheat-env-eff': 'Very Good', 'multi-glaze-proportion': '100',
'main-heating-controls': '', 'lodgement-datetime': '2022-06-06 13:01:20', 'flat-top-storey': '',
'current-energy-rating': 'E', 'secondheat-description': 'Room heaters, mains gas',
'walls-env-eff': 'Average', 'transaction-type': 'ECO assessment', 'uprn': '100011776843',
'current-energy-efficiency': '53', 'energy-consumption-current': '252',
'mainheat-description': 'Air source heat pump, radiators, electric', 'lighting-cost-current': '67',
'lodgement-date': '2022-06-06', 'extension-count': '1', 'mainheatc-env-eff': 'Very Good',
'lmk-key': '672d5947f3d4a55d97255af71651d6127a939418fa66a687070af77e0ba90df2', 'wind-turbine-count': '0',
'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '70',
'hot-water-energy-eff': 'Very Poor', 'low-energy-lighting': '100',
'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
}
# Disabled ad-hoc comparison of the two EPCs, restricted to the heating / hot water fields
# differences = []
# for k, v in ending_epc.items():
# if v != starting_epc[k]:
# differences.append(
# {
# "variable": k,
# "starting_value": starting_epc[k],
# "ending_value": v
# }
# )
# differences = pd.DataFrame(differences)
#
# diffs = differences[
# differences["variable"].isin(
# [
# "mainheat-energy-eff",
# "mainheatcont-description",
# "mainheatc-energy-eff",
# "main-fuel",
# "mainheat-env-eff",
# "mainheat-description",
# "hot-water-energy-eff",
# "hotwater-description"
# ]
# )
# ]
# Load the shared lookup data from the dev bucket: the cleaning dataset (parquet), the cleaned EPC lookup
# (msgpack-encoded, despite the .bson file name) and the solar photo supply lookups
cleaning_data = read_dataframe_from_s3_parquet(
bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
)
cleaned = read_from_s3(
s3_file_name="cleaned_epc_data/cleaned.bson",
bucket_name="retrofit-data-dev"
)
cleaned = msgpack.unpackb(cleaned, raw=False)
photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
# Build the property from the starting EPC only
epc = EPCRecord(
epc_records={
'original_epc': starting_epc,
'full_sap_epc': {},
'old_data': []
},
run_mode="newdata",
cleaning_data=cleaning_data
)
home = Property(
id=0,
address="",
postcode="",
epc_record=epc,
already_installed={},
non_invasive_recommendations={},
)
# Planning / heritage flags are set by hand on the property before its components are built
home.in_conservation_area = False
home.is_listed = False
home.is_heritage = False
home.restricted_measures = True
home.get_components(
cleaned=cleaned,
photo_supply_lookup=photo_supply_lookup,
floor_area_decile_thresholds=floor_area_decile_thresholds
)
recommender = HeatingRecommender(property_instance=home)
recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False)
# Patch - for this property, the hot water energy efficiency is very poor. it's not clear why this is,
# but we insert this for this test
recommender.heating_recommendations[0]["simulation_config"]["hot_water_energy_eff_ending"] = "Very Poor"
property_recommendations = Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations])
assert len(recommender.heating_recommendations) == 1
# Build the scoring rows for the recommendation from the property's difference record
home.create_base_difference_epc_record(cleaned_lookup=cleaned)
home.adjust_difference_record_with_recommendations(
property_recommendations, []
)
# Drop the change / ending columns before the frame is handed to the model
scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(
columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
"carbon_ending"]
)
# Restrict the model API to the SAP change model only
model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
model_api.MODEL_PREFIXES = ["sap_change_predictions"]
predictions_dict = model_api.predict_all(
df=scoring_data,
bucket="retrofit-data-dev",
prediction_buckets={
"sap_change_predictions": "retrofit-sap-predictions-dev",
}
)
# NOTE(review): exact float equality against a model output; the ending EPC above records a
# current-energy-efficiency of 53, so 52.2 is presumably the predicted ending SAP score - confirm
assert predictions_dict["sap_change_predictions"]["predictions"].values[0] == 52.2
def test_air_source_heat_pump_gas_boiler_starting_2(self):
"""
This property seems to have minuscule movement in SAP - just 2 points
:return:
"""
# NOTE(review): reads lookup data from the "retrofit-data-dev" bucket and calls ModelApi.predict_all, so this
# presumably needs access to the dev environment to run - confirm
# EPC lodged 2021-11-25: boiler and radiators on mains gas, current-energy-efficiency 62
starting_epc = {
'low-energy-fixed-light-count': '', 'address': '31 Whinney Hill Park', 'uprn-source': 'Energy Assessor',
'floor-height': '2.3', 'heating-cost-potential': '394', 'unheated-corridor-length': '',
'hot-water-cost-potential': '48', 'construction-age-band': 'England and Wales: 1967-1975',
'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average',
'lighting-energy-eff': 'Good', 'environment-impact-potential': '87',
'glazed-type': 'double glazing, unknown install date', 'heating-cost-current': '487', 'address3': '',
'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'sheating-energy-eff': 'N/A',
'property-type': 'Bungalow', 'local-authority-label': 'Calderdale', 'fixed-lighting-outlets-count': '5',
'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '86',
'county': '', 'postcode': 'HD6 2PX', 'solar-water-heating-flag': 'N', 'constituency': 'E14000614',
'co2-emissions-potential': '0.8', 'number-heated-rooms': '2',
'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '105',
'local-authority': 'E08000033', 'built-form': 'End-Terrace', 'number-open-fireplaces': '0',
'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2021-11-25',
'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '56', 'address1': '31 Whinney Hill Park',
'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Calder Valley',
'roof-energy-eff': 'Good', 'total-floor-area': '44.0', 'building-reference-number': '10001772583',
'environment-impact-current': '62', 'co2-emissions-current': '2.5',
'roof-description': 'Pitched, 250 mm loft insulation', 'floor-energy-eff': 'N/A',
'number-habitable-rooms': '2', 'address2': '', 'hot-water-env-eff': 'Good', 'posttown': 'BRIGHOUSE',
'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Good',
'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
'lighting-description': 'Low energy lighting in 60% of fixed outlets', 'roof-env-eff': 'Good',
'walls-energy-eff': 'Average', 'photo-supply': '0.0', 'lighting-cost-potential': '40',
'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
'lodgement-datetime': '2021-11-25 11:39:35', 'flat-top-storey': '', 'current-energy-rating': 'D',
'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average',
'transaction-type': 'rental', 'uprn': '100051304421', 'current-energy-efficiency': '62',
'energy-consumption-current': '322', 'mainheat-description': 'Boiler and radiators, mains gas',
'lighting-cost-current': '56', 'lodgement-date': '2021-11-25', 'extension-count': '0',
'mainheatc-env-eff': 'Good', 'lmk-key': '077f70657e9c3f1f0ce5392798398398616b159493b2a8ca2338961596631c27',
'wind-turbine-count': '0', 'tenure': 'Rented (social)', 'floor-level': '',
'potential-energy-efficiency': '86', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '60',
'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
}
# EPC lodged 2022-03-23 for the same UPRN: air source heat pump with radiators, current-energy-efficiency 64.
# Its heating controls rating drops from "Good" to "Average"; it is used by the final assertion of this test
ending_epc = {
'low-energy-fixed-light-count': '', 'address': '31 Whinney Hill Park',
'uprn-source': 'Energy Assessor', 'floor-height': '2.3', 'heating-cost-potential': '277',
'unheated-corridor-length': '', 'hot-water-cost-potential': '266',
'construction-age-band': 'England and Wales: 1967-1975', 'potential-energy-rating': 'B',
'mainheat-energy-eff': 'Very Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Good',
'environment-impact-potential': '90', 'glazed-type': 'double glazing, unknown install date',
'heating-cost-current': '331', 'address3': '',
'mainheatcont-description': 'Programmer and room thermostat', 'sheating-energy-eff': 'N/A',
'property-type': 'Bungalow', 'local-authority-label': 'Calderdale',
'fixed-lighting-outlets-count': '5', 'energy-tariff': 'Single',
'mechanical-ventilation': 'natural', 'hot-water-cost-current': '404', 'county': '',
'postcode': 'HD6 2PX', 'solar-water-heating-flag': 'N', 'constituency': 'E14000614',
'co2-emissions-potential': '0.7', 'number-heated-rooms': '2',
'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '92',
'local-authority': 'E08000033', 'built-form': 'End-Terrace', 'number-open-fireplaces': '0',
'windows-description': 'Fully double glazed', 'glazed-area': 'Normal',
'inspection-date': '2021-11-25', 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '48',
'address1': '31 Whinney Hill Park', 'heat-loss-corridor': '', 'flat-storey-count': '',
'constituency-label': 'Calder Valley', 'roof-energy-eff': 'Good', 'total-floor-area': '44.0',
'building-reference-number': '10001772583', 'environment-impact-current': '68',
'co2-emissions-current': '2.1', 'roof-description': 'Pitched, 250 mm loft insulation',
'floor-energy-eff': 'N/A', 'number-habitable-rooms': '2', 'address2': '',
'hot-water-env-eff': 'Poor', 'posttown': 'BRIGHOUSE', 'mainheatc-energy-eff': 'Average',
'main-fuel': 'electricity (not community)', 'lighting-env-eff': 'Good',
'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
'lighting-description': 'Low energy lighting in 60% of fixed outlets', 'roof-env-eff': 'Good',
'walls-energy-eff': 'Average', 'photo-supply': '0.0', 'lighting-cost-potential': '40',
'mainheat-env-eff': 'Very Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
'lodgement-datetime': '2022-03-23 16:06:21', 'flat-top-storey': '', 'current-energy-rating': 'D',
'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average',
'transaction-type': 'rental', 'uprn': '100051304421', 'current-energy-efficiency': '64',
'energy-consumption-current': '283',
'mainheat-description': 'Air source heat pump, radiators, electric',
'lighting-cost-current': '57', 'lodgement-date': '2022-03-23', 'extension-count': '0',
'mainheatc-env-eff': 'Average',
'lmk-key': '6296248141447b53426a40f1c39da17dad5f4786485db55ee38737891111a4d4',
'wind-turbine-count': '0', 'tenure': 'Rented (social)', 'floor-level': '',
'potential-energy-efficiency': '89', 'hot-water-energy-eff': 'Very Poor',
'low-energy-lighting': '60', 'walls-description': 'Cavity wall, filled cavity',
'hotwater-description': 'From main system'
}
# Disabled ad-hoc comparison of the two EPCs, restricted to the heating / hot water fields
# differences = []
# for k, v in ending_epc.items():
# if v != starting_epc[k]:
# differences.append(
# {
# "variable": k,
# "starting_value": starting_epc[k],
# "ending_value": v
# }
# )
# differences = pd.DataFrame(differences)
#
# diffs = differences[
# differences["variable"].isin(
# [
# "mainheat-energy-eff",
# "mainheatcont-description",
# "mainheatc-energy-eff",
# "main-fuel",
# "mainheat-env-eff",
# "mainheat-description",
# "hot-water-energy-eff",
# "hotwater-description"
# ]
# )
# ]
# Load the shared lookup data from the dev bucket: the cleaning dataset (parquet), the cleaned EPC lookup
# (msgpack-encoded, despite the .bson file name) and the solar photo supply lookups
cleaning_data = read_dataframe_from_s3_parquet(
bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
)
cleaned = read_from_s3(
s3_file_name="cleaned_epc_data/cleaned.bson",
bucket_name="retrofit-data-dev"
)
cleaned = msgpack.unpackb(cleaned, raw=False)
photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
# Build the property from the starting EPC only
epc = EPCRecord(
epc_records={
'original_epc': starting_epc,
'full_sap_epc': {},
'old_data': []
},
run_mode="newdata",
cleaning_data=cleaning_data
)
home = Property(
id=0,
address="",
postcode="",
epc_record=epc,
already_installed={},
non_invasive_recommendations={},
)
# Planning / heritage flags are set by hand on the property before its components are built
home.in_conservation_area = False
home.is_listed = False
home.is_heritage = False
home.restricted_measures = True
home.get_components(
cleaned=cleaned,
photo_supply_lookup=photo_supply_lookup,
floor_area_decile_thresholds=floor_area_decile_thresholds
)
recommender = HeatingRecommender(property_instance=home)
recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False)
property_recommendations = Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations])
assert len(recommender.heating_recommendations) == 1
# First pass: score the recommendation exactly as the recommender produced it
home.create_base_difference_epc_record(cleaned_lookup=cleaned)
home.adjust_difference_record_with_recommendations(
property_recommendations, []
)
# Drop the change / ending columns before the frame is handed to the model
scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(
columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
"carbon_ending"]
)
# Restrict the model API to the SAP change model only
model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
model_api.MODEL_PREFIXES = ["sap_change_predictions"]
predictions_dict = model_api.predict_all(
df=scoring_data,
bucket="retrofit-data-dev",
prediction_buckets={
"sap_change_predictions": "retrofit-sap-predictions-dev",
}
)
# NOTE(review): exact float equality against a model output - presumably predictions are rounded to one
# decimal place; confirm
assert predictions_dict["sap_change_predictions"]["predictions"].values[0] == 69.3
# In actuality with this property, the heating controls get downgraded, so we test a manual patch of this
patched_simulation_config = {
'mainheat_energy_eff_ending': "Very Good",
'hot_water_energy_eff_ending': 'Very Poor',
'has_boiler_ending': False,
'has_air_source_heat_pump_ending': True,
'has_electric_ending': True,
'has_mains_gas_ending': False,
'fuel_type_ending': 'electricity',
'trvs_ending': None,
"mainheatc_energy_eff_ending": 'Average'
}
# PATCHING
# Second pass: replace the recommendation's simulation config with the patched one, rebuild the difference
# record from scratch and score again
property_recommendations_patch = Recommendations.insert_temp_recommendation_id(
[recommender.heating_recommendations]
)
property_recommendations_patch[0][0]["simulation_config"] = patched_simulation_config
home.create_base_difference_epc_record(cleaned_lookup=cleaned)
home.adjust_difference_record_with_recommendations(
property_recommendations_patch, []
)
scoring_data_patch = pd.DataFrame(home.recommendations_scoring_data).drop(
columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
"carbon_ending"]
)
model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
model_api.MODEL_PREFIXES = ["sap_change_predictions"]
predictions_dict_patch = model_api.predict_all(
df=scoring_data_patch,
bucket="retrofit-data-dev",
prediction_buckets={
"sap_change_predictions": "retrofit-sap-predictions-dev",
}
)
# The error is only 0.3, so the model is working
assert predictions_dict_patch["sap_change_predictions"]["predictions"].values[0] == 64.3
# Sanity check on the fixture itself: the real ending EPC recorded a score of 64
assert ending_epc["current-energy-efficiency"] == '64'
def test_air_source_heat_pump_lpg_boiler(self):
    """
    Score an air source heat pump recommendation for an LPG-boiler bungalow with the SAP change model.

    The prediction is made twice: first from the recommender's own simulation config, then with the
    config patched by hand to mirror the heating system recorded on the follow-up EPC (``ending_epc``).
    ``ending_epc`` is kept as the real-world reference for the scores being asserted.
    :return:
    """
    starting_epc = {
        'low-energy-fixed-light-count': '', 'address': 'Holly Lodge, The Drive, Perry',
        'uprn-source': 'Energy Assessor', 'floor-height': '2.8', 'heating-cost-potential': '1628',
        'unheated-corridor-length': '', 'hot-water-cost-potential': '175',
        'construction-age-band': 'England and Wales: 1950-1966', 'potential-energy-rating': 'D',
        'mainheat-energy-eff': 'Poor', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Average',
        'environment-impact-potential': '70', 'glazed-type': 'double glazing, unknown install date',
        'heating-cost-current': '2158', 'address3': 'Perry',
        'mainheatcont-description': 'No time or thermostatic control of room temperature',
        'sheating-energy-eff': 'N/A', 'property-type': 'Bungalow', 'local-authority-label': 'Huntingdonshire',
        'fixed-lighting-outlets-count': '12', 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural',
        'hot-water-cost-current': '257', 'county': 'Cambridgeshire', 'postcode': 'PE28 0SX',
        'solar-water-heating-flag': 'N', 'constituency': 'E14000757', 'co2-emissions-potential': '3.3',
        'number-heated-rooms': '5', 'floor-description': 'Solid, no insulation (assumed)',
        'energy-consumption-potential': '128', 'local-authority': 'E07000011', 'built-form': 'Semi-Detached',
        'number-open-fireplaces': '0', 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal',
        'inspection-date': '2023-08-31', 'mains-gas-flag': 'N', 'co2-emiss-curr-per-floor-area': '51',
        'address1': 'Holly Lodge', 'heat-loss-corridor': '', 'flat-storey-count': '',
        'constituency-label': 'Huntingdon', 'roof-energy-eff': 'Good', 'total-floor-area': '117.0',
        'building-reference-number': '10005199915', 'environment-impact-current': '50',
        'co2-emissions-current': '5.9', 'roof-description': 'Pitched, 270 mm loft insulation',
        'floor-energy-eff': 'N/A', 'number-habitable-rooms': '5', 'address2': 'The Drive',
        'hot-water-env-eff': 'Good', 'posttown': 'HUNTINGDON', 'mainheatc-energy-eff': 'Very Poor',
        'main-fuel': 'LPG (not community)', 'lighting-env-eff': 'Average', 'windows-energy-eff': 'Average',
        'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
        'lighting-description': 'Low energy lighting in 33% of fixed outlets', 'roof-env-eff': 'Good',
        'walls-energy-eff': 'Average', 'photo-supply': '0.0', 'lighting-cost-potential': '166',
        'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
        'lodgement-datetime': '2023-10-30 13:46:54', 'flat-top-storey': '', 'current-energy-rating': 'F',
        'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average',
        'transaction-type': 'ECO assessment', 'uprn': '100091200828', 'current-energy-efficiency': '32',
        'energy-consumption-current': '243', 'mainheat-description': 'Boiler and radiators, LPG',
        'lighting-cost-current': '277', 'lodgement-date': '2023-10-30', 'extension-count': '0',
        'mainheatc-env-eff': 'Very Poor',
        'lmk-key': 'f1d3bd4b8b50bc9b006231ccb158537c408523b748b3f4ef7e98cd03b144afa5', 'wind-turbine-count': '0',
        'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '56',
        'hot-water-energy-eff': 'Poor', 'low-energy-lighting': '33',
        'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
    }
    ending_epc = {
        'low-energy-fixed-light-count': '', 'address': 'Holly Lodge, The Drive, Perry',
        'uprn-source': 'Energy Assessor', 'floor-height': '2.8', 'heating-cost-potential': '917',
        'unheated-corridor-length': '', 'hot-water-cost-potential': '328',
        'construction-age-band': 'England and Wales: 1950-1966', 'potential-energy-rating': 'A',
        'mainheat-energy-eff': 'Very Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Average',
        'environment-impact-potential': '96', 'glazed-type': 'double glazing, unknown install date',
        'heating-cost-current': '1098', 'address3': 'Perry',
        'mainheatcont-description': 'Programmer, TRVs and bypass', 'sheating-energy-eff': 'N/A',
        'property-type': 'Bungalow', 'local-authority-label': 'Huntingdonshire',
        'fixed-lighting-outlets-count': '12', 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural',
        'hot-water-cost-current': '328', 'county': 'Cambridgeshire', 'postcode': 'PE28 0SX',
        'solar-water-heating-flag': 'N', 'constituency': 'E14000757', 'co2-emissions-potential': '0.3',
        'number-heated-rooms': '5', 'floor-description': 'Solid, no insulation (assumed)',
        'energy-consumption-potential': '16', 'local-authority': 'E07000011', 'built-form': 'Semi-Detached',
        'number-open-fireplaces': '0', 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal',
        'inspection-date': '2023-10-05', 'mains-gas-flag': 'N', 'co2-emiss-curr-per-floor-area': '6',
        'address1': 'Holly Lodge', 'heat-loss-corridor': '', 'flat-storey-count': '',
        'constituency-label': 'Huntingdon', 'roof-energy-eff': 'Good', 'total-floor-area': '117.0',
        'building-reference-number': '10005199915', 'environment-impact-current': '92',
        'co2-emissions-current': '0.7', 'roof-description': 'Pitched, 270 mm loft insulation',
        'floor-energy-eff': 'N/A', 'number-habitable-rooms': '5', 'address2': 'The Drive',
        'hot-water-env-eff': 'Very Good', 'posttown': 'HUNTINGDON', 'mainheatc-energy-eff': 'Average',
        'main-fuel': 'electricity (not community)', 'lighting-env-eff': 'Average', 'windows-energy-eff': 'Average',
        'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
        'lighting-description': 'Low energy lighting in 33% of fixed outlets', 'roof-env-eff': 'Good',
        'walls-energy-eff': 'Average', 'photo-supply': '', 'lighting-cost-potential': '166',
        'mainheat-env-eff': 'Very Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
        'lodgement-datetime': '2023-11-01 16:29:16', 'flat-top-storey': '', 'current-energy-rating': 'A',
        'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average',
        'transaction-type': 'ECO assessment', 'uprn': '100091200828', 'current-energy-efficiency': '92',
        'energy-consumption-current': '37', 'mainheat-description': 'Air source heat pump, radiators, electric',
        'lighting-cost-current': '277', 'lodgement-date': '2023-11-01', 'extension-count': '0',
        'mainheatc-env-eff': 'Average',
        'lmk-key': 'cb7f2838b727907767c8c2a385cd22f722b1e4745463391d910d228e52124515', 'wind-turbine-count': '0',
        'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '95',
        'hot-water-energy-eff': 'Good', 'low-energy-lighting': '33',
        'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
    }

    data_bucket = "retrofit-data-dev"
    # Target/outcome columns are removed before the frame is handed to the model
    outcome_columns = [
        "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
        "carbon_ending",
    ]

    # Reference data pulled from S3
    cleaning_frame = read_dataframe_from_s3_parquet(
        bucket_name=data_bucket,
        file_key="sap_change_model/cleaning_dataset.parquet",
    )
    packed_lookup = read_from_s3(s3_file_name="cleaned_epc_data/cleaned.bson", bucket_name=data_bucket)
    cleaned_lookup = msgpack.unpackb(packed_lookup, raw=False)
    pv_lookup, decile_thresholds = SolarPhotoSupply.load(bucket=data_bucket)

    # Build the property from the starting EPC only
    record = EPCRecord(
        epc_records={'original_epc': starting_epc, 'full_sap_epc': {}, 'old_data': []},
        run_mode="newdata",
        cleaning_data=cleaning_frame,
    )
    home = Property(
        id=0, address="", postcode="", epc_record=record, already_installed={}, non_invasive_recommendations={},
    )
    home.in_conservation_area = False
    home.is_listed = False
    home.is_heritage = False
    home.restricted_measures = True
    home.get_components(
        cleaned=cleaned_lookup,
        photo_supply_lookup=pv_lookup,
        floor_area_decile_thresholds=decile_thresholds,
    )

    def predict_sap(recommendation_sets):
        # Re-create the base difference record, apply the recommendations, then score with the SAP model
        home.create_base_difference_epc_record(cleaned_lookup=cleaned_lookup)
        home.adjust_difference_record_with_recommendations(recommendation_sets, [])
        features = pd.DataFrame(home.recommendations_scoring_data).drop(columns=outcome_columns)
        api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
        api.MODEL_PREFIXES = ["sap_change_predictions"]
        return api.predict_all(
            df=features,
            bucket=data_bucket,
            prediction_buckets={"sap_change_predictions": "retrofit-sap-predictions-dev"},
        )

    recommender = HeatingRecommender(property_instance=home)
    recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False)
    property_recommendations = Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations])
    assert len(recommender.heating_recommendations) == 1

    predictions_dict = predict_sap(property_recommendations)
    # We predict a huge uplift but not quite as much as the EPC, due to some distinct differences between our
    # recommendation and the EPC
    assert predictions_dict["sap_change_predictions"]["predictions"].values[0] == 81.3
    assert ending_epc['current-energy-efficiency'] == '92'

    # PATCH
    # Overwrite the simulation config so it reflects the ending EPC, to see how close we get to its score
    patched_simulation_config = {
        'mainheat_energy_eff_ending': "Very Good",
        'hot_water_energy_eff_ending': 'Good',
        'has_boiler_ending': False,
        'has_air_source_heat_pump_ending': True,
        'has_electric_ending': True,
        'has_lpg_ending': False,
        'fuel_type_ending': 'electricity',
        'switch_system_ending': 'programmer',
        'no_control_ending': None,
        'auxiliary_systems_ending': 'bypass',
        'trvs_ending': 'trvs',
        "mainheatc_energy_eff_ending": 'Average'
    }
    property_recommendations_patch = Recommendations.insert_temp_recommendation_id(
        [recommender.heating_recommendations]
    )
    property_recommendations_patch[0][0]["simulation_config"] = patched_simulation_config

    predictions_dict_patch = predict_sap(property_recommendations_patch)
    assert predictions_dict_patch["sap_change_predictions"]["predictions"].values[0] == 88.9
    # We still underpredict but the improvement is notable
def test_offgrid(self):
    """
    Score competing heating upgrades for a property currently heated by electric room heaters.

    Both the air source heat pump and the boiler upgrade recommenders are run, and every resulting
    recommendation is scored with the SAP change model so the options can be compared:
    a) Upgrading to a boiler
    b) Upgrading to a heat pump
    :return:
    """
    starting_epc = {
        'low-energy-fixed-light-count': '', 'address': '6 Beech Road', 'uprn-source': 'Energy Assessor',
        'floor-height': '2.4', 'heating-cost-potential': '612', 'unheated-corridor-length': '',
        'hot-water-cost-potential': '123', 'construction-age-band': 'England and Wales: 1930-1949',
        'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Very Poor', 'windows-env-eff': 'Good',
        'lighting-energy-eff': 'Good', 'environment-impact-potential': '87',
        'glazed-type': 'double glazing installed during or after 2002', 'heating-cost-current': '2278',
        'address3': '', 'mainheatcont-description': 'Appliance thermostats', 'sheating-energy-eff': 'N/A',
        'property-type': 'House', 'local-authority-label': 'Dudley', 'fixed-lighting-outlets-count': '9',
        'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '604',
        'county': '', 'postcode': 'DY1 4BP', 'solar-water-heating-flag': 'N', 'constituency': 'E14000671',
        'co2-emissions-potential': '1.0', 'number-heated-rooms': '4',
        'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '93',
        'local-authority': 'E08000027', 'built-form': 'End-Terrace', 'number-open-fireplaces': '0',
        'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2024-03-13',
        'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '83', 'address1': '6 Beech Road',
        'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Dudley North',
        'roof-energy-eff': 'Very Poor', 'total-floor-area': '60.0', 'building-reference-number': '10005780080',
        'environment-impact-current': '41', 'co2-emissions-current': '5.0',
        'roof-description': 'Pitched, 12 mm loft insulation', 'floor-energy-eff': 'N/A',
        'number-habitable-rooms': '4', 'address2': '', 'hot-water-env-eff': 'Poor', 'posttown': 'DUDLEY',
        'mainheatc-energy-eff': 'Good', 'main-fuel': 'electricity (not community)', 'lighting-env-eff': 'Good',
        'windows-energy-eff': 'Good', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
        'lighting-description': 'Low energy lighting in 67% of fixed outlets', 'roof-env-eff': 'Very Poor',
        'walls-energy-eff': 'Average', 'photo-supply': '0.0', 'lighting-cost-potential': '113',
        'mainheat-env-eff': 'Poor', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
        'lodgement-datetime': '2024-03-13 11:29:11', 'flat-top-storey': '', 'current-energy-rating': 'F',
        'secondheat-description': 'None', 'walls-env-eff': 'Average', 'transaction-type': 'rental',
        'uprn': '90055152', 'current-energy-efficiency': '32', 'energy-consumption-current': '491',
        'mainheat-description': 'Room heaters, electric', 'lighting-cost-current': '113',
        'lodgement-date': '2024-03-13', 'extension-count': '1', 'mainheatc-env-eff': 'Good',
        'lmk-key': '78ddf851b660e599a0894924d0e6b503980f5e0ad1aa711f8411718dc2989c44', 'wind-turbine-count': '0',
        'tenure': 'Rented (social)', 'floor-level': '', 'potential-energy-efficiency': '87',
        'hot-water-energy-eff': 'Very Poor', 'low-energy-lighting': '67',
        'walls-description': 'Cavity wall, filled cavity',
        'hotwater-description': 'Electric immersion, standard tariff'
    }

    data_bucket = "retrofit-data-dev"

    # Reference data pulled from S3
    cleaning_frame = read_dataframe_from_s3_parquet(
        bucket_name=data_bucket,
        file_key="sap_change_model/cleaning_dataset.parquet",
    )
    packed_lookup = read_from_s3(s3_file_name="cleaned_epc_data/cleaned.bson", bucket_name=data_bucket)
    cleaned_lookup = msgpack.unpackb(packed_lookup, raw=False)
    pv_lookup, decile_thresholds = SolarPhotoSupply.load(bucket=data_bucket)

    # Build the property from the starting EPC
    record = EPCRecord(
        epc_records={'original_epc': starting_epc, 'full_sap_epc': {}, 'old_data': []},
        run_mode="newdata",
        cleaning_data=cleaning_frame,
    )
    home = Property(
        id=0, address="", postcode="", epc_record=record, already_installed={}, non_invasive_recommendations={},
    )
    home.in_conservation_area = False
    home.is_listed = False
    home.is_heritage = False
    home.restricted_measures = True
    home.get_components(
        cleaned=cleaned_lookup,
        photo_supply_lookup=pv_lookup,
        floor_area_decile_thresholds=decile_thresholds,
    )

    # Collect both families of heating recommendation on the same recommender
    recommender = HeatingRecommender(property_instance=home)
    recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False)
    recommender.recommend_boiler_upgrades(phase=0, system_change=True, exising_room_heaters=False)
    assert len(recommender.heating_recommendations) == 3

    property_recommendations = Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations])
    home.create_base_difference_epc_record(cleaned_lookup=cleaned_lookup)
    home.adjust_difference_record_with_recommendations(property_recommendations, [])

    # Target/outcome columns are removed before the frame is handed to the model
    outcome_columns = [
        "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
        "carbon_ending",
    ]
    features = pd.DataFrame(home.recommendations_scoring_data).drop(columns=outcome_columns)

    api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
    api.MODEL_PREFIXES = ["sap_change_predictions"]
    predictions_dict = api.predict_all(
        df=features,
        bucket=data_bucket,
        prediction_buckets={"sap_change_predictions": "retrofit-sap-predictions-dev"},
    )
    # The ASHP isn't better under SAP, compared to a gas boiler with good heat controls
    assert predictions_dict["sap_change_predictions"]["predictions"].tolist() == [66.9, 65.5, 65.9]

View file

@ -2,6 +2,13 @@ import pytest
from recommendations.SolarPvRecommendations import SolarPvRecommendations
from backend.Property import Property
from etl.epc.Record import EPCRecord
import pandas as pd
from datetime import datetime
from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from recommendations.Recommendations import Recommendations
from backend.ml_models.api import ModelApi
import msgpack
class TestSolarPvRecommendations:
@ -82,3 +89,321 @@ class TestSolarPvRecommendations:
'photo_supply': 4000
}
]
def test_model(self):
    """
    Run the solar PV recommender together with the SAP change model on a real before/after EPC pair.

    The property is built from ``starting_epc``; only the recommendations with a ``photo_supply`` of 50
    (the value recorded on ``ending_epc``) are scored, and the prediction is compared against the
    ending EPC's energy efficiency.
    :return:
    """
    starting_epc = {
        'low-energy-fixed-light-count': '', 'address': '27 Cromwell Street', 'uprn-source': 'Energy Assessor',
        'floor-height': '2.5', 'heating-cost-potential': '443', 'unheated-corridor-length': '',
        'hot-water-cost-potential': '53', 'construction-age-band': 'England and Wales: before 1900',
        'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average',
        'lighting-energy-eff': 'Very Poor', 'environment-impact-potential': '85',
        'glazed-type': 'double glazing installed before 2002', 'heating-cost-current': '904', 'address3': '',
        'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'sheating-energy-eff': 'N/A',
        'property-type': 'House', 'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '10',
        'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '79',
        'county': 'Lincolnshire', 'postcode': 'DN21 1DH', 'solar-water-heating-flag': 'N',
        'constituency': 'E14000707', 'co2-emissions-potential': '1.5', 'number-heated-rooms': '5',
        'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '92',
        'local-authority': 'E07000142', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0',
        'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2021-11-17',
        'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '61', 'address1': '27 Cromwell Street',
        'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Gainsborough',
        'roof-energy-eff': 'Very Poor', 'total-floor-area': '89.0', 'building-reference-number': '10001989430',
        'environment-impact-current': '47', 'co2-emissions-current': '5.4',
        'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A',
        'number-habitable-rooms': '5', 'address2': '', 'hot-water-env-eff': 'Good', 'posttown': 'GAINSBOROUGH',
        'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Very Poor',
        'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
        'lighting-description': 'No low energy lighting', 'roof-env-eff': 'Very Poor',
        'walls-energy-eff': 'Very Poor', 'photo-supply': '0.0', 'lighting-cost-potential': '72',
        'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
        'lodgement-datetime': '2021-12-01 10:12:23', 'flat-top-storey': '', 'current-energy-rating': 'E',
        'secondheat-description': 'Room heaters, mains gas', 'walls-env-eff': 'Very Poor',
        'transaction-type': 'ECO assessment', 'uprn': '100030949912', 'current-energy-efficiency': '54',
        'energy-consumption-current': '346', 'mainheat-description': 'Boiler and radiators, mains gas',
        'lighting-cost-current': '144', 'lodgement-date': '2021-12-01', 'extension-count': '2',
        'mainheatc-env-eff': 'Good', 'lmk-key': '3ec5533af02ec78361c1f9bea8dd2e878c2c6fa6cf59e5cc505c3eeb038e0f91',
        'wind-turbine-count': '0', 'tenure': 'Owner-occupied', 'floor-level': '',
        'potential-energy-efficiency': '86', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '0',
        'walls-description': 'Solid brick, as built, no insulation (assumed)',
        'hotwater-description': 'From main system'
    }
    ending_epc = {
        'low-energy-fixed-light-count': '', 'address': '27 Cromwell Street', 'uprn-source': 'Energy Assessor',
        'floor-height': '2.5', 'heating-cost-potential': '443', 'unheated-corridor-length': '',
        'hot-water-cost-potential': '53', 'construction-age-band': 'England and Wales: before 1900',
        'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average',
        'lighting-energy-eff': 'Very Poor', 'environment-impact-potential': '86',
        'glazed-type': 'double glazing installed before 2002', 'heating-cost-current': '904', 'address3': '',
        'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'sheating-energy-eff': 'N/A',
        'property-type': 'House', 'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '10',
        'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '79',
        'county': 'Lincolnshire', 'postcode': 'DN21 1DH', 'solar-water-heating-flag': 'N',
        'constituency': 'E14000707', 'co2-emissions-potential': '1.4', 'number-heated-rooms': '5',
        'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '84',
        'local-authority': 'E07000142', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0',
        'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2021-12-21',
        'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '49', 'address1': '27 Cromwell Street',
        'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Gainsborough',
        'roof-energy-eff': 'Very Poor', 'total-floor-area': '89.0', 'building-reference-number': '10001989430',
        'environment-impact-current': '55', 'co2-emissions-current': '4.4',
        'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A',
        'number-habitable-rooms': '5', 'address2': '', 'hot-water-env-eff': 'Good', 'posttown': 'GAINSBOROUGH',
        'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Very Poor',
        'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
        'lighting-description': 'No low energy lighting', 'roof-env-eff': 'Very Poor',
        'walls-energy-eff': 'Very Poor', 'photo-supply': '50.0', 'lighting-cost-potential': '72',
        'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
        'lodgement-datetime': '2021-12-21 17:33:09', 'flat-top-storey': '', 'current-energy-rating': 'D',
        'secondheat-description': 'Room heaters, mains gas', 'walls-env-eff': 'Very Poor',
        'transaction-type': 'ECO assessment', 'uprn': '100030949912', 'current-energy-efficiency': '65',
        'energy-consumption-current': '277', 'mainheat-description': 'Boiler and radiators, mains gas',
        'lighting-cost-current': '144', 'lodgement-date': '2021-12-21', 'extension-count': '2',
        'mainheatc-env-eff': 'Good', 'lmk-key': 'b0b19583c59afbc69db12f4d6c98cd8837e80da3214d577c426eb3e672d424fc',
        'wind-turbine-count': '0', 'tenure': 'Owner-occupied', 'floor-level': '',
        'potential-energy-efficiency': '88', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '0',
        'walls-description': 'Solid brick, as built, no insulation (assumed)',
        'hotwater-description': 'From main system'
    }

    data_bucket = "retrofit-data-dev"

    # Reference data pulled from S3
    cleaning_frame = read_dataframe_from_s3_parquet(
        bucket_name=data_bucket,
        file_key="sap_change_model/cleaning_dataset.parquet",
    )
    packed_lookup = read_from_s3(s3_file_name="cleaned_epc_data/cleaned.bson", bucket_name=data_bucket)
    cleaned_lookup = msgpack.unpackb(packed_lookup, raw=False)
    pv_lookup, decile_thresholds = SolarPhotoSupply.load(bucket=data_bucket)

    # Build the property from the starting EPC only
    record = EPCRecord(
        epc_records={'original_epc': starting_epc, 'full_sap_epc': {}, 'old_data': []},
        run_mode="newdata",
        cleaning_data=cleaning_frame,
    )
    home = Property(
        id=0, address="", postcode="", epc_record=record, already_installed={}, non_invasive_recommendations={},
    )
    home.in_conservation_area = False
    home.is_listed = False
    home.is_heritage = False
    home.restricted_measures = True
    home.get_components(
        cleaned=cleaned_lookup,
        photo_supply_lookup=pv_lookup,
        floor_area_decile_thresholds=decile_thresholds,
    )

    recommender = SolarPvRecommendations(property_instance=home)
    recommender.recommend(phase=0)
    # Keep only the options whose photo supply matches the 50% recorded on the ending EPC
    half_coverage = [option for option in recommender.recommendation if option["photo_supply"] == 50]
    assert len(half_coverage) == 2

    property_recommendations = Recommendations.insert_temp_recommendation_id([half_coverage])
    home.create_base_difference_epc_record(cleaned_lookup=cleaned_lookup)
    home.adjust_difference_record_with_recommendations(property_recommendations, [])

    # Target/outcome columns are removed before the frame is handed to the model
    outcome_columns = [
        "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
        "carbon_ending",
    ]
    features = pd.DataFrame(home.recommendations_scoring_data).drop(columns=outcome_columns)

    api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
    api.MODEL_PREFIXES = ["sap_change_predictions"]
    predictions_dict = api.predict_all(
        df=features,
        bucket=data_bucket,
        prediction_buckets={"sap_change_predictions": "retrofit-sap-predictions-dev"},
    )
    assert predictions_dict["sap_change_predictions"]["predictions"].tolist() == [65.9, 65.9]
    assert ending_epc["current-energy-efficiency"] == '65'
def test_model2(self):
    """
    Second solar PV + SAP model check, on a bungalow whose follow-up EPC records 40% photo supply.

    The property is built from ``starting_epc``; only the recommendations with a ``photo_supply`` of 40
    are scored, and the prediction is compared against the energy efficiency on ``ending_epc``.
    :return:
    """
    ending_epc = {
        'low-energy-fixed-light-count': '', 'address': '6 Kenmare Crescent',
        'uprn-source': 'Energy Assessor', 'floor-height': '2.49', 'heating-cost-potential': '464',
        'unheated-corridor-length': '', 'hot-water-cost-potential': '46',
        'construction-age-band': 'England and Wales: 1967-1975', 'potential-energy-rating': 'B',
        'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Very Good',
        'environment-impact-potential': '91', 'glazed-type': 'not defined', 'heating-cost-current': '535',
        'address3': '', 'mainheatcont-description': 'Programmer, room thermostat and TRVs',
        'sheating-energy-eff': 'N/A', 'property-type': 'Bungalow',
        'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '9',
        'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '69',
        'county': 'Lincolnshire', 'postcode': 'DN21 1PR', 'solar-water-heating-flag': 'N',
        'constituency': 'E14000707', 'co2-emissions-potential': '0.7', 'number-heated-rooms': '3',
        'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '56',
        'local-authority': 'E07000142', 'built-form': 'Semi-Detached', 'number-open-fireplaces': '0',
        'windows-description': 'Fully double glazed', 'glazed-area': 'Much More Than Typical',
        'inspection-date': '2022-08-24', 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '18',
        'address1': '6 Kenmare Crescent', 'heat-loss-corridor': '', 'flat-storey-count': '',
        'constituency-label': 'Gainsborough', 'roof-energy-eff': 'Very Good', 'total-floor-area': '66.0',
        'building-reference-number': '10002845316', 'environment-impact-current': '85',
        'co2-emissions-current': '1.2', 'roof-description': 'Pitched, 300 mm loft insulation',
        'floor-energy-eff': 'N/A', 'number-habitable-rooms': '3', 'address2': '',
        'hot-water-env-eff': 'Good', 'posttown': 'GAINSBOROUGH', 'mainheatc-energy-eff': 'Good',
        'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Very Good',
        'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
        'lighting-description': 'Low energy lighting in all fixed outlets', 'roof-env-eff': 'Very Good',
        'walls-energy-eff': 'Average', 'photo-supply': '40.0', 'lighting-cost-potential': '65',
        'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
        'lodgement-datetime': '2022-08-24 15:39:42', 'flat-top-storey': '', 'current-energy-rating': 'B',
        'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average',
        'transaction-type': 'ECO assessment', 'uprn': '100030952942', 'current-energy-efficiency': '87',
        'energy-consumption-current': '100', 'mainheat-description': 'Boiler and radiators, mains gas',
        'lighting-cost-current': '65', 'lodgement-date': '2022-08-24', 'extension-count': '0',
        'mainheatc-env-eff': 'Good',
        'lmk-key': 'e20be883431b1fed15db7fa1f52634fb7655d2b80c2fdad37df779f93ec4dafd',
        'wind-turbine-count': '0', 'tenure': 'Owner-occupied', 'floor-level': '',
        'potential-energy-efficiency': '91', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '100',
        'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
    }
    starting_epc = {
        'low-energy-fixed-light-count': '', 'address': '6 Kenmare Crescent', 'uprn-source': 'Energy Assessor',
        'floor-height': '2.49', 'heating-cost-potential': '464', 'unheated-corridor-length': '',
        'hot-water-cost-potential': '46', 'construction-age-band': 'England and Wales: 1967-1975',
        'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average',
        'lighting-energy-eff': 'Very Good', 'environment-impact-potential': '85', 'glazed-type': 'not defined',
        'heating-cost-current': '535', 'address3': '',
        'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'sheating-energy-eff': 'N/A',
        'property-type': 'Bungalow', 'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '9',
        'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '69',
        'county': 'Lincolnshire', 'postcode': 'DN21 1PR', 'solar-water-heating-flag': 'N',
        'constituency': 'E14000707', 'co2-emissions-potential': '1.2', 'number-heated-rooms': '3',
        'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '102',
        'local-authority': 'E07000142', 'built-form': 'Semi-Detached', 'number-open-fireplaces': '0',
        'windows-description': 'Fully double glazed', 'glazed-area': 'Much More Than Typical',
        'inspection-date': '2022-05-31', 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '40',
        'address1': '6 Kenmare Crescent', 'heat-loss-corridor': '', 'flat-storey-count': '',
        'constituency-label': 'Gainsborough', 'roof-energy-eff': 'Very Good', 'total-floor-area': '66.0',
        'building-reference-number': '10002845316', 'environment-impact-current': '68',
        'co2-emissions-current': '2.6', 'roof-description': 'Pitched, 300 mm loft insulation',
        'floor-energy-eff': 'N/A', 'number-habitable-rooms': '3', 'address2': '', 'hot-water-env-eff': 'Good',
        'posttown': 'GAINSBOROUGH', 'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)',
        'lighting-env-eff': 'Very Good', 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A',
        'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in all fixed outlets',
        'roof-env-eff': 'Very Good', 'walls-energy-eff': 'Average', 'photo-supply': '0.0',
        'lighting-cost-potential': '65', 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100',
        'main-heating-controls': '', 'lodgement-datetime': '2022-06-15 08:38:02', 'flat-top-storey': '',
        'current-energy-rating': 'D', 'secondheat-description': 'Room heaters, electric',
        'walls-env-eff': 'Average', 'transaction-type': 'ECO assessment', 'uprn': '100030952942',
        'current-energy-efficiency': '68', 'energy-consumption-current': '227',
        'mainheat-description': 'Boiler and radiators, mains gas', 'lighting-cost-current': '65',
        'lodgement-date': '2022-06-15', 'extension-count': '0', 'mainheatc-env-eff': 'Good',
        'lmk-key': 'ce181970b7077cb9b4626242bfb010b30a0e48541b5f22427e81f1adbeeec4f2', 'wind-turbine-count': '0',
        'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '85',
        'hot-water-energy-eff': 'Good', 'low-energy-lighting': '100',
        'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
    }

    # Reference data pulled from S3
    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
    )
    cleaned = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name="retrofit-data-dev"
    )
    cleaned = msgpack.unpackb(cleaned, raw=False)
    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")

    # Build the property from the starting EPC only
    epc = EPCRecord(
        epc_records={
            'original_epc': starting_epc,
            'full_sap_epc': {},
            'old_data': []
        },
        run_mode="newdata",
        cleaning_data=cleaning_data
    )
    home = Property(
        id=0,
        address="",
        postcode="",
        epc_record=epc,
        already_installed={},
        non_invasive_recommendations={},
    )
    home.in_conservation_area = False
    home.is_listed = False
    home.is_heritage = False
    home.restricted_measures = True
    home.get_components(
        cleaned=cleaned,
        photo_supply_lookup=photo_supply_lookup,
        floor_area_decile_thresholds=floor_area_decile_thresholds
    )

    recommender = SolarPvRecommendations(property_instance=home)
    recommender.recommend(phase=0)
    # Keep only the options whose photo supply matches the 40% recorded on the ending EPC
    coverage_40_percent = [x for x in recommender.recommendation if x["photo_supply"] == 40]
    assert len(coverage_40_percent) == 2

    property_recommendations = Recommendations.insert_temp_recommendation_id([coverage_40_percent])
    home.create_base_difference_epc_record(cleaned_lookup=cleaned)
    home.adjust_difference_record_with_recommendations(
        property_recommendations, []
    )
    # Target/outcome columns are removed before the frame is handed to the model
    scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(
        columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
                 "carbon_ending"]
    )
    model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
    model_api.MODEL_PREFIXES = ["sap_change_predictions"]
    predictions_dict = model_api.predict_all(
        df=scoring_data,
        bucket="retrofit-data-dev",
        prediction_buckets={
            "sap_change_predictions": "retrofit-sap-predictions-dev",
        }
    )
    assert predictions_dict["sap_change_predictions"]["predictions"].tolist() == [87.1, 87.1]
    assert ending_epc["current-energy-efficiency"] == '87'
    assert starting_epc["current-energy-efficiency"] == '68'

View file

@ -1,9 +1,10 @@
import pickle
import boto3
from io import BytesIO, StringIO
from botocore.exceptions import NoCredentialsError, PartialCredentialsError
import csv
import pandas as pd
from io import BytesIO, StringIO
from utils.logger import setup_logger
from botocore.exceptions import NoCredentialsError, PartialCredentialsError
logger = setup_logger()
@ -184,7 +185,7 @@ def read_pickle_from_s3(bucket_name, s3_file_name):
logger.errpr("Incomplete credentials provided.")
return None
except Exception as e:
logger.errpr(f'Failed to download data from {bucket_name}/{s3_file_name}: {str(e)}')
logger.error(f'Failed to download data from {bucket_name}/{s3_file_name}: {str(e)}')
return None
# Deserialize data from pickle format
@ -224,3 +225,22 @@ def read_excel_from_s3(bucket_name, file_key, header_row):
df.reset_index(drop=True, inplace=True)
return df
def read_csv_from_s3(bucket_name, filepath):
    """
    Download a CSV object from S3 and parse it into a list of row dictionaries.

    :param bucket_name: Name of the S3 bucket holding the object
    :param filepath: Key of the CSV object within the bucket
    :return: A list with one dict per data row, as produced by csv.DictReader (the first row supplies the keys)
    """
    client = boto3.client('s3')
    response = client.get_object(Bucket=bucket_name, Key=filepath)

    # The whole payload is read and decoded as UTF-8; csv.DictReader needs a text stream, hence StringIO
    text = response['Body'].read().decode('utf-8')
    return list(csv.DictReader(StringIO(text)))