diff --git a/.idea/.gitignore b/.idea/.gitignore
index 26d33521..8f00030d 100644
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
@@ -1,3 +1,5 @@
# Default ignored files
/shelf/
/workspace.xml
+# GitHub Copilot persisted chat sessions
+/copilot/chatSessions
diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 6f308057..1122b380 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
-
+
diff --git a/backend/Property.py b/backend/Property.py
index bfb4bf1f..12e2dbb1 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -1,4 +1,5 @@
import os
+import ast
from itertools import groupby
import pandas as pd
@@ -11,11 +12,17 @@ from utils.s3 import read_dataframe_from_s3_parquet
from etl.epc.settings import DATA_ANOMALY_MATCHES
from recommendations.rdsap_tables import FLOOR_LEVEL_MAP
from recommendations.recommendation_utils import (
- estimate_perimeter, get_wall_type, estimate_external_wall_area, esimtate_pitched_roof_area, estimate_windows
+ estimate_perimeter,
+ get_wall_type,
+ estimate_external_wall_area,
+ esimtate_pitched_roof_area,
+ estimate_windows,
)
-ENVIRONMENT = os.environ.get('ENVIRONMENT', 'dev')
-DATA_BUCKET = os.environ.get('DATA_BUCKET', 'retrofit-data-dev' if ENVIRONMENT == 'dev' else None)
+ENVIRONMENT = os.environ.get("ENVIRONMENT", "dev")
+DATA_BUCKET = os.environ.get(
+ "DATA_BUCKET", "retrofit-data-dev" if ENVIRONMENT == "dev" else None
+)
logger = setup_logger()
@@ -30,7 +37,7 @@ class Property:
"roof-description": "roof",
"walls-description": "walls",
"windows-description": "windows",
- "lighting-description": "lighting"
+ "lighting-description": "lighting",
}
floor = None
@@ -49,7 +56,14 @@ class Property:
DATA_ANOMALY_MATCHES = DATA_ANOMALY_MATCHES
- def __init__(self, id, postcode, address, epc_record):
+ # Optional extra information that a customer can provide as additional inputs
+ n_bathrooms = None
+ n_bedrooms = None
+
+ def __init__(
+ self, id, postcode, address, epc_record, already_installed=None, non_invasive_recommendations=None,
+ **kwargs
+ ):
self.epc_record = epc_record
@@ -57,9 +71,20 @@ class Property:
self.address = address
self.postcode = postcode
- self.data = {k.replace("_", "-"): v for k, v in epc_record.get("prepared_epc").items()}
+ self.data = {
+ k.replace("_", "-"): v for k, v in epc_record.get("prepared_epc").items()
+ }
self.old_data = epc_record.get("old_data")
self.property_dimensions = None
+ # Measures that have already been installed in the property, typically identified by the non-invasive
+ # surveys. We still reflect these measures in the recommendations, but remove the cost and instead provide a
+ # message that the measure has already been installed
+
+ self.already_installed = ast.literal_eval(already_installed['already_installed']) if already_installed else []
+ self.non_invasive_recommendations = (
+ ast.literal_eval(non_invasive_recommendations['recommendations']) if
+ non_invasive_recommendations else []
+ )
self.uprn = epc_record.get("uprn")
self.full_sap_epc = epc_record.get("full_sap_epc")
@@ -92,7 +117,9 @@ class Property:
"wind_turbine": epc_record.prepared_epc.get("wind_turbine_count"),
}
self.number_of_open_fireplaces = {
- "number_of_open_fireplaces": epc_record.prepared_epc.get("number_open_fireplaces"),
+ "number_of_open_fireplaces": epc_record.prepared_epc.get(
+ "number_open_fireplaces"
+ ),
}
self.number_of_extensions = {
"number_of_extensions": epc_record.prepared_epc.get("extension_count"),
@@ -105,22 +132,55 @@ class Property:
"length": epc_record.prepared_epc.get("unheated_corridor_length"),
"heat_loss_corridor_boolean": epc_record.get("heat_loss_corridor_bool"),
}
- self.mains_gas = epc_record.prepared_epc.get('mains_gas_flag')
- self.floor_height = epc_record.prepared_epc.get('floor_height')
+ self.mains_gas = epc_record.prepared_epc.get("mains_gas_flag")
+ self.floor_height = epc_record.prepared_epc.get("floor_height")
self.insulation_wall_area = None
- self.floor_area = epc_record.prepared_epc.get('total_floor_area')
+ self.floor_area = epc_record.prepared_epc.get("total_floor_area")
self.pitched_roof_area = None
self.insulation_floor_area = None
- self.number_lighting_outlets = epc_record.prepared_epc.get("fixed_lighting_outlets_count")
+ self.number_lighting_outlets = epc_record.prepared_epc.get(
+ "fixed_lighting_outlets_count"
+ )
self.floor_level = None
self.number_of_windows = None
self.solar_pv_percentage = None
self.current_adjusted_energy = None
self.expected_adjusted_energy = None
+ self.current_energy_bill = None
+ self.expected_energy_bill = None
self.recommendations_scoring_data = []
+ self.parse_kwargs(kwargs)
+
+ @classmethod
+ def extract_kwargs(cls, kwargs):
+ """
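+ Intended for use in the router: picks the optional inputs we recognise out of the request kwargs and coerces
+ numeric values to integers, so that nothing passed on can clash with the named arguments of __init__
+ :param kwargs: mapping of optional request inputs; only "n_bathrooms" and "n_bedrooms" are read
+ :return: dict with keys "n_bathrooms" and "n_bedrooms", each an int, or None when the input is absent or None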
+ """
+ n_bathrooms = kwargs.get("n_bathrooms", None)
+ if n_bathrooms is not None:
+ # We add a small value so that a number of bathrooms ending in .5 is rounded up (round() alone rounds exact halves to the nearest even integer, e.g. 0.5 -> 0)
+ n_bathrooms = int(round(float(n_bathrooms) + 1e-5))
+
+ n_bedrooms = kwargs.get("n_bedrooms", None)
+ if n_bedrooms is not None:
+ n_bedrooms = int(round(float(n_bedrooms) + 1e-5))
+
+ return {
+ "n_bathrooms": n_bathrooms,
+ "n_bedrooms": n_bedrooms,
+ }
+
+ def parse_kwargs(self, kwargs):
+ # We extract the elements from kwargs that we recognise. Anything additional is ignored
+ self.n_bathrooms = kwargs.get("n_bathrooms", None)
+ self.n_bedrooms = kwargs.get("n_bedrooms", None)
+
def create_base_difference_epc_record(self, cleaned_lookup: dict):
"""
Creates a EPCDifferenceRecord object, which is used to store the difference between the current and
@@ -131,18 +191,30 @@ class Property:
# difference_record = self.epc_record - self.epc_record
# TODO: change these lower and replace in the settings file
- print("CHANGE THE LATEST FIELD TO REMOVE NUMBER HABITABLE ROOMS IF WE WANT TO USE STARTING/ENDING")
+ print(
+ "CHANGE THE LATEST FIELD TO REMOVE NUMBER HABITABLE ROOMS IF WE WANT TO USE STARTING/ENDING"
+ )
fixed_data_col_names = MANDATORY_FIXED_FEATURES + LATEST_FIELD
print("NEED TO CHANGE THE DASH TO LOWER CASE")
- fixed_data_col_names = [x.lower().replace("_", "-") for x in fixed_data_col_names]
+ fixed_data_col_names = [
+ x.lower().replace("_", "-") for x in fixed_data_col_names
+ ]
- fixed_data = {k.replace("-", "_"): v for k, v in self.data.items() if k in fixed_data_col_names}
+ fixed_data = {
+ k.replace("-", "_"): v
+ for k, v in self.data.items()
+ if k in fixed_data_col_names
+ }
# difference_record.append_fixed_data(fixed_data)
- difference_record = self.epc_record.create_EPCDifferenceRecord(self.epc_record, fixed_data)
+ difference_record = self.epc_record.create_EPCDifferenceRecord(
+ self.epc_record, fixed_data
+ )
- self.base_difference_record = TrainingDataset(datasets=[difference_record], cleaned_lookup=cleaned_lookup)
+ self.base_difference_record = TrainingDataset(
+ datasets=[difference_record], cleaned_lookup=cleaned_lookup
+ )
# TODO: adjust the base difference record with the previously calculated u values + features
# estimated_perimeter is different to the perimeter in the epc record
@@ -150,8 +222,7 @@ class Property:
# self.base_difference_record.df
def adjust_difference_record_with_recommendations(
- self, property_recommendations,
- property_representative_recommendations
+ self, property_recommendations, property_representative_recommendations
):
"""
This method will adjust the difference record, based on the recommendations made for the property
@@ -163,13 +234,23 @@ class Property:
"""
self.recommendations_scoring_data = []
- phases = sorted([r[0]["phase"] for r in property_recommendations if r[0]["phase"] is not None])
+ phases = sorted(
+ [
+ r[0]["phase"]
+ for r in property_recommendations
+ if r[0]["phase"] is not None
+ ]
+ )
for phase in phases:
- property_recommendations_by_phase = [r for r in property_recommendations if r[0]["phase"] == phase][0]
+ property_recommendations_by_phase = [
+ r for r in property_recommendations if r[0]["phase"] == phase
+ ][0]
previous_phases = [p for p in phases if p < phase]
previous_phase_representatives = [
- r for r in property_representative_recommendations if r["phase"] in previous_phases
+ r
+ for r in property_representative_recommendations
+ if r["phase"] in previous_phases
]
# For solid wall insulation, we will actually have 2 representative recommendations, since we consider
# both internal and external wall insulation as possible measures. We will use the representative that
@@ -177,15 +258,20 @@ class Property:
# Take the representative with the lowest efficiency, by phase
# To be safe, we sort by phase
- previous_phase_representatives = sorted(previous_phase_representatives, key=lambda x: x['phase'])
+ previous_phase_representatives = sorted(
+ previous_phase_representatives, key=lambda x: x["phase"]
+ )
previous_phase_representatives = [
- min(group, key=lambda x: x['efficiency']) for _, group in groupby(
- previous_phase_representatives, key=lambda x: x['phase']
+ min(group, key=lambda x: x["efficiency"])
+ for _, group in groupby(
+ previous_phase_representatives, key=lambda x: x["phase"]
)
]
- recommendation_record = self.base_difference_record.df.to_dict("records")[0].copy()
+ recommendation_record = self.base_difference_record.df.to_dict("records")[
+ 0
+ ].copy()
for rec in property_recommendations_by_phase:
# We simulate the impact of the recommendation at this current phase, and all of the prior phases
@@ -197,13 +283,18 @@ class Property:
property_id=self.id,
recommendation_record=recommendation_record,
recommendations=previous_phase_representatives + [rec],
- primary_recommendation_id=rec["recommendation_id"]
+ primary_recommendation_id=rec["recommendation_id"],
+ non_invasive_recommendations=self.non_invasive_recommendations,
)
self.recommendations_scoring_data.append(scoring_dict)
@staticmethod
def create_recommendation_scoring_data(
- property_id, recommendation_record, recommendations: list, primary_recommendation_id: int
+ property_id,
+ recommendation_record,
+ recommendations: list,
+ primary_recommendation_id: int,
+ non_invasive_recommendations: list = None,
):
"""
This function will iterate through a list of recommendations and apply a simulation for each recommendation
@@ -212,13 +303,17 @@ class Property:
:param recommendation_record: The record of the property, which will be updated
:param recommendations: The list of recommendations to apply
:param primary_recommendation_id: The id of the primary recommendation, which is used to identify the record
+ :param non_invasive_recommendations: The list of non-invasive recommendations; None is treated as an empty list
:return: The updated recommendation record
"""
output = recommendation_record.copy()
+ non_invasive_recommendations = [] if non_invasive_recommendations is None else non_invasive_recommendations
for col in [
- "walls_insulation_thickness", "floor_insulation_thickness", "roof_insulation_thickness"
+ "walls_insulation_thickness",
+ "floor_insulation_thickness",
+ "roof_insulation_thickness",
]:
if output[col] is None:
output[col] = "none"
@@ -228,14 +323,25 @@ class Property:
# We update the description to indicate it's insulated
if recommendation["type"] in [
- "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"
+ "internal_wall_insulation",
+ "external_wall_insulation",
+ "cavity_wall_insulation",
]:
+
+ # # If we have a non-invasive recommendation that the cavity wall is partially filled, we skip the
+ # # cavity wall insulation recommendation (since on the EPC, the property will look like how it did
+ # # before any works)
+ # if "cavity_surveyed_as_filled_is_partial" in non_invasive_recommendations:
+ # continue
+
# The upgrade made here is to the u-value of the walls and the description of the
# insulation thickness
- output["walls_thermal_transmittance_ending"] = recommendation["new_u_value"]
+ output["walls_thermal_transmittance_ending"] = recommendation[
+ "new_u_value"
+ ]
# Setting the insulation thickness here to above average should be tested further because we
# don't see a high volume of instances for this
- output["walls_insulation_thickness_ending"] = "above average"
+ output["walls_insulation_thickness_ending"] = "average"
output["walls_energy_eff_ending"] = "Good"
# Note: often when the wall is insulatied, the internal/external insulation is not noted so we should
@@ -265,10 +371,14 @@ class Property:
# Update description to indicate it's insulate
if recommendation["type"] in [
- "solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation"
+ "solid_floor_insulation",
+ "suspended_floor_insulation",
+ "exposed_floor_insulation",
]:
if len(recommendation["parts"]) > 1:
- raise NotImplementedError("Have more than 1 floor insulation part - handle this case")
+ raise NotImplementedError(
+ "Have more than 1 floor insulation part - handle this case"
+ )
# output["floor_thermal_transmittance_ending"] = recommendation["new_u_value"]
# We don't really see above average for this in the training data
@@ -282,22 +392,43 @@ class Property:
if output["floor_insulation_thickness_ending"] is None:
output["floor_insulation_thickness_ending"] = "none"
- if recommendation["type"] in ["loft_insulation", "room_roof_insulation", "flat_roof_insulation"]:
- output["roof_thermal_transmittance_ending"] = recommendation["new_u_value"]
+ if recommendation["type"] in [
+ "loft_insulation",
+ "room_roof_insulation",
+ "flat_roof_insulation",
+ ]:
+ output["roof_thermal_transmittance_ending"] = recommendation[
+ "new_u_value"
+ ]
parts = recommendation["parts"]
if len(parts) != 1:
- raise ValueError("More than one part for roof insulation - investiage me")
+ raise ValueError(
+ "More than one part for roof insulation - investiage me"
+ )
# This is based on the values we have in the training data
valid_numeric_values = [
- 12, 25, 50, 75, 100, 150, 200, 250, 270, 300, 350, 400
+ 12,
+ 25,
+ 50,
+ 75,
+ 100,
+ 150,
+ 200,
+ 250,
+ 270,
+ 300,
+ 350,
+ 400,
]
proposed_depth = int(parts[0]["depth"])
if proposed_depth not in valid_numeric_values:
# Take the nearest value for scoring
- proposed_depth = min(valid_numeric_values, key=lambda x: abs(x - proposed_depth))
+ proposed_depth = min(
+ valid_numeric_values, key=lambda x: abs(x - proposed_depth)
+ )
output["roof_insulation_thickness_ending"] = str(proposed_depth)
if recommendation["type"] == "loft_insulation":
@@ -331,11 +462,17 @@ class Property:
if output["glazing_type_ending"] == "multiple":
pass
elif output["glazing_type_ending"] == "single":
- output["glazing_type_ending"] = "secondary" if is_secondary_glazing else "double"
+ output["glazing_type_ending"] = (
+ "secondary" if is_secondary_glazing else "double"
+ )
elif output["glazing_type_ending"] == "double":
- output["glazing_type_ending"] = "multiple" if is_secondary_glazing else "double"
+ output["glazing_type_ending"] = (
+ "multiple" if is_secondary_glazing else "double"
+ )
elif output["glazing_type_ending"] == "secondary":
- output["glazing_type_ending"] = "secondary" if is_secondary_glazing else "multiple"
+ output["glazing_type_ending"] = (
+ "secondary" if is_secondary_glazing else "multiple"
+ )
elif output["glazing_type_ending"] in ["triple", "high performance"]:
output["glazing_type_ending"] = "multiple"
else:
@@ -344,9 +481,13 @@ class Property:
if is_secondary_glazing:
output["glazed_type_ending"] = "secondary glazing"
else:
- output["glazed_type_ending"] = "double glazing installed during or after 2002"
+ output["glazed_type_ending"] = (
+ "double glazing installed during or after 2002"
+ )
- if recommendation["type"] in ["heating", "hot_water_tank_insulation"]:
+ if recommendation["type"] in [
+ "heating", "hot_water_tank_insulation", "heating_control", "secondary_heating"
+ ]:
# We update the data, as defined in the recommendaton
simulation_config = recommendation["simulation_config"]
@@ -366,15 +507,20 @@ class Property:
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
"loft_insulation", "room_roof_insulation", "flat_roof_insulation",
"solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation",
- "windows_glazing", "solar_pv", "heating", "hot_water_tank_insulation"
+ "windows_glazing", "solar_pv", "heating", "hot_water_tank_insulation",
+ "heating_control", "secondary_heating"
]:
- raise NotImplementedError("Implement me, given type %s" % recommendation["type"])
+ raise NotImplementedError(
+ "Implement me, given type %s" % recommendation["type"]
+ )
- output['id'] = "+".join([str(property_id), str(primary_recommendation_id)])
+ output["id"] = "+".join([str(property_id), str(primary_recommendation_id)])
return output
- def get_components(self, cleaned, photo_supply_lookup, floor_area_decile_thresholds):
+ def get_components(
+ self, cleaned, photo_supply_lookup, floor_area_decile_thresholds
+ ):
"""
Given the cleaning that has been performed, we'll use this to identify the property
components, from roof to walls to windows, heating and hot water
@@ -399,10 +545,12 @@ class Property:
if self.data[description] in self.DATA_ANOMALY_MATCHES:
template = cleaned[description][0]
fill_dict = dict(zip(template.keys(), [None] * len(template)))
- fill_dict.update({
- "original_description": self.data[description],
- "clean_description": self.data[description],
- })
+ fill_dict.update(
+ {
+ "original_description": self.data[description],
+ "clean_description": self.data[description],
+ }
+ )
setattr(
self,
self.ATTRIBUTE_MAP[description],
@@ -411,11 +559,15 @@ class Property:
continue
attributes = [
- x for x in cleaned[description] if x["original_description"] == self.data[description]
+ x
+ for x in cleaned[description]
+ if x["original_description"] == self.data[description]
]
if len(attributes) > 1:
- raise ValueError("Either No attributes or multiple found for %s" % description)
+ raise ValueError(
+ "Either No attributes or multiple found for %s" % description
+ )
if len(attributes) == 0:
# We attempt to perform the clean on the fly
@@ -423,8 +575,12 @@ class Property:
cleaner_cls = cleaner_cls(self.data[description])
processed = {
"original_description": self.data[description],
- "clean_description": cleaner_cls.description.replace("(assumed)", "").rstrip().capitalize(),
- **cleaner_cls.process()
+ "clean_description": cleaner_cls.description.replace(
+ "(assumed)", ""
+ )
+ .rstrip()
+ .capitalize(),
+ **cleaner_cls.process(),
}
attributes = [processed]
@@ -436,7 +592,8 @@ class Property:
self.set_floor_level()
self.set_windows_count()
self.set_solar_panel_area(
- photo_supply_lookup=photo_supply_lookup, floor_area_decile_thresholds=floor_area_decile_thresholds
+ photo_supply_lookup=photo_supply_lookup,
+ floor_area_decile_thresholds=floor_area_decile_thresholds,
)
self.set_energy_source()
@@ -453,7 +610,11 @@ class Property:
self.is_heritage = spatial["is_heritage_building"].values[0]
# We do an equals True, in the case of one of these variables being True
- if (self.in_conservation_area == True) | (self.is_listed == True) | (self.is_heritage == True):
+ if (
+ (self.in_conservation_area == True)
+ | (self.is_listed == True)
+ | (self.is_heritage == True)
+ ):
self.restricted_measures = True
spatial_dict = spatial.to_dict("records")[0]
@@ -495,7 +656,7 @@ class Property:
"tenure": self.data["tenure"],
"current_epc_rating": self.data["current-energy-rating"],
"current_sap_points": self.data["current-energy-efficiency"],
- "current_valuation": current_valuation
+ "current_valuation": current_valuation,
}
property_data = self._clean_upload_data(property_data)
@@ -507,7 +668,11 @@ class Property:
"""
Utility function for usage in the lambda, for preparing the _rating fields
"""
- return rating_lookup[field].value if (field not in cls.DATA_ANOMALY_MATCHES) and (field is not None) else None
+ return (
+ rating_lookup[field].value
+ if (field not in cls.DATA_ANOMALY_MATCHES) and (field is not None)
+ else None
+ )
def get_property_details_epc(self, portfolio_id: int, rating_lookup):
@@ -517,21 +682,37 @@ class Property:
"full_address": self.data["address"],
"total_floor_area": float(self.data["total-floor-area"]),
"walls": self.walls["clean_description"],
- "walls_rating": self._prepare_rating_field(self.data["walls-energy-eff"], rating_lookup),
+ "walls_rating": self._prepare_rating_field(
+ self.data["walls-energy-eff"], rating_lookup
+ ),
"roof": self.roof["clean_description"],
- "roof_rating": self._prepare_rating_field(self.data["roof-energy-eff"], rating_lookup),
+ "roof_rating": self._prepare_rating_field(
+ self.data["roof-energy-eff"], rating_lookup
+ ),
"floor": self.floor["clean_description"],
- "floor_rating": self._prepare_rating_field(self.data["floor-energy-eff"], rating_lookup),
+ "floor_rating": self._prepare_rating_field(
+ self.data["floor-energy-eff"], rating_lookup
+ ),
"windows": self.windows["clean_description"],
- "windows_rating": self._prepare_rating_field(self.data["windows-energy-eff"], rating_lookup),
+ "windows_rating": self._prepare_rating_field(
+ self.data["windows-energy-eff"], rating_lookup
+ ),
"heating": self.main_heating["clean_description"],
- "heating_rating": self._prepare_rating_field(self.data["mainheat-energy-eff"], rating_lookup),
+ "heating_rating": self._prepare_rating_field(
+ self.data["mainheat-energy-eff"], rating_lookup
+ ),
"heating_controls": self.main_heating_controls["clean_description"],
- "heating_controls_rating": self._prepare_rating_field(self.data["mainheatc-energy-eff"], rating_lookup),
+ "heating_controls_rating": self._prepare_rating_field(
+ self.data["mainheatc-energy-eff"], rating_lookup
+ ),
"hot_water": self.hotwater["clean_description"],
- "hot_water_rating": self._prepare_rating_field(self.data["hot-water-energy-eff"], rating_lookup),
+ "hot_water_rating": self._prepare_rating_field(
+ self.data["hot-water-energy-eff"], rating_lookup
+ ),
"lighting": self.lighting["clean_description"],
- "lighting_rating": self._prepare_rating_field(self.data["lighting-energy-eff"], rating_lookup),
+ "lighting_rating": self._prepare_rating_field(
+ self.data["lighting-energy-eff"], rating_lookup
+ ),
"mainfuel": self.main_fuel["clean_description"],
"ventilation": self.ventilation["ventilation"],
"solar_pv": self.solar_pv["solar_pv"],
@@ -540,7 +721,9 @@ class Property:
"floor_height": self.floor_height,
"heat_loss_corridor": self.heat_loss_corridor["heat_loss_corridor_boolean"],
"unheated_corridor_length": self.heat_loss_corridor["length"],
- "number_of_open_fireplaces": self.number_of_open_fireplaces["number_of_open_fireplaces"],
+ "number_of_open_fireplaces": self.number_of_open_fireplaces[
+ "number_of_open_fireplaces"
+ ],
"number_of_extensions": self.number_of_extensions["number_of_extensions"],
"number_of_storeys": self.number_of_storeys["number_of_storeys"],
"mains_gas": self.mains_gas,
@@ -548,20 +731,21 @@ class Property:
"primary_energy_consumption": self.energy["primary_energy_consumption"],
"co2_emissions": self.energy["co2_emissions"],
"adjusted_energy_consumption": self.current_adjusted_energy,
- "estimated": self.data.get("estimated", False)
+ "estimated": self.data.get("estimated", False),
}
return property_details_epc
def get_spatial_data(self, uprn_filenames):
-
"""
Given a property's UPRN, this method will pull the associated spatial data from s3
:return:
"""
if self.uprn is None:
- logger.warning("We do not have a UPRN for this property - this needs to be implemented")
+ logger.warning(
+ "We do not have a UPRN for this property - this needs to be implemented"
+ )
self.in_conservation_area = False
self.is_listed = False
self.is_heritage = False
@@ -569,12 +753,15 @@ class Property:
return
# We get the file name for the uprn
- filtered_df = uprn_filenames[(uprn_filenames['lower'] <= self.uprn) & (uprn_filenames['upper'] >= self.uprn)]
+ filtered_df = uprn_filenames[
+ (uprn_filenames["lower"] <= self.uprn)
+ & (uprn_filenames["upper"] >= self.uprn)
+ ]
if filtered_df.empty:
logger.warning("Could not find file containing UPRNS")
return None
- filename = filtered_df.iloc[0]['filenames']
+ filename = filtered_df.iloc[0]["filenames"]
spatial_data = read_dataframe_from_s3_parquet(
bucket_name=DATA_BUCKET, file_key=f"spatial/{filename}"
@@ -592,15 +779,27 @@ class Property:
:return: filtered property dimensions dataframe
"""
- result = property_dimensions[(property_dimensions["PROPERTY_TYPE"] == self.data["property-type"])]
+ result = property_dimensions[
+ (property_dimensions["PROPERTY_TYPE"] == self.data["property-type"])
+ ]
- if self.construction_age_band is not None and self.construction_age_band not in self.DATA_ANOMALY_MATCHES:
- result = result[(result["CONSTRUCTION_AGE_BAND"] == self.construction_age_band)]
+ if (
+ self.construction_age_band is not None
+ and self.construction_age_band not in self.DATA_ANOMALY_MATCHES
+ ):
+ result = result[
+ (result["CONSTRUCTION_AGE_BAND"] == self.construction_age_band)
+ ]
- if self.data["built-form"] not in self.DATA_ANOMALY_MATCHES and self.data["built-form"] in result["BUILT_FORM"]:
+ if (
+ self.data["built-form"] not in self.DATA_ANOMALY_MATCHES
+ and self.data["built-form"] in result["BUILT_FORM"]
+ ):
result = result[(result["BUILT_FORM"] == self.data["built-form"])]
- return result[["NUMBER_HABITABLE_ROOMS", "TOTAL_FLOOR_AREA", "FLOOR_HEIGHT"]].mean()
+ return result[
+ ["NUMBER_HABITABLE_ROOMS", "TOTAL_FLOOR_AREA", "FLOOR_HEIGHT"]
+ ].mean()
def set_basic_property_dimensions(self):
"""
@@ -619,7 +818,8 @@ class Property:
# They could also be added as attributes to the EPC Record
self.perimeter = estimate_perimeter(
- self.floor_area / self.number_of_floors, self.number_of_rooms / self.number_of_floors
+ self.floor_area / self.number_of_floors,
+ self.number_of_rooms / self.number_of_floors,
)
self.insulation_wall_area = estimate_external_wall_area(
@@ -637,8 +837,9 @@ class Property:
def set_floor_level(self):
self.floor_level = (
- FLOOR_LEVEL_MAP[self.data["floor-level"]] if
- self.data["floor-level"] not in self.DATA_ANOMALY_MATCHES and self.data['floor-level'] is not None
+ FLOOR_LEVEL_MAP[self.data["floor-level"]]
+ if self.data["floor-level"] not in self.DATA_ANOMALY_MATCHES
+ and self.data["floor-level"] is not None
else None
)
@@ -700,22 +901,30 @@ class Property:
raise NotImplementedError("Implement this floor type")
@staticmethod
- def _extract_component(component_data, component_rename_cols, component_drop_cols, rename_prefix=None):
+ def _extract_component(
+ component_data, component_rename_cols, component_drop_cols, rename_prefix=None
+ ):
for k in component_rename_cols:
component_data[f"{rename_prefix}_{k}"] = component_data.get(k)
component_data = {
- k: v for k, v in component_data.items() if k not in component_drop_cols + component_rename_cols
+ k: v
+ for k, v in component_data.items()
+ if k not in component_drop_cols + component_rename_cols
}
return component_data
- def set_adjusted_energy(self, current_adjusted_energy, expected_adjusted_energy):
+ def set_adjusted_energy(
+ self, current_adjusted_energy, expected_adjusted_energy, current_energy_bill, expected_energy_bill
+ ):
"""
Stores these values for usage later
"""
self.current_adjusted_energy = current_adjusted_energy
self.expected_adjusted_energy = expected_adjusted_energy
+ self.current_energy_bill = current_energy_bill
+ self.expected_energy_bill = expected_energy_bill
def set_windows_count(self):
"""
@@ -753,7 +962,7 @@ class Property:
is_flat=self.roof["is_flat"],
is_pitched=self.roof["is_pitched"],
is_roof_room=self.roof["is_roof_room"],
- floor_area=self.floor_area
+ floor_area=self.floor_area,
)
percentage_of_roof = photo_supply_matched["photo_supply_median"].mean()
@@ -769,8 +978,9 @@ class Property:
"""
return (
- self.insulation_floor_area * percentage_of_roof if self.roof["is_flat"] else
- self.pitched_roof_area * percentage_of_roof
+ self.insulation_floor_area * percentage_of_roof
+ if self.roof["is_flat"]
+ else self.pitched_roof_area * percentage_of_roof
)
def set_energy_source(self):
@@ -783,7 +993,12 @@ class Property:
# If the tariff explicitly indicates electricity use without a dual indication and mains_gas_flag is not True
# We check for the common electricity tariffs
if not self.data["mains-gas-flag"] and self.data["energy-tariff"] in [
- "Single", "off-peak 7 hour", "off-peak 10 hour", "off-peak 18 hour", "standard tariff", "24 hour"
+ "Single",
+ "off-peak 7 hour",
+ "off-peak 10 hour",
+ "off-peak 18 hour",
+ "standard tariff",
+ "24 hour",
]:
energy_source = "electricity"
diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py
index 4f6fd33d..db9ec4ff 100644
--- a/backend/SearchEpc.py
+++ b/backend/SearchEpc.py
@@ -30,7 +30,7 @@ vartypes = {
'environment-impact-potential': "Int64",
'glazed-type': 'str',
'heating-cost-current': 'float',
- 'address3': 'str',
+ # 'address3': 'str',
'mainheatcont-description': 'str',
'sheating-energy-eff': 'str',
'property-type': 'str',
@@ -40,7 +40,7 @@ vartypes = {
'mechanical-ventilation': 'str',
'hot-water-cost-current': 'str',
'county': 'str',
- 'postcode': 'str',
+ # 'postcode': 'str',
'solar-water-heating-flag': 'str',
'constituency': 'str',
'co2-emissions-potential': 'float',
@@ -55,7 +55,7 @@ vartypes = {
# 'inspection-date': str,
'mains-gas-flag': 'str',
'co2-emiss-curr-per-floor-area': 'float',
- 'address1': 'str',
+ # 'address1': 'str',
'heat-loss-corridor': 'str',
'flat-storey-count': "Int64",
'constituency-label': 'str',
@@ -67,7 +67,7 @@ vartypes = {
'roof-description': 'str',
'floor-energy-eff': 'str',
'number-habitable-rooms': 'float',
- 'address2': 'str',
+ # 'address2': 'str',
'hot-water-env-eff': 'str',
'posttown': 'str',
'mainheatc-energy-eff': 'str',
@@ -98,7 +98,7 @@ vartypes = {
# 'lodgement-date',
'extension-count': "Int64",
'mainheatc-env-eff': 'str',
- 'lmk-key': 'str',
+ # 'lmk-key': 'str',
'wind-turbine-count': "Int64",
'tenure': 'str',
'floor-level': 'str',
@@ -147,6 +147,7 @@ class SearchEpc:
uprn: [int, None] = None,
size=None,
property_type=None,
+ fast=False
):
"""
Address lines 1 and postcode are mandatory fields. The other address lines are optional
@@ -187,37 +188,37 @@ class SearchEpc:
self.size = size if size is not None else 25
self.property_type = property_type
+ self.fast = fast
@classmethod
def get_house_number(cls, address: str) -> str | None:
"""
- This method will use the usaddress library to parse an address and extract the house number
- :return:
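+ Extracts the primary house or flat number from an address: a custom regex is tried first, and the usaddress library is used as a fallback when the regex finds no match. Returns None if nothing is found or parsing fails.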
"""
+ try:
- parsed = usaddress.parse(address)
- parsed_house_number = [x for x in parsed if (x[1] == "AddressNumber")]
- parsed_house_number = parsed_house_number[0][0] if parsed_house_number else None
-
- if parsed_house_number is None:
- # Because usaddress isn't optimal for parsing addresses with some prefixes such as 'Flat',
- # we also add a custom approach
-
- # Pattern to look for 'Flat' or 'Apartment' followed by a number, or just a number at the beginning
+ # Custom regex to catch a broad range of cases
pattern = r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)'
-
match = re.search(pattern, address)
-
if match:
- # Return the first non-None group found
return next(g for g in match.groups() if g is not None)
- else:
- return None
- # Remove training commas
- parsed_house_number = parsed_house_number.replace(",", "")
+ parsed = usaddress.parse(address)
+ # First, return the first 'OccupancyIdentifier' part, if there is one (note: 'OccupancyType' itself is not checked)
+ for part, type_ in parsed:
+ if type_ == 'OccupancyIdentifier':
+ return part # This assumes the first 'OccupancyIdentifier' after 'OccupancyType' is the primary
+ # number
- return parsed_house_number
+ # Fallback to 'AddressNumber' if no 'OccupancyIdentifier' is found
+ address_number = next((part for part, type_ in parsed if type_ == 'AddressNumber'), None)
+ if address_number:
+ return address_number.replace(",", "") # Remove any trailing commas
+
+ except Exception as e:
+ print(f"Error parsing address: {e}")
+
+ return None
@staticmethod
def extract_numeric_housenumber_part(house_number: str | None) -> int | None:
@@ -365,9 +366,6 @@ class SearchEpc:
# Finally, we identify the newest epc and the rest, and then return
newest_epc, older_epcs = self.filter_newest_epc(list_of_epcs=rows)
- # Retrieve postcode and address
- address_epc, postcode_epc = self.format_address(newest_epc=newest_epc)
-
# Ge the uprn from the newest record for this home
uprns = {r["uprn"] for r in rows if r["uprn"]}
# We can sometimes have no uprn for a property
@@ -384,6 +382,12 @@ class SearchEpc:
uprn = uprns.pop() if uprns else None
+ if self.fast:
+ return newest_epc, [], {}, "", "", None
+
+ # Retrieve postcode and address
+ address_epc, postcode_epc = self.format_address(newest_epc=newest_epc)
+
return newest_epc, older_epcs, full_sap_epc, address_epc, postcode_epc, uprn
@staticmethod
@@ -575,6 +579,11 @@ class SearchEpc:
property_type=property_type
)
+        # If the lodgement date is missing, we fill it with the inspection-date
+ epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["inspection-date"])
+ # If we still have missing dates, we set it to the mean of the non NA dates
+ epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["lodgement-datetime"].mean())
+
# For each attribute, we need to determine the datatype and use an appropriate method
# to estimate.
estimated_epc = {}
@@ -609,7 +618,11 @@ class SearchEpc:
# Insert an estimated lodgement datetime, with a weighted average
estimated_epc["lodgement-datetime"] = self.calculate_weighted_lodgement_datetime(epc_data=epc_data)
# Extract logement date
- estimated_epc["lodgement-date"] = estimated_epc["lodgement-datetime"].strftime("%Y-%m-%d")
+ # It is possible that there is still no lodgement date, so we need to handle this
+ if pd.isnull(estimated_epc["lodgement-datetime"]):
+ estimated_epc["lodgement-date"] = None
+ else:
+ estimated_epc["lodgement-date"] = estimated_epc["lodgement-datetime"].strftime("%Y-%m-%d")
estimated_epc["postcode"] = self.postcode
estimated_epc["uprn"] = self.uprn
@@ -695,8 +708,13 @@ class SearchEpc:
self.full_sap_epc = {}
# Finally, set a standardised address 1 and postcode
- self.address_clean = self.ordnance_survey_client.address_os
- self.postcode_clean = self.ordnance_survey_client.postcode_os
+ self.address_clean = (
+ self.ordnance_survey_client.address_os if self.ordnance_survey_client.address_os else self.address1
+ )
+ self.postcode_clean = (
+ self.ordnance_survey_client.postcode_os if self.ordnance_survey_client.postcode_os else
+ self.postcode
+ )
return
os_response = self.ordnance_survey_client.get_places_api()
diff --git a/backend/app/db/functions/non_intrusive_surveys.py b/backend/app/db/functions/non_intrusive_surveys.py
new file mode 100644
index 00000000..93348121
--- /dev/null
+++ b/backend/app/db/functions/non_intrusive_surveys.py
@@ -0,0 +1,50 @@
+from sqlalchemy.orm import Session
+from backend.app.db.models.non_intrusive_surveys import NonIntrusiveSurvey, NonIntrusiveSurveyNotes
+
+
+def upload_non_intrusive_survey_notes(session: Session, non_invasive_notes, batch_size=500):
+ """
+ Uploads a list of non-intrusive survey notes into the database in batches. Each dictionary in the list represents
+ one survey and its associated notes.
+
+ :param session: SQLAlchemy Session object through which all database transactions are handled.
+ :param non_invasive_notes: List of dictionaries where each dictionary contains survey details including 'uprn',
+ 'survey_date', 'surveyor', and other notes as key-value pairs.
+ :param batch_size: The size of each batch to be processed (default is 500).
+ :return: None
+ """
+
+ # Helper function to process each batch
+ def process_batch(batch):
+ surveys = []
+ notes = []
+
+ for note in batch:
+ survey = NonIntrusiveSurvey(
+ uprn=note['uprn'],
+ survey_date=note['survey_date'],
+ surveyor=note['surveyor']
+ )
+ surveys.append(survey)
+
+ session.add_all(surveys)
+ session.flush() # Get IDs for surveys
+
+ for note, survey in zip(batch, surveys):
+ for key, value in note.items():
+ if key not in ['uprn', 'survey_date', 'surveyor']:
+ notes.append(NonIntrusiveSurveyNotes(
+ survey_id=survey.id,
+ title=key,
+ note=value
+ ))
+
+ session.bulk_save_objects(notes)
+ session.commit()
+
+ # Split the data into batches and process each batch
+ total = len(non_invasive_notes)
+ for start in range(0, total, batch_size):
+ end = min(start + batch_size, total)
+ batch = non_invasive_notes[start:end]
+ process_batch(batch)
diff --git a/backend/app/db/functions/portfolio_functions.py b/backend/app/db/functions/portfolio_functions.py
index a8a882bd..69203368 100644
--- a/backend/app/db/functions/portfolio_functions.py
+++ b/backend/app/db/functions/portfolio_functions.py
@@ -4,14 +4,14 @@ from backend.app.db.models.portfolio import Portfolio
def aggregate_portfolio_recommendations(
- session, portfolio_id: int, total_valuation_increase: float, labour_days: float
+ session, portfolio_id: int, total_valuation_increase: float, labour_days: float, aggregated_data: dict
):
# Aggregate multiple fields
aggregates = (
session.query(
func.sum(Recommendation.estimated_cost).label("cost"),
func.sum(Recommendation.total_work_hours).label("total_work_hours"),
- func.sum(Recommendation.heat_demand).label("energy_savings"),
+ func.sum(Recommendation.adjusted_heat_demand).label("energy_savings"),
func.sum(Recommendation.co2_equivalent_savings).label("co2_equivalent_savings"),
func.sum(Recommendation.energy_cost_savings).label("energy_cost_savings"),
)
@@ -27,6 +27,7 @@ def aggregate_portfolio_recommendations(
"energy_savings": aggregates.energy_savings or 0,
"co2_equivalent_savings": aggregates.co2_equivalent_savings or 0,
"energy_cost_savings": aggregates.energy_cost_savings or 0,
+ **aggregated_data
}
# Get the portfolio and update the fields
diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py
index 1426e339..b22ce92f 100644
--- a/backend/app/db/functions/recommendations_functions.py
+++ b/backend/app/db/functions/recommendations_functions.py
@@ -85,7 +85,8 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
"co2_equivalent_savings": rec["co2_equivalent_savings"],
"total_work_hours": rec["labour_hours"],
"energy_cost_savings": rec["energy_cost_savings"],
- "labour_days": rec["labour_days"]
+ "labour_days": rec["labour_days"],
+ "already_installed": rec["already_installed"],
}
for rec in recommendations_to_upload
]
diff --git a/backend/app/db/models/non_intrusive_surveys.py b/backend/app/db/models/non_intrusive_surveys.py
new file mode 100644
index 00000000..bc2d8adc
--- /dev/null
+++ b/backend/app/db/models/non_intrusive_surveys.py
@@ -0,0 +1,22 @@
+from sqlalchemy import Column, BigInteger, String, TIMESTAMP, ForeignKey, Integer
+from sqlalchemy.orm import declarative_base
+
+Base = declarative_base()
+
+
+class NonIntrusiveSurvey(Base):  # One row per survey: the property's UPRN, the survey date and the surveyor
+    __tablename__ = 'non_intrusive_survey'
+
+    id = Column(BigInteger, primary_key=True, autoincrement=True)
+    uprn = Column(BigInteger, nullable=False)  # UPRNs can have up to 12 digits, which overflows a 32-bit Integer
+    survey_date = Column(TIMESTAMP, nullable=False)
+    surveyor = Column(String, nullable=False)
+
+
+class NonIntrusiveSurveyNotes(Base):
+ __tablename__ = 'non_intrusive_survey_notes'
+
+ id = Column(BigInteger, primary_key=True, autoincrement=True)
+ survey_id = Column(BigInteger, ForeignKey('non_intrusive_survey.id'), nullable=False)
+ title = Column(String, nullable=False)
+ note = Column(String, nullable=False)
diff --git a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py
index 830866e6..aa0146c0 100644
--- a/backend/app/db/models/portfolio.py
+++ b/backend/app/db/models/portfolio.py
@@ -45,6 +45,21 @@ class Portfolio(Base):
labour_days = Column(Float)
created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
+ # Aggregations for summary
+ epc_breakdown_pre_retrofit = Column(Text)
+ epc_breakdown_post_retrofit = Column(Text)
+ n_units_to_retrofit = Column(Integer)
+ co2_per_unit_pre_retrofit = Column(Text)
+ co2_per_unit_post_retrofit = Column(Text)
+ energy_bill_per_unit_pre_retrofit = Column(Text)
+ energy_bill_per_unit_post_retrofit = Column(Text)
+ energy_consumption_per_unit_pre_retrofit = Column(Text)
+ energy_consumption_per_unit_post_retrofit = Column(Text)
+ valuation_improvement_per_unit = Column(Text)
+ cost_per_unit = Column(Text)
+ cost_per_co2_saved = Column(Text)
+ cost_per_sap_point = Column(Text)
+ valuation_return_on_investment = Column(Text)
class PropertyCreationStatus(enum.Enum):
diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py
index a492f2f2..186f87a8 100644
--- a/backend/app/db/models/recommendations.py
+++ b/backend/app/db/models/recommendations.py
@@ -30,6 +30,7 @@ class Recommendation(Base):
rental_yield_increase = Column(Float)
total_work_hours = Column(Float)
labour_days = Column(Float)
+ already_installed = Column(Boolean, nullable=False, default=False)
class RecommendationMaterials(Base):
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 0b98cf2c..06d1aadf 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -1,3 +1,4 @@
+import json
from datetime import datetime
from tqdm import tqdm
@@ -24,7 +25,7 @@ from backend.app.db.models.portfolio import rating_lookup
from backend.app.dependencies import validate_token
from backend.app.plan.schemas import PlanTriggerRequest
from backend.app.plan.utils import get_cleaned
-from backend.app.utils import epc_to_sap_lower_bound, read_csv_from_s3, sap_to_epc
+from backend.app.utils import epc_to_sap_lower_bound, sap_to_epc
from backend.ml_models.api import ModelApi
from backend.Property import Property
@@ -35,32 +36,186 @@ from recommendations.optimiser.GainOptimiser import GainOptimiser
from recommendations.optimiser.optimiser_functions import prepare_input_measures
from recommendations.Recommendations import Recommendations
from utils.logger import setup_logger
-from utils.s3 import read_dataframe_from_s3_parquet
+from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3
from backend.ml_models.Valuation import PropertyValuation
logger = setup_logger()
BATCH_SIZE = 5
+SCORING_BATCH_SIZE = 400
-def patch_epc(config, epc_records):
+def patch_epc(patch, epc_records):
"""
This utility function is useful to patch the epc data if we have data from the customer
:return:
"""
- number_habitable_rooms = config.get("number-habitable-rooms", None)
- number_heated_rooms = config.get("number-heated-rooms", None)
+ for patch_variable, patch_value in patch.items():
- if number_habitable_rooms is not None:
- epc_records["original_epc"]["number-habitable-rooms"] = int(number_habitable_rooms)
+ if patch_variable in ["address", "postcode"]:
+ continue
- if number_heated_rooms is not None:
- epc_records["original_epc"]["number-heated-rooms"] = int(number_heated_rooms)
+ if patch_value == "":
+ continue
+ if patch_variable in epc_records["original_epc"]:
+ epc_records["original_epc"][patch_variable] = patch_value
return epc_records
+def extract_portfolio_aggregation_data(
+ input_properties, total_valuation_increase, recommendations, new_epc_bands, property_value_increase_ranges
+):
+ # We aggregate a number of metrics for the portfolio:
+ # 1) A breakdown of the number of properties in each EPC band
+ # a) before retrofit
+ # b) after retrofit
+ # 2) Number of units
+ # 3) Co2/unit
+ # a) before retrofit
+ # b) after retrofit
+ # 4) Energy bill/unit
+ # a) before retrofit
+ # b) after retrofit
+ # 5) Average valuation improvement/unit
+ # 6) Total cost
+ # 7) Cost per unit
+ # 8) £ per CO2 saved
+ # 9) £ per SAP point
+
+    # We need to construct the underlying data for this
+
+ # Helper function to reformat the EPC data
+ def reformat_epc_data(epc_counts):
+ # Define all possible EPC bands in the required order
+ epc_bands = ["G", "F", "E", "D", "C", "B", "A"]
+
+ # Create the formatted data list by checking each band in the order
+ formatted_data = []
+ for band in epc_bands:
+ # Get the count from the dictionary, defaulting to 0 if not present
+ count = epc_counts.get(band, 0)
+ # Append the formatted dictionary to the list
+ formatted_data.append({"name": band, band: count})
+
+ return formatted_data
+
+ n_units = len(input_properties)
+
+ agg_data = []
+ for p in input_properties:
+ # Get the recommendations for the property - we include all properties, even ones without recommendations
+ property_recommendations = recommendations.get(p.id, [])
+
+ # Get just the default recommendations
+ default_recommendations = [r for r in property_recommendations if r["default"]]
+
+ has_recommendations = len(default_recommendations) > 0
+
+ # We can now calculate multiple outputs based on default recommendations
+ carbon_savings = sum([r["co2_equivalent_savings"] for r in default_recommendations])
+
+ pre_retrofit_co2 = p.data["co2-emissions-current"]
+ post_retrofit_co2 = pre_retrofit_co2 - carbon_savings
+
+ pre_retrofit_energy_bill = p.current_energy_bill
+ post_retrofit_energy_bill = p.current_energy_bill - sum(
+ [r["energy_cost_savings"] for r in default_recommendations]
+ )
+
+ pre_retrofit_energy_consumption = p.current_adjusted_energy
+ post_retrofit_energy_consumption = p.current_adjusted_energy - sum(
+ [r["adjusted_heat_demand"] for r in default_recommendations]
+ )
+
+        # Add up the cost and SAP point improvement of the default recommendations
+ cost = sum([r["total"] for r in default_recommendations])
+ sap_point_improvement = sum([r["sap_points"] for r in default_recommendations])
+
+ lower_bound_valuation_uplift = (
+ property_value_increase_ranges[p.id]["lower_bound_increased_value"] -
+ property_value_increase_ranges[p.id]["current_value"]
+ )
+ upper_bound_valuation_uplift = (
+ property_value_increase_ranges[p.id]["upper_bound_increased_value"] -
+ property_value_increase_ranges[p.id]["current_value"]
+ )
+
+ agg_data.append({
+ "pre_retrofit_epc": p.data["current-energy-rating"],
+ "post_retrofit_epc": new_epc_bands[p.id],
+ "pre_retrofit_co2": pre_retrofit_co2,
+ "post_retrofit_co2": post_retrofit_co2,
+ "pre_retrofit_energy_bill": pre_retrofit_energy_bill,
+ "post_retrofit_energy_bill": post_retrofit_energy_bill,
+ "pre_retrofit_energy_consumption": pre_retrofit_energy_consumption,
+ "post_retrofit_energy_consumption": post_retrofit_energy_consumption,
+ "cost": cost,
+ "sap_point_improvement": sap_point_improvement,
+ "lower_bound_valuation_uplift": lower_bound_valuation_uplift,
+ "upper_bound_valuation_uplift": upper_bound_valuation_uplift,
+ "has_recommendations": has_recommendations
+ })
+
+ agg_data = pd.DataFrame(agg_data)
+
+ n_units_to_retrofit = agg_data["has_recommendations"].sum()
+
+ valuation_improvement_lower_bound_per_unit = (
+ agg_data["lower_bound_valuation_uplift"].mean()
+ )
+ valuation_improvement_upper_bound_per_unit = (
+ agg_data["upper_bound_valuation_uplift"].mean()
+ )
+
+ total_carbon_saved = agg_data["pre_retrofit_co2"].sum() - agg_data["post_retrofit_co2"].sum()
+ total_sap_points = agg_data["sap_point_improvement"].sum()
+
+ def format_money(amount):
+ return f"£{amount:,.0f}"
+
+ valuation_improvment_per_unit = str(
+ format_money(
+ total_valuation_increase / n_units) + (f" ({format_money(valuation_improvement_lower_bound_per_unit)} - "
+ f"{format_money(valuation_improvement_upper_bound_per_unit)})")
+ )
+
+ valuation_return_on_investment = str(
+ str(round(total_valuation_increase / agg_data["cost"].sum(), 2)) +
+ f" ("
+ f"{agg_data['lower_bound_valuation_uplift'].sum() / agg_data['cost'].sum():,.2f} - "
+ f"{agg_data['upper_bound_valuation_uplift'].sum() / agg_data['cost'].sum():,.2f})"
+ )
+
+ aggregation_data = {
+ "epc_breakdown_pre_retrofit": json.dumps(
+ reformat_epc_data(agg_data["pre_retrofit_epc"].value_counts().to_dict())
+ ),
+ "epc_breakdown_post_retrofit": json.dumps(
+ reformat_epc_data(agg_data["post_retrofit_epc"].value_counts().to_dict())
+ ),
+ "number_of_properties": int(n_units),
+ "n_units_to_retrofit": int(n_units_to_retrofit),
+ "co2_per_unit_pre_retrofit": str(round(agg_data["pre_retrofit_co2"].mean(), 2)) + "t",
+ "co2_per_unit_post_retrofit": str(round(agg_data["post_retrofit_co2"].mean(), 2)) + "t",
+ "energy_bill_per_unit_pre_retrofit": format_money(agg_data["pre_retrofit_energy_bill"].mean()),
+ "energy_bill_per_unit_post_retrofit": format_money(agg_data["post_retrofit_energy_bill"].mean()),
+ "energy_consumption_per_unit_pre_retrofit": str(
+ round(agg_data["pre_retrofit_energy_consumption"].mean())) + "kWh",
+ "energy_consumption_per_unit_post_retrofit": str(
+ round(agg_data["post_retrofit_energy_consumption"].mean())) + "kWh",
+ "valuation_improvement_per_unit": valuation_improvment_per_unit,
+ "cost_per_unit": format_money(agg_data["cost"].mean()),
+ "cost_per_co2_saved": format_money(agg_data["cost"].sum() / total_carbon_saved),
+ "cost_per_sap_point": format_money(agg_data["cost"].sum() / total_sap_points),
+ "valuation_return_on_investment": valuation_return_on_investment,
+ # TODO: Could we add 10yr carbon credits value?
+ }
+
+ return aggregation_data
+
+
router = APIRouter(
prefix="/plan",
tags=["plan"],
@@ -78,12 +233,29 @@ async def trigger_plan(body: PlanTriggerRequest):
# TODO: We should store the trigger file path in the database with the plan so we can track the file that
# triggered the plan
- # TODO: Create the ability to congigure/switch off certain measures
+ # TODO: if the measure is already installed, it should actually be the very first phase
try:
session.begin()
logger.info("Getting the inputs")
plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
+ # If we have patches or overrides, we should read them in here
+ patches = []
+ if body.patches_file_path:
+ patches = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.patches_file_path)
+
+ already_installed = []
+ if body.already_installed_file_path:
+ already_installed = read_csv_from_s3(
+ bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.already_installed_file_path
+ )
+
+ non_invasive_recommendations = []
+ if body.non_invasive_recommendations_file_path:
+ non_invasive_recommendations = read_csv_from_s3(
+ bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.non_invasive_recommendations_file_path
+ )
+
cleaning_data = read_dataframe_from_s3_parquet(
bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
)
@@ -91,19 +263,25 @@ async def trigger_plan(body: PlanTriggerRequest):
input_properties = []
for config in tqdm(plan_input):
# We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
+ uprn = config.get("uprn", None)
+ if uprn:
+ uprn = int(float(uprn))
epc_searcher = SearchEpc(
address1=config["address"],
postcode=config["postcode"],
+ uprn=uprn,
auth_token=get_settings().EPC_AUTH_TOKEN,
- os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY
+ os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY,
)
- epc_searcher.find_property()
+ epc_searcher.ordnance_survey_client.built_form = config.get("built_form", None)
+ epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None)
+ # For the moment, our OS API access is unavailable, so we skip and interpolate
+ epc_searcher.find_property(skip_os=True)
# Create a record in db
property_id, is_new = create_property(
session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
)
- # if a new record was not created, we don't produduce recommendations
if not is_new:
continue
@@ -120,7 +298,11 @@ async def trigger_plan(body: PlanTriggerRequest):
'full_sap_epc': epc_searcher.full_sap_epc.copy(),
'old_data': epc_searcher.older_epcs.copy(),
}
- epc_records = patch_epc(config, epc_records)
+
+ patch = next((
+ x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
+ ), {})
+ epc_records = patch_epc(patch, epc_records)
prepared_epc = EPCRecord(
epc_records=epc_records,
@@ -128,12 +310,25 @@ async def trigger_plan(body: PlanTriggerRequest):
cleaning_data=cleaning_data
)
+ property_already_installed = next((
+ x for x in already_installed if
+ (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
+ ), {})
+
+ property_non_invasive_recommendations = next((
+ x for x in non_invasive_recommendations if
+ (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
+ ), {})
+
input_properties.append(
Property(
id=property_id,
address=epc_searcher.address_clean,
postcode=epc_searcher.postcode_clean,
epc_record=prepared_epc,
+ already_installed=property_already_installed,
+ non_invasive_recommendations=property_non_invasive_recommendations,
+ **Property.extract_kwargs(config)
)
)
@@ -160,15 +355,13 @@ async def trigger_plan(body: PlanTriggerRequest):
recommendations = {}
recommendations_scoring_data = []
representative_recommendations = {}
- for p in input_properties:
+ for p in tqdm(input_properties):
# Property recommendations
p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
- recommender = Recommendations(property_instance=p, materials=materials)
- # TODO: portfolio id as an input is temp
- print("DELETE PORTFOLIO ID AS AN INPUT!!")
- property_recommendations, property_representative_recommendations = recommender.recommend(body.portfolio_id)
+ recommender = Recommendations(property_instance=p, materials=materials, exclusions=body.exclusions)
+ property_recommendations, property_representative_recommendations = recommender.recommend()
if not property_recommendations:
continue
@@ -187,6 +380,7 @@ async def trigger_plan(body: PlanTriggerRequest):
logger.info("Preparing data for scoring in sap change api")
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
+
recommendations_scoring_data = recommendations_scoring_data.drop(
columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
"carbon_ending"]
@@ -194,15 +388,26 @@ async def trigger_plan(body: PlanTriggerRequest):
model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
- all_predictions = model_api.predict_all(
- df=recommendations_scoring_data,
- bucket=get_settings().DATA_BUCKET,
- prediction_buckets={
- "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
- "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
- "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET
- }
- )
+ all_predictions = {
+ "sap_change_predictions": pd.DataFrame(),
+ "heat_demand_predictions": pd.DataFrame(),
+ "carbon_change_predictions": pd.DataFrame()
+ }
+ to_loop_over = range(0, recommendations_scoring_data.shape[0], SCORING_BATCH_SIZE)
+ for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
+ predictions_dict = model_api.predict_all(
+ df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE],
+ bucket=get_settings().DATA_BUCKET,
+ prediction_buckets={
+ "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
+ "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
+ "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET
+ }
+ )
+
+ # Append the predictions to the predictions dictionary
+ for key, scored in predictions_dict.items():
+ all_predictions[key] = pd.concat([all_predictions[key], scored])
# Insert the predictions into the recommendations and run the optimiser
# TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a
@@ -214,7 +419,13 @@ async def trigger_plan(body: PlanTriggerRequest):
property_instance = [p for p in input_properties if p.id == property_id][0]
- recommendations_with_impact, current_adjusted_energy, expected_adjusted_energy = (
+ (
+ recommendations_with_impact,
+ current_adjusted_energy,
+ expected_adjusted_energy,
+ current_energy_bill,
+ expected_energy_bill
+ ) = (
Recommendations.calculate_recommendation_impact(
property_instance=property_instance,
all_predictions=all_predictions,
@@ -225,10 +436,12 @@ async def trigger_plan(body: PlanTriggerRequest):
# Store the resulting adjusted energy in the property instance
property_instance.set_adjusted_energy(
current_adjusted_energy=current_adjusted_energy,
- expected_adjusted_energy=expected_adjusted_energy
+ expected_adjusted_energy=expected_adjusted_energy,
+ current_energy_bill=current_energy_bill,
+ expected_energy_bill=expected_energy_bill
)
- input_measures = prepare_input_measures(recommendations_with_impact, body.goal, body.housing_type)
+ input_measures = prepare_input_measures(recommendations_with_impact, body.goal)
current_sap_points = int(property_instance.data["current-energy-efficiency"])
target_sap_points = epc_to_sap_lower_bound(body.goal_value)
@@ -256,16 +469,14 @@ async def trigger_plan(body: PlanTriggerRequest):
if any(x in [r["type"] for r in solution] for x in [
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"
]):
- ventilation_rec = [
- r for r in recommendations_with_impact if r[0]["type"] == "mechanical_ventilation"
- ][0]
-
- selected_recommendations = set(
- list(selected_recommendations) + [ventilation_rec[0]["recommendation_id"]]
+ ventilation_rec = next(
+ (r[0] for r in recommendations_with_impact if r[0]["type"] == "mechanical_ventilation"),
+ None
)
- # We check if the selected recommendation is wall ventilation and if so, we make sure
- # mechanical ventilation is selected
+ # If a matching recommendation was found, add its ID to the selected recommendations
+ if ventilation_rec:
+ selected_recommendations.add(ventilation_rec["recommendation_id"])
# We'll use the set of selected recommendations to filter the recommendations to upload
final_recommendations = [
@@ -289,6 +500,8 @@ async def trigger_plan(body: PlanTriggerRequest):
logger.info("Uploading recommendations to the database")
property_valuation_increases = []
session.commit()
+ new_epc_bands = {}
+ property_value_increase_ranges = {}
for i in range(0, len(input_properties), BATCH_SIZE):
try:
# Take a slice of the input_properties list to make a batch
@@ -300,8 +513,10 @@ async def trigger_plan(body: PlanTriggerRequest):
total_sap_points = sum([r["sap_points"] for r in default_recommendations])
new_sap_points = float(p.data["current-energy-efficiency"]) + total_sap_points
new_epc = sap_to_epc(new_sap_points)
+ new_epc_bands[p.id] = new_epc
valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc)
+ property_value_increase_ranges[p.id] = valuations
# Your existing operations
property_details_epc = p.get_property_details_epc(
@@ -365,11 +580,20 @@ async def trigger_plan(body: PlanTriggerRequest):
[sum(r["labour_days"] for r in rec_group if r["default"]) for p_id, rec_group in recommendations.items()]
))
+ aggregated_data = extract_portfolio_aggregation_data(
+ input_properties=input_properties,
+ total_valuation_increase=total_valuation_increase,
+ recommendations=recommendations,
+ new_epc_bands=new_epc_bands,
+ property_value_increase_ranges=property_value_increase_ranges
+ )
+
aggregate_portfolio_recommendations(
session,
portfolio_id=body.portfolio_id,
total_valuation_increase=total_valuation_increase,
- labour_days=labour_days
+ labour_days=labour_days,
+ aggregated_data=aggregated_data
)
# Commit final changes
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index 9801375f..59c0ebef 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -1,10 +1,54 @@
-from pydantic import BaseModel
+from pydantic import BaseModel, conlist, validator
+from typing import Optional
class PlanTriggerRequest(BaseModel):
- budget: float | None = None
+ budget: Optional[float] = None
goal: str
housing_type: str
goal_value: str
portfolio_id: int
trigger_file_path: str
+ already_installed_file_path: Optional[str] = None
+ patches_file_path: Optional[str] = None
+ non_invasive_recommendations_file_path: Optional[str] = None
+ exclusions: Optional[conlist(str, min_items=1)] = None
+
+ # Pre-defined list of possibilities for exclusions
+ _allowed_exclusions = {
+ "wall_insulation",
+ "ventilation",
+ "roof_insulation",
+ "floor_insulation",
+ "windows",
+ "fireplace",
+ "heating",
+ "hot_water",
+ "lighting",
+ "solar_pv"
+ }
+
+ _allowed_goals = {"Increase EPC"}
+
+ _allowed_housing_types = {"Social", "Private"}
+
+ # Validator to ensure exclusions are within the pre-defined possibilities
+ @validator('exclusions', each_item=True)
+ def check_exclusions(cls, v):
+ if v not in cls._allowed_exclusions:
+ raise ValueError(f"{v} is not an allowed exclusion")
+ return v
+
+ # Validator to ensure that the goal is within the pre-defined possibilities
+ @validator('goal')
+ def check_goal(cls, v):
+ if v not in cls._allowed_goals:
+ raise ValueError(f"{v} is not a valid goal")
+ return v
+
+ # Validator to ensure that the housing type is within the pre-defined possibilities
+ @validator('housing_type')
+ def check_housing_type(cls, v):
+ if v not in cls._allowed_housing_types:
+ raise ValueError(f"{v} is not a valid housing type")
+ return v
diff --git a/backend/app/utils.py b/backend/app/utils.py
index ba5509e1..b3843206 100644
--- a/backend/app/utils.py
+++ b/backend/app/utils.py
@@ -1,6 +1,4 @@
import boto3
-import csv
-from io import StringIO
import string
import secrets
import logging
@@ -41,25 +39,6 @@ def setup_logger(log_file=None, level=logging.INFO, overwrite_handler=False):
return logger
-def read_csv_from_s3(bucket_name, filepath):
- s3 = boto3.client('s3')
-
- # Get the object from s3
- s3_object = s3.get_object(Bucket=bucket_name, Key=filepath)
-
- # Read the CSV body from the s3 object
- body = s3_object['Body'].read()
-
- # Use StringIO to create a file-like object from the string
- csv_data = StringIO(body.decode('utf-8'))
-
- # Use csv library to read it into a list of dictionaries
- reader = csv.DictReader(csv_data)
- data = list(reader)
-
- return data
-
-
def generate_api_key():
# Define the characters that will be used to generate the api key
characters = string.ascii_letters + string.digits
diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py
index 99fae4db..99d67126 100644
--- a/backend/ml_models/AnnualBillSavings.py
+++ b/backend/ml_models/AnnualBillSavings.py
@@ -10,13 +10,17 @@ class AnnualBillSavings:
AVERAGE_ELECTRICITY_CONSUMPTION = 2700
AVERAGE_GAS_CONSUMPTION = 11500
- # Latest price cap figures from Ofgem are for January 2024
- # https://www.ofgem.gov.uk/publications/changes-energy-price-cap-1-january-2024
- ELECTRICITY_PRICE_CAP = 0.29
- GAS_PRICE_CAP = 0.07
+ # Latest price cap figures from Ofgem are for April 2024
+ # https://www.ofgem.gov.uk/publications/new-energy-price-cap-level-april-june-2024-starts-today
+ ELECTRICITY_PRICE_CAP = 0.245
+ GAS_PRICE_CAP = 0.0604
# This is a weighted mean of the price caps, using the consumption figures above as weights
- PRICE_FACTOR = 0.11183098591549295
+ PRICE_FACTOR = 0.09549999999999999
+
+ # Daily standard charge, based on average across England, Scotland and Wales, and includes VAT
+ DAILY_STANDARD_CHARGE_GAS = 0.3143
+ DAILY_STANDARD_CHARGE_ELECTRICITY = 0.601
EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"]
@@ -38,6 +42,17 @@ class AnnualBillSavings:
"""
return cls.ELECTRICITY_PRICE_CAP * kwh
+    @classmethod
+    def calculate_annual_bill(cls, kwh):
+        """
+        Estimate the total annual energy bill for a property: the unit cost (PRICE_FACTOR * kwh) plus 365 days of
+        both the gas and the electricity daily standing charges. It assumes gas & electricity are both used.
+        :param kwh: The total kwh consumption
+        :return: An estimate for annual bill
+        """
+
+        return cls.PRICE_FACTOR * kwh + (cls.DAILY_STANDARD_CHARGE_GAS + cls.DAILY_STANDARD_CHARGE_ELECTRICITY) * 365
+
@classmethod
def adjust_energy_to_metered(cls, epc_energy_consumption, current_epc_rating):
"""
diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py
index 2bb7de32..5c781979 100644
--- a/backend/ml_models/Valuation.py
+++ b/backend/ml_models/Valuation.py
@@ -52,6 +52,33 @@ class PropertyValuation:
10070056829: 76_000,
10070056920: 76_000,
10023345463: 76_000,
+ # IMMO Dudley Pilot - search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/
+ 90070461: 172_000, # Based on Zoopla
+ 90022227: 181_000, # Based on Zoopla
+ 90106884: 180_000, # Based on Zoopla
+ 90051858: 201_000, # Based on Zoopla
+ 90060989: 172_000, # Based on Zoopla
+ 90048026: 196_000, # Based on Zoopla
+ 90077535: 192_000, # Based on Zoopla
+ 90093693: 279_000, # Based on Zoopla
+ 90055152: 149_000, # Based on Zoopla
+ 90028499: 238_000, # Based on Zoopla
+ # IMMO Dudley Pilot 2 - search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/
+ 90039318: 177_000, # Based on Zoopla
+ 90038384: 170_000, # Based on Zoopla
+ 90105380: 185_000, # Based on Zoopla
+ 90124001: 165_000, # Based on Zoopla
+ 90013980: 148_000, # Based on Zoopla
+ 90087154: 184_000, # Based on Zoopla
+ 90046817: 167_000, # Based on Zoopla
+ # Goldman Sachs Pilot for intro - search by going to https://www.zoopla.co.uk/property/uprn/{uprn}/
+ 100070358888: 153_000, # Based on Zoopla
+ 10090436544: 282_000, # Based on Zoopla
+ 100070365751: 177_000, # Based on Zoopla
+ 10095952767: 168_000, # Based on Zoopla
+ 100070520130: 177_000, # Based on Zoopla
+ 100070333957: 185_000, # Based on Zoopla
+ 100070543258: 211_000, # Based on Zoopla
}
# We base our valuation uplifts on a number of sources
@@ -89,6 +116,29 @@ class PropertyValuation:
# {"start": "D", "end": "A", "increase_percentage": 0.017},
]
+ # Found here: https://www.rightmove.co.uk/news/articles/property-news/green-premium-epc-ratings/
+ # Uplifts quoted for reaching band C: F -> C is +15%, E -> C is +7%, D -> C is +3%
+ # NOTE(review): the table below also gives G the F figure, and reuses each "-> C" figure for
+ # end bands B and A; neither is listed in the figures quoted above - confirm this is intended
+ RIGHTMOVE_MAPPING = [
+ {"start": "G", "end": "C", "increase_percentage": 0.15},
+ {"start": "G", "end": "B", "increase_percentage": 0.15},
+ {"start": "G", "end": "A", "increase_percentage": 0.15},
+
+ {"start": "F", "end": "C", "increase_percentage": 0.15},
+ {"start": "F", "end": "B", "increase_percentage": 0.15},
+ {"start": "F", "end": "A", "increase_percentage": 0.15},
+
+ {"start": "E", "end": "C", "increase_percentage": 0.07},
+ {"start": "E", "end": "B", "increase_percentage": 0.07},
+ {"start": "E", "end": "A", "increase_percentage": 0.07},
+
+ {"start": "D", "end": "C", "increase_percentage": 0.03},
+ {"start": "D", "end": "B", "increase_percentage": 0.03},
+ {"start": "D", "end": "A", "increase_percentage": 0.03},
+
+ ]
+
EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"]
@classmethod
@@ -140,14 +190,18 @@ class PropertyValuation:
msm_increase, lloyds_increase = cls.get_increase(epc_band_range)
- # We now use the knight frank and nationwide data to get further valuation evidence, if we have it
+ # We now use the knight frank, nationwide and Rightmove data to get further valuation evidence, if we have it
kf_increase = [x for x in cls.KNIGHT_FRANK_MAPPING if x["start"] == current_epc and x["end"] == target_epc]
nw_increase = [x for x in cls.NATIONWIDE_MAPPING if x["start"] == current_epc and x["end"] == target_epc]
+ rm_increase = [x for x in cls.RIGHTMOVE_MAPPING if x["start"] == current_epc and x["end"] == target_epc]
kf_increase = kf_increase[0]["increase_percentage"] if kf_increase else None
nw_increase = nw_increase[0]["increase_percentage"] if nw_increase else None
+ rm_increase = rm_increase[0]["increase_percentage"] if rm_increase else None
- all_increases = [x for x in [msm_increase, lloyds_increase, kf_increase, nw_increase] if x is not None]
+ all_increases = [
+ x for x in [msm_increase, lloyds_increase, kf_increase, nw_increase, rm_increase] if x is not None
+ ]
max_increase = max(all_increases)
min_increase = min(all_increases)
diff --git a/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py b/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py
new file mode 100644
index 00000000..044cc830
--- /dev/null
+++ b/etl/air_source_heat_pump/AirSourceHeatPumpEfficiency.py
@@ -0,0 +1,97 @@
+import pandas as pd
+from tqdm import tqdm
+from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
+from utils.logger import setup_logger
+from etl.epc.settings import EARLIEST_EPC_DATE
+
+logger = setup_logger()
+
+
+class AirSourceHeatPumpEfficiency:
+    """
+    Counts how often each combination of property and heating attributes appears on EPC
+    certificates whose main heating description mentions an air source heat pump.
+    """
+
+    # Tenure values whose certificates are excluded
+    EXCLUDED_TENURES = [
+        "unknown",
+        "Not defined - use in the case of a new dwelling for which the intended tenure in not known. "
+        "It is not to be used for an existing dwelling",
+    ]
+
+    # A certificate must have a value in every one of these columns to be counted
+    REQUIRED_COLUMNS = ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]
+
+    # The columns that define one combination (defined once instead of being repeated per groupby)
+    GROUP_COLUMNS = [
+        "PROPERTY_TYPE",
+        "BUILT_FORM",
+        "MAINHEAT_DESCRIPTION",
+        "MAINHEAT_ENERGY_EFF",
+        "MAINHEATCONT_DESCRIPTION",
+        "MAINHEATC_ENERGY_EFF",
+        "MAIN_FUEL",
+        "HOTWATER_DESCRIPTION",
+        "HOT_WATER_ENERGY_EFF",
+        "MAINS_GAS_FLAG",
+    ]
+
+    def __init__(self, file_directories, cleaned_lookup):
+        """
+        :param file_directories: A list of directories where files are stored.
+        :param cleaned_lookup: A dictionary containing cleaned lookup data.
+        """
+        self.file_directories = file_directories
+        self.cleaned_lookup = cleaned_lookup
+        # Kept for compatibility; create_dataset returns its result rather than filling this list
+        self.results = []
+
+    def _count_combinations(self, directory):
+        """
+        Count air source heat pump certificates per GROUP_COLUMNS combination for one directory
+        :param directory: A path-like directory that contains a certificates.csv file
+        :return: DataFrame of the GROUP_COLUMNS plus a "count" column
+        """
+        df = pd.read_csv(directory / "certificates.csv", low_memory=False)
+        # .copy() so the column assignments below act on an independent frame, not a slice
+        df = df[~pd.isnull(df["UPRN"])].copy()
+        df["UPRN"] = df["UPRN"].astype(int).astype(str)
+        # Take entries after SAP12
+        df["LODGEMENT_DATE"] = pd.to_datetime(df["LODGEMENT_DATE"])
+        df = df[df["LODGEMENT_DATE"] > EARLIEST_EPC_DATE]
+        df = df[~df["TENURE"].isin(self.EXCLUDED_TENURES)]
+        # Take entries that contain an air source heat pump
+        df = df[df["MAINHEAT_DESCRIPTION"].str.contains("air source heat pump", case=False, na=False)]
+        df = df.dropna(subset=self.REQUIRED_COLUMNS)
+        return df.groupby(self.GROUP_COLUMNS).size().reset_index(name="count")
+
+    def create_dataset(self):
+        """
+        Aggregate the combination counts across every directory and log the built form breakdown
+        :return: DataFrame with one row per GROUP_COLUMNS combination and its total "count"
+        """
+        logger.info("Creating air source heat pump efficiency dataset")
+
+        per_directory = [self._count_combinations(directory) for directory in tqdm(self.file_directories)]
+        if not per_directory:
+            # pd.concat raises a ValueError when it is given nothing to concatenate
+            return pd.DataFrame(columns=self.GROUP_COLUMNS + ["count"])
+
+        all_counts_agg = pd.concat(per_directory).groupby(self.GROUP_COLUMNS)["count"].sum().reset_index()
+
+        # These summaries used to be evaluated and discarded, so a run reported nothing; log them.
+        # Shares are weighted by "count": a value_counts() on this frame would count combinations,
+        # not certificates.
+        logger.info(f"Certificates by property type:\n{all_counts_agg.groupby('PROPERTY_TYPE')['count'].sum()}")
+        for property_type in ["House", "Flat", "Bungalow"]:
+            subset = all_counts_agg[all_counts_agg["PROPERTY_TYPE"] == property_type]
+            by_built_form = subset.groupby("BUILT_FORM")["count"].sum()
+            logger.info(f"Built form share for {property_type}:\n{by_built_form / by_built_form.sum()}")
+
+        # NOTE(review): earlier notes quoted 68% (houses) and 74% (bungalows) for detached and
+        # semi-detached built forms; those came from unweighted value_counts - re-check the figures
+        # TODO: Research options for mid and end-terrace houses
+        # TODO: Research the options for flats - we see them appear in flats, but practically speaking, how does the
+        # install process work?
+        return all_counts_agg
diff --git a/etl/air_source_heat_pump/app.py b/etl/air_source_heat_pump/app.py
new file mode 100644
index 00000000..ac87b34b
--- /dev/null
+++ b/etl/air_source_heat_pump/app.py
@@ -0,0 +1,24 @@
+from pathlib import Path
+from backend.app.plan.utils import get_cleaned
+from etl.air_source_heat_pump.AirSourceHeatPumpEfficiency import AirSourceHeatPumpEfficiency
+
+DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
+
+
+def app():
+    """
+    Run the air source heat pump analysis over the locally downloaded EPC certificates.
+
+    Each sub-directory of DATA_DIRECTORY, together with the lookup returned by get_cleaned(), is
+    passed to AirSourceHeatPumpEfficiency, whose create_dataset() method is then called. The
+    output is meant to inform how the air source heat pump recommendation is simulated.
+    :return:
+    """
+
+    certificate_dirs = []
+    for candidate in DATA_DIRECTORY.iterdir():
+        if candidate.is_dir():
+            certificate_dirs.append(candidate)
+    lookup = get_cleaned()
+
+    AirSourceHeatPumpEfficiency(file_directories=certificate_dirs, cleaned_lookup=lookup).create_dataset()
diff --git a/etl/customers/gla_croydon_demo/asset_list.py b/etl/customers/gla_croydon_demo/asset_list.py
new file mode 100644
index 00000000..52e9422c
--- /dev/null
+++ b/etl/customers/gla_croydon_demo/asset_list.py
@@ -0,0 +1,208 @@
+import pandas as pd
+from utils.s3 import save_csv_to_s3
+
+USER_ID = 8  # first path segment of the S3 key that app() writes the asset list to
+PORTFOLIO_ID = 67  # second path segment of that key; also included in the request body app() prints
+# Fixed UPRN selections per archetype: app() keeps only asset-list rows whose uprn appears in these lists
+archetype_1_uprns = [100020604138, 200001188299, 100020578756, 200001187196, 200001192253, 100020581792, 200001188304,
+ 100020625813, 100020618060, 100020585305, 100020617489, 100020615039, 100020618076, 100020588913,
+ 200001187197, 100020671205, 100020576940, 100020619814, 100020576472, 100020618083]
+archetype_2_uprns = [100020698027, 10001007455, 100020653785, 10090383198, 100020665632, 100020620659, 100020615603,
+ 100020609610, 100020625597, 100020665656, 100020665640, 100020587905, 100020665630, 100020624351,
+ 100020625451, 100020624348, 100020666735, 100020653786, 100020576458, 100020657902, 100020624350,
+ 100020637405, 100020666734, 100020616325, 100020666716, 100020653783, 100020665645, 100020642337,
+ 100020665638, 100022904981, 100020688226, 100020630285, 100020626800, 100020665634, 100022907528,
+ 100020665652, 100020624347, 100020666721, 100020585002, 10014055968, 10001008257, 100020621438,
+ 100020576459, 100020665643, 100020665654, 100022917303]
+archetype_3_uprns = [100020577523, 100020616446, 100020605342, 100020594652, 100020585394, 100020601138, 100020597485,
+ 100020614883, 100020633162, 100020697787, 200001185785, 100020646842, 100020581449, 100020595611,
+ 100020641814, 100020575611, 100020652986, 100020654671, 100020647336, 100020610518, 100020607980,
+ 100020692380, 100020581690]
+archetype_4_uprns = [100020650603, 100020582907, 100020605116, 100020650607, 100020589325, 100020655500, 100020642537,
+ 200001187539, 100020631683, 100020610165, 100020596436, 100020598277, 100020660228]
+
+
+def app():
+ """
+ Build the Croydon demo asset list from the local EPC download, save it to S3 and print the request body
+ :return:
+ """
+
+ # Firstly, read in the EPC data for Croydon
+ epc_data = pd.read_csv(
+ "local_data/all-domestic-certificates/domestic-E09000008-Croydon/certificates.csv",
+ low_memory=False
+ )
+
+ # Filter on entries where we have a UPRN
+ epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
+
+ # Get the newest EPC for each UPRN. We use LODGEMENT_DATE as a proxy for this
+ epc_data["LODGEMENT_DATE"] = pd.to_datetime(epc_data["LODGEMENT_DATE"])
+
+ epc_data = epc_data.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN")
+
+ # Now filter on social properties
+ epc_data = epc_data[epc_data["TENURE"].isin(["rental (social)", "Rented (social)"])]
+ # There are 17337 properties with a registered EPC in Croydon - NOTE(review): presumably social ones only; confirm
+ # Take below EPC C properties
+ epc_data = epc_data[epc_data["CURRENT_ENERGY_EFFICIENCY"].astype(int) < 69]
+ # 7994 properties are below EPC C (46%)
+
+ # 79% D, 19% E, 1% F, 0.2% G - it probably makes the most sense to focus on E and D (next line's result is unused)
+ epc_data["CURRENT_ENERGY_RATING"].value_counts(normalize=True)
+
+ # For the purpose of the sample, take the properties whose EPC was lodged in the last 3 years
+ # This gives us 1351 remaining properties
+ three_years_ago = pd.Timestamp.now() - pd.DateOffset(days=int(3 * 365))
+ epc_data = epc_data[epc_data["LODGEMENT_DATE"] >= three_years_ago]
+
+ # Archetype 1: defined below:
+ # 1) House
+ # 2) Unfilled cavity
+ # 3) A roof that could be insulated (flat or pitched with no more than 50mm insulation)
+ # 4) EPC E or D
+ # 24 properties
+ archetype_1_sample = epc_data[
+ epc_data["PROPERTY_TYPE"].isin(["House"]) &
+ (epc_data["CURRENT_ENERGY_RATING"].isin(["D", "E"])) &
+ epc_data["WALLS_DESCRIPTION"].isin(["Cavity wall, as built, no insulation (assumed)"]) &
+ epc_data["ROOF_DESCRIPTION"].isin(
+ [
+ "Pitched, 12 mm loft insulation",
+ "Pitched, 0 mm loft insulation",
+ "Pitched, no insulation",
+ "Pitched, 50 mm loft insulation",
+ "Flat, no insulation (assumed)",
+ "Pitched, no insulation (assumed)"
+ ]
+ )
+ ]
+ archetype_1_sample_asset_list = archetype_1_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
+ archetype_1_sample_asset_list["ARCHETYPE"] = "Archetype 1"
+
+ # Archetype 2: defined below:
+ # 1) Flat
+ # 2) Unfilled cavity
+ # 3) Another property above
+ # 4) EPC E or D
+ # 57 properties here
+ archetype_2_sample = epc_data[
+ epc_data["PROPERTY_TYPE"].isin(["Flat"]) &
+ (epc_data["CURRENT_ENERGY_RATING"].isin(["E", "D"])) &
+ epc_data["WALLS_DESCRIPTION"].isin(["Cavity wall, as built, no insulation (assumed)"]) &
+ epc_data["ROOF_DESCRIPTION"].isin(
+ [
+ "(another dwelling above)"
+ ]
+ )
+ ]
+ archetype_2_sample_asset_list = archetype_2_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
+ archetype_2_sample_asset_list["ARCHETYPE"] = "Archetype 2"
+
+ # Archetype 3: defined below:
+ # 1) EPC E or below
+ # 2) Solid brick wall
+ # 3) House
+ # 4) Pitched roof with no, limited or 100 mm loft insulation
+ # Just 7 properties (more expensive to retrofit) - NOTE(review): archetype_3_uprns lists 23, so this looks stale
+ archetype_3_sample = epc_data[
+ epc_data["PROPERTY_TYPE"].isin(["House"]) &
+ (epc_data["CURRENT_ENERGY_RATING"].isin(["E", "F", "G"])) &
+ epc_data["WALLS_DESCRIPTION"].isin(["Solid brick, as built, no insulation (assumed)"]) &
+ epc_data["ROOF_DESCRIPTION"].isin(
+ [
+ "Pitched, no insulation",
+ "Pitched, limited insulation (assumed)",
+ "Pitched, 100 mm loft insulation",
+ "Pitched, no insulation (assumed)",
+ ]
+ )
+ ]
+ archetype_3_sample_asset_list = archetype_3_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
+ archetype_3_sample_asset_list["ARCHETYPE"] = "Archetype 3"
+
+ # Archetype 4: defined below:
+ # 1) Maisonette
+ # 2) Empty cavity
+ # 3) EPC E - NOTE(review): no CURRENT_ENERGY_RATING filter is applied below; confirm whether one is intended
+ # 16 properties here
+ archetype_4_sample = epc_data[
+ epc_data["PROPERTY_TYPE"].isin(["Maisonette"]) &
+ epc_data["WALLS_DESCRIPTION"].isin(
+ ["Cavity wall, as built, no insulation (assumed)"]
+ )
+ ]
+
+ archetype_4_sample_asset_list = archetype_4_sample[["UPRN", "ADDRESS1", "POSTCODE"]].copy()
+ archetype_4_sample_asset_list["ARCHETYPE"] = "Archetype 4"
+
+ asset_list = pd.concat(
+ [
+ archetype_1_sample_asset_list,
+ archetype_2_sample_asset_list,
+ archetype_3_sample_asset_list,
+ archetype_4_sample_asset_list
+ ]
+ )
+
+ asset_list = asset_list.rename(
+ columns={
+ "UPRN": "uprn",
+ "ADDRESS1": "address",
+ "POSTCODE": "postcode",
+ "ARCHETYPE": "archetype"
+ }
+ )
+
+ asset_list["uprn"] = asset_list["uprn"].astype(int)
+
+ # We end up with some properties that are currently an EPC C, but we do not have this data in the download, so we
+ # manually remove
+ # 1) 3 Reid Close, CR5 3BL
+ # 2) Flat 6, Collier Court 2A, St. Peters Road CR0 1HD
+ asset_list = asset_list[
+ ~asset_list["uprn"].isin(
+ [
+ 100020576460,
+ 100020624352,
+ ]
+ )
+ ]
+ # We have slightly too many properties, so we keep a fixed sample per archetype (the module-level UPRN lists)
+ # achetype_1_size = 20
+ # achetype_2_size = 46
+ # achetype_3_size = 23
+ # achetype_4_size = 13
+ # archetype_1_uprns = asset_list[asset_list["archetype"] == "Archetype 1"]["uprn"].sample(
+ # int(achetype_1_size)
+ # ).tolist()
+ # archetype_2_uprns = asset_list[asset_list["archetype"] == "Archetype 2"]["uprn"].sample(
+ # int(achetype_2_size)
+ # ).tolist()
+ # archetype_3_uprns = asset_list[asset_list["archetype"] == "Archetype 3"]["uprn"].sample(
+ # int(achetype_3_size)
+ # ).tolist()
+ # archetype_4_uprns = asset_list[asset_list["archetype"] == "Archetype 4"]["uprn"].sample(
+ # int(achetype_4_size)
+ # ).tolist()
+ uprns_to_keep = archetype_1_uprns + archetype_2_uprns + archetype_3_uprns + archetype_4_uprns
+ asset_list = asset_list[asset_list["uprn"].isin(uprns_to_keep)]
+
+ filename = f"{USER_ID}/{PORTFOLIO_ID}/inputs.csv"
+ save_csv_to_s3(
+ dataframe=asset_list,
+ bucket_name="retrofit-plan-inputs-dev",
+ file_name=filename
+ )
+
+ body = {
+ "portfolio_id": str(PORTFOLIO_ID),
+ "housing_type": "Social",
+ "goal": "Increase EPC",
+ "goal_value": "C",
+ "trigger_file_path": filename,
+ "budget": None,
+ "exclusions": ["floor_insulation"]
+ }
+ print(body)
diff --git a/etl/customers/gla_croydon_demo/slides.py b/etl/customers/gla_croydon_demo/slides.py
new file mode 100644
index 00000000..9f791bbd
--- /dev/null
+++ b/etl/customers/gla_croydon_demo/slides.py
@@ -0,0 +1,760 @@
+"""
+This script contains the code to generate the data required to populate the slides
+We connect to the database and extract the data for the portfolio needed, so it is recommended to use
+an environment akin to the backend to run this script
+"""
+import pandas as pd
+import numpy as np
+from backend.app.db.connection import db_engine
+from sqlalchemy.orm import sessionmaker
+from utils.s3 import read_csv_from_s3
+from etl.customers.slide_utils import (
+ plot_epc_distribution,
+ get_property_details_by_portfolio_id,
+ get_plan_by_portfolio_id,
+ get_properties_with_default_recommendations,
+ create_powerpoint,
+ create_recommendations_summary
+)
+from backend.ml_models.AnnualBillSavings import AnnualBillSavings
+
+USER_ID = 8  # used in the S3 key of the asset list that scenario_1 reads
+PORTFOLIO_ID_1 = 67  # portfolio queried by scenario_1; NOTE(review): scenario_1 also hard-codes 67 in its S3 key
+PORTFOLIO_ID_2 = 68
+EPC_TARGET_1 = "C"
+EPC_TARGET_2 = "A"
+SAP_TARGET_1 = 69  # passed to create_recommendations_summary in scenario_1
+SAP_TARGET_2 = 100
+CUSTOMER_KEY = "gla-demo"
+
+# Sample UPRNs per archetype, held as strings (asset_list.py holds its UPRN lists as integers)
+archetype_1_sample = ['100020604138', '200001192253', '100020581792', '100020576940', '200001187196', '100020618060',
+ '100020625813', '100020578756', '100020618076', '200001187197', '100020619814', '100020617489',
+ '100020588913']
+
+archetype_2_sample = ['100020585002', '100020615603', '100020665652', '100020626800', '100020624347', '100020624348',
+ '100020576459', '10001007455', '100020666716', '100020609610', '100020625451', '100020625597',
+ '100020624351', '100020665634', '100020624350', '100020665640', '100020665632', '100022917303',
+ '100020665656', '10014055968', '100020630285', '100020665638', '100020616325', '100020637405',
+ '100020698027', '100020657902', '100020688226', '100020653786', '100020642337', '100020665643']
+
+archetype_3_sample = ['100020594652', '100020697787', '100020577523', '100020633162', '100020601138', '100020595611',
+ '100020597485', '100020614883', '100020605342', '100020654671', '100020575611', '100020607980',
+ '200001185785', '100020616446', '100020692380']
+
+archetype_4_sample = ['100020596436', '100020610165', '200001187539', '100020655500', '100020582907', '100020598277',
+ '100020650607', '100020605116', '100020650603']
+
+
+def scenario_1():
+ # Connect to database
+ session = sessionmaker(bind=db_engine)()
+
+ ########################################################################
+ # Get the data we need
+ ########################################################################
+
+ portfolio_id = PORTFOLIO_ID_1
+
+ # Get the asset list
+ asset_list = read_csv_from_s3(
+ "retrofit-plan-inputs-dev", f"{USER_ID}/67/inputs.csv"
+ )
+ asset_list = pd.DataFrame(asset_list)
+
+ # Get the properties for the portfolio
+ properties = get_properties_with_default_recommendations(session, portfolio_id)
+ properties_df = pd.DataFrame(properties)
+
+ # We now pull the data for the property details
+ property_details = get_property_details_by_portfolio_id(session, portfolio_id)
+ property_details_df = pd.DataFrame(property_details)
+ # We estimate bills based on the adjusted_energy_consumption
+ property_details_df["energy_bill"] = property_details_df["adjusted_energy_consumption"].apply(
+ lambda x: AnnualBillSavings.calculate_annual_bill(x)
+ )
+ # Merge on uprn
+ property_details_df = property_details_df.merge(
+ properties_df[["uprn", "id"]].rename(columns={"id": "property_id"}),
+ on="property_id"
+ )
+
+ plans = get_plan_by_portfolio_id(session, portfolio_id)
+ plans_df = pd.DataFrame(plans)
+
+ # Unnest the recommendations. Each recommendation is a list of dictionaries
+ recommendations_exploded = properties_df["recommendations"].explode().tolist()
+ recommendations_df = pd.DataFrame([r for r in recommendations_exploded if not pd.isnull(r)])
+ # Add uprn on
+ recommendations_df = recommendations_df.merge(
+ properties_df[["uprn", "id"]].rename(columns={"id": "property_id"}),
+ how="left",
+ on="property_id"
+ )
+
+ recommendations_summary = create_recommendations_summary(
+ recommendations_df,
+ properties_df,
+ property_details_df,
+ SAP_TARGET_1
+ )
+
+ # Calculate % changes of energ, co2 and abs
+ recommendations_summary["carbon_percent_change"] = (
+ recommendations_summary["total_carbon"] / recommendations_summary["current_co2"]
+ )
+
+ recommendations_summary["energy_percent_change"] = (
+ recommendations_summary["adjusted_heat_demand"] / recommendations_summary["current_energy"]
+ )
+
+ recommendations_summary["bills_percent_change"] = (
+ recommendations_summary["total_bill_savings"] / recommendations_summary["current_energy_bill"]
+ )
+
+ ########################
+ # Overview
+ ########################
+ overview_totals = recommendations_summary.sum()
+ overview_means = recommendations_summary.mean()
+
+ ########################
+ # Measures
+ ########################
+ measures_count = recommendations_df.groupby("type")["id"].count().reset_index()
+ wall_insulation_measures = measures_count[
+ measures_count["type"].isin(["cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation"])
+ ]["id"].sum()
+ ventilation_measures = measures_count[
+ measures_count["type"].isin(["mechanical_ventilation"])
+ ]["id"].sum()
+ roof_insulation_measures = measures_count[
+ measures_count["type"].isin(["loft_insulation", "flat_roof_insulation"])
+ ]["id"].sum()
+ floor_insulation_measures = measures_count[
+ measures_count["type"].isin(["solid_floor_insulation", "suspended_floor_insulation"])
+ ]["id"].sum()
+ windows = measures_count[
+ measures_count["type"].isin(["windows_glazing"])
+ ]["id"].sum()
+ heating = measures_count[
+ measures_count["type"].isin(["heating"])
+ ]["id"].sum()
+ heating_controls = measures_count[
+ measures_count["type"].isin(["heating_control"])
+ ]["id"].sum()
+ solar = measures_count[
+ measures_count["type"].isin(["solar_pv"])
+ ]["id"].sum()
+ other = measures_count[
+ ~measures_count["type"].isin([
+ "cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation",
+ "loft_insulation", "flat_roof_insulation", "solid_floor_insulation",
+ "suspended_floor_insulation", "windows_glazing", "heating", "heating_control", "solar_pv",
+ "mechanical_ventilation"
+ ])
+ ]["id"].sum()
+
+ # Summary information by each archetype
+ ########################
+ # Archetype 1
+ ########################
+ archetype_1 = asset_list[asset_list["archetype"] == "Archetype 1"]
+ recommendations_arch_1_summary = recommendations_summary[
+ recommendations_summary["uprn"].astype(str).isin(archetype_1["uprn"].values)
+ ]
+
+ arch_1_property_details = property_details_df[
+ property_details_df["uprn"].astype(str).isin(archetype_1["uprn"].values)
+ ]
+ arch_1_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
+
+ # Take the mean, median and maximum of each value
+ cols_to_keep = ["total_cost", "total_carbon", "total_bill_savings", "total_sap_points", "adjusted_heat_demand",
+ "energy_percent_change", "carbon_percent_change", "bills_percent_change"]
+ arch_1_recommendation_min = recommendations_arch_1_summary.min()[cols_to_keep]
+ arch_1_recommendation_max = recommendations_arch_1_summary.max()[cols_to_keep]
+ arch_1_recommendation_means = recommendations_arch_1_summary.mean()[cols_to_keep]
+ arch_1_totals = recommendations_arch_1_summary.sum()[cols_to_keep]
+
+ annual_total_co2 = recommendations_arch_1_summary["total_carbon"].sum()
+ annual_total_bills = recommendations_arch_1_summary["total_bill_savings"].sum()
+ annual_total_energy_savings = recommendations_arch_1_summary["adjusted_heat_demand"].sum()
+ archetype_measures = \
+ recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_1["uprn"].values)].groupby("type")[
+ "id"].count().reset_index()
+
+ cost_text = (f"{round(arch_1_recommendation_means['total_cost'], 2)}: "
+ f"{arch_1_recommendation_min['total_cost']} - {arch_1_recommendation_max['total_cost']}")
+
+ sap_text = (f"{round(arch_1_recommendation_means['total_sap_points'], 2)}: "
+ f"{arch_1_recommendation_min['total_sap_points']} - {arch_1_recommendation_max['total_sap_points']}")
+
+ energy_text = (f"{round(arch_1_recommendation_means['adjusted_heat_demand'], 2)}: "
+ f"{arch_1_recommendation_min['adjusted_heat_demand']} - "
+ f"{arch_1_recommendation_max['adjusted_heat_demand']}")
+
+ energy_percent_text = (f"{round(arch_1_recommendation_means['energy_percent_change'], 2)}: "
+ f"{arch_1_recommendation_min['energy_percent_change']} - "
+ f"{arch_1_recommendation_max['energy_percent_change']}")
+
+ carbon_text = (f"{round(arch_1_recommendation_means['total_carbon'], 2)}: "
+ f"{arch_1_recommendation_min['total_carbon']} - {arch_1_recommendation_max['total_carbon']}")
+
+ carbon_percent_text = (f"{round(arch_1_recommendation_means['carbon_percent_change'], 2)}: "
+ f"{arch_1_recommendation_min['carbon_percent_change']} - "
+ f"{arch_1_recommendation_max['carbon_percent_change']}")
+
+ bill_text = (f"{round(arch_1_recommendation_means['total_bill_savings'], 2)}: "
+ f"{arch_1_recommendation_min['total_bill_savings']} - "
+ f"{arch_1_recommendation_max['total_bill_savings']}")
+
+ bill_percent_text = (f"{round(arch_1_recommendation_means['bills_percent_change'], 2)}: "
+ f"{arch_1_recommendation_min['bills_percent_change']} - "
+ f"{arch_1_recommendation_max['bills_percent_change']}")
+
+ ########################
+ # Archetype 2
+ ########################
+ archetype_2 = asset_list[asset_list["archetype"] == "Archetype 2"]
+ recommendations_arch_2_summary = recommendations_summary[
+ recommendations_summary["uprn"].astype(str).isin(archetype_2["uprn"].values)
+ ]
+
+ arch_2_property_details = property_details_df[
+ property_details_df["uprn"].astype(str).isin(archetype_2["uprn"].values)
+ ]
+ arch_2_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
+
+ # Take the mean, median and maximum of each value
+ arch_2_recommendation_min = recommendations_arch_2_summary.min()
+ arch_2_recommendation_max = recommendations_arch_2_summary.max()
+ arch_2_recommendation_means = recommendations_arch_2_summary.mean().round(2)
+
+ total_cost = recommendations_arch_2_summary["total_cost"].sum()
+ annual_total_co2 = recommendations_arch_2_summary["total_carbon"].sum()
+ annual_total_bills = recommendations_arch_2_summary["total_bill_savings"].sum()
+ annual_total_energy_savings = recommendations_arch_2_summary["adjusted_heat_demand"].sum()
+ archetype_measures = \
+ recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_2["uprn"].values)].groupby("type")[
+ "id"].count().reset_index()
+
+ cost_text = (f"{round(arch_2_recommendation_means['total_cost'], 2)}: "
+ f"{arch_2_recommendation_min['total_cost']} - {arch_2_recommendation_max['total_cost']}")
+
+ sap_text = (f"{round(arch_2_recommendation_means['total_sap_points'], 2)}: "
+ f"{arch_2_recommendation_min['total_sap_points']} - {arch_2_recommendation_max['total_sap_points']}")
+
+ energy_text = (f"{round(arch_2_recommendation_means['adjusted_heat_demand'], 2)}: "
+ f"{arch_2_recommendation_min['adjusted_heat_demand']} - "
+ f"{arch_2_recommendation_max['adjusted_heat_demand']}")
+
+ energy_percent_text = (f"{round(arch_2_recommendation_means['energy_percent_change'], 2)}: "
+ f"{arch_2_recommendation_min['energy_percent_change']} - "
+ f"{arch_2_recommendation_max['energy_percent_change']}")
+
+ carbon_text = (f"{round(arch_2_recommendation_means['total_carbon'], 2)}: "
+ f"{arch_2_recommendation_min['total_carbon']} - {arch_2_recommendation_max['total_carbon']}")
+
+ carbon_percent_text = (f"{round(arch_2_recommendation_means['carbon_percent_change'], 2)}: "
+ f"{arch_2_recommendation_min['carbon_percent_change']} - "
+ f"{arch_2_recommendation_max['carbon_percent_change']}")
+
+ bill_text = (f"{round(arch_2_recommendation_means['total_bill_savings'], 2)}: "
+ f"{arch_2_recommendation_min['total_bill_savings']} - "
+ f"{arch_2_recommendation_max['total_bill_savings']}")
+
+ bill_percent_text = (f"{round(arch_2_recommendation_means['bills_percent_change'], 2)}: "
+ f"{arch_2_recommendation_min['bills_percent_change']} - "
+ f"{arch_2_recommendation_max['bills_percent_change']}")
+
+ ########################
+ # Archetype 3
+ ########################
+ archetype_3 = asset_list[asset_list["archetype"] == "Archetype 3"]
+ recommendations_arch_3_summary = recommendations_summary[
+ recommendations_summary["uprn"].astype(str).isin(archetype_3["uprn"].values)
+ ]
+
+ arch_3_property_details = property_details_df[
+ property_details_df["uprn"].astype(str).isin(archetype_3["uprn"].values)
+ ]
+ arch_3_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
+
+ # Take the mean, median and maximum of each value
+ arch_3_recommendation_min = recommendations_arch_3_summary.min()
+ arch_3_recommendation_max = recommendations_arch_3_summary.max()
+ arch_3_recommendation_means = recommendations_arch_3_summary.mean()
+
+ total_cost = recommendations_arch_3_summary["total_cost"].sum()
+ annual_total_co2 = recommendations_arch_3_summary["total_carbon"].sum()
+ annual_total_bills = recommendations_arch_3_summary["total_bill_savings"].sum()
+ annual_total_energy_savings = recommendations_arch_3_summary["adjusted_heat_demand"].sum()
+ archetype_measures = \
+ recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_3["uprn"].values)].groupby("type")[
+ "id"].count().reset_index()
+
+ cost_text = (f"{round(arch_3_recommendation_means['total_cost'], 2)}: "
+ f"{arch_3_recommendation_min['total_cost']} - {arch_3_recommendation_max['total_cost']}")
+
+ sap_text = (f"{round(arch_3_recommendation_means['total_sap_points'], 2)}: "
+ f"{arch_3_recommendation_min['total_sap_points']} - {arch_3_recommendation_max['total_sap_points']}")
+
+ energy_text = (f"{round(arch_3_recommendation_means['adjusted_heat_demand'], 2)}: "
+ f"{arch_3_recommendation_min['adjusted_heat_demand']} - "
+ f"{arch_3_recommendation_max['adjusted_heat_demand']}")
+
+ energy_percent_text = (f"{round(arch_3_recommendation_means['energy_percent_change'], 2)}: "
+ f"{arch_3_recommendation_min['energy_percent_change']} - "
+ f"{arch_3_recommendation_max['energy_percent_change']}")
+
+ carbon_text = (f"{round(arch_3_recommendation_means['total_carbon'], 2)}: "
+ f"{arch_3_recommendation_min['total_carbon']} - {arch_3_recommendation_max['total_carbon']}")
+
+ carbon_percent_text = (f"{round(arch_3_recommendation_means['carbon_percent_change'], 2)}: "
+ f"{arch_3_recommendation_min['carbon_percent_change']} - "
+ f"{arch_3_recommendation_max['carbon_percent_change']}")
+
+ bill_text = (f"{round(arch_3_recommendation_means['total_bill_savings'], 2)}: "
+ f"{arch_3_recommendation_min['total_bill_savings']} - "
+ f"{arch_3_recommendation_max['total_bill_savings']}")
+
+ bill_percent_text = (f"{round(arch_3_recommendation_means['bills_percent_change'], 2)}: "
+ f"{arch_3_recommendation_min['bills_percent_change']} - "
+ f"{arch_3_recommendation_max['bills_percent_change']}")
+
+ ########################
+ # Archetype 4
+ ########################
+ archetype_4 = asset_list[asset_list["archetype"] == "Archetype 4"]
+ recommendations_arch_4_summary = recommendations_summary[
+ recommendations_summary["uprn"].astype(str).isin(archetype_4["uprn"].values)
+ ]
+
+ arch_4_property_details = property_details_df[
+ property_details_df["uprn"].astype(str).isin(archetype_4["uprn"].values)
+ ]
+ arch_4_property_details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum()
+
+ # Take the mean, median and maximum of each value
+ arch_4_recommendation_min = recommendations_arch_4_summary.min()
+ arch_4_recommendation_max = recommendations_arch_4_summary.max()
+ arch_4_recommendation_means = recommendations_arch_4_summary.mean()
+
+ total_cost = recommendations_arch_4_summary["total_cost"].sum()
+ annual_total_co2 = recommendations_arch_4_summary["total_carbon"].sum()
+ annual_total_bills = recommendations_arch_4_summary["total_bill_savings"].sum()
+ annual_total_energy_savings = recommendations_arch_4_summary["adjusted_heat_demand"].sum()
+ archetype_measures = \
+ recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_4["uprn"].values)].groupby("type")[
+ "id"].count().reset_index()
+
+ cost_text = (f"{round(arch_4_recommendation_means['total_cost'], 2)}: "
+ f"{arch_4_recommendation_min['total_cost']} - {arch_4_recommendation_max['total_cost']}")
+
+ sap_text = (f"{round(arch_4_recommendation_means['total_sap_points'], 2)}: "
+ f"{arch_4_recommendation_min['total_sap_points']} - {arch_4_recommendation_max['total_sap_points']}")
+
+ energy_text = (f"{round(arch_4_recommendation_means['adjusted_heat_demand'], 2)}: "
+ f"{arch_4_recommendation_min['adjusted_heat_demand']} - "
+ f"{arch_4_recommendation_max['adjusted_heat_demand']}")
+
+ energy_percent_text = (f"{round(arch_4_recommendation_means['energy_percent_change'], 2)}: "
+ f"{arch_4_recommendation_min['energy_percent_change']} - "
+ f"{arch_4_recommendation_max['energy_percent_change']}")
+
+ carbon_text = (f"{round(arch_4_recommendation_means['total_carbon'], 2)}: "
+ f"{arch_4_recommendation_min['total_carbon']} - {arch_4_recommendation_max['total_carbon']}")
+
+ carbon_percent_text = (f"{round(arch_4_recommendation_means['carbon_percent_change'], 2)}: "
+ f"{arch_4_recommendation_min['carbon_percent_change']} - "
+ f"{arch_4_recommendation_max['carbon_percent_change']}")
+
+ bill_text = (f"{round(arch_4_recommendation_means['total_bill_savings'], 2)}: "
+ f"{arch_4_recommendation_min['total_bill_savings']} - "
+ f"{arch_4_recommendation_max['total_bill_savings']}")
+
+ bill_percent_text = (f"{round(arch_4_recommendation_means['bills_percent_change'], 2)}: "
+ f"{arch_4_recommendation_min['bills_percent_change']} - "
+ f"{arch_4_recommendation_max['bills_percent_change']}")
+
+ ########################
+ # Overview
+ ########################
+ overview_totals = recommendations_summary.sum()
+
+
+def make_sample(random_state=None):
+    """
+    Draw a fixed-size random sample of UPRNs from each archetype in the asset list.
+
+    The asset list is read from the "retrofit-plan-inputs-dev" bucket. The number of properties
+    drawn per archetype is hard-coded below.
+
+    Parameters:
+    - random_state: Optional seed forwarded to DataFrame.sample for every archetype. The default
+      (None) keeps the previous behaviour of drawing a different sample on each call; pass an int
+      to make the sample reproducible.
+
+    Returns:
+    - tuple of four lists: the sampled UPRNs for Archetype 1, 2, 3 and 4, in that order.
+    """
+    # sample_proportion = 67 / 102
+    # Get the asset list
+    asset_list = read_csv_from_s3(
+        "retrofit-plan-inputs-dev", f"{USER_ID}/67/inputs.csv"
+    )
+    asset_list = pd.DataFrame(asset_list)
+
+    # From the asset list, we deduce how many properties we need from each archetype
+    sample_sizes = {
+        "Archetype 1": 13,
+        "Archetype 2": 30,
+        "Archetype 3": 15,
+        "Archetype 4": 9,
+    }
+
+    # The samples used to be computed and then lost when the function returned; they are now
+    # handed back to the caller so the uprns can be kept static.
+    return tuple(
+        asset_list[asset_list["archetype"] == archetype]
+        .sample(size, random_state=random_state)["uprn"]
+        .to_list()
+        for archetype, size in sample_sizes.items()
+    )
+
+
+# Metrics that are reported for each archetype as "<mean>: <min> - <max>" text
+_SCENARIO_2_SUMMARY_METRICS = (
+    "total_cost",
+    "total_sap_points",
+    "adjusted_heat_demand",
+    "energy_percent_change",
+    "total_carbon",
+    "carbon_percent_change",
+    "total_bill_savings",
+    "bills_percent_change",
+)
+
+# Recommendation types, grouped into the measure categories that are counted
+_SCENARIO_2_MEASURE_GROUPS = {
+    "wall_insulation": ["cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation"],
+    "ventilation": ["mechanical_ventilation"],
+    "roof_insulation": ["loft_insulation", "flat_roof_insulation"],
+    "floor_insulation": ["solid_floor_insulation", "suspended_floor_insulation"],
+    "windows": ["windows_glazing"],
+    "heating": ["heating"],
+    "heating_controls": ["heating_control"],
+    "solar": ["solar_pv"],
+}
+
+
+def _scenario_2_measure_counts(recommendations_df):
+    """Count recommendations per measure group; types outside every group are counted as "other"."""
+    measures_count = recommendations_df.groupby("type")["id"].count().reset_index()
+
+    counts = {
+        group: measures_count[measures_count["type"].isin(types)]["id"].sum()
+        for group, types in _SCENARIO_2_MEASURE_GROUPS.items()
+    }
+
+    grouped_types = [t for types in _SCENARIO_2_MEASURE_GROUPS.values() for t in types]
+    counts["other"] = measures_count[~measures_count["type"].isin(grouped_types)]["id"].sum()
+    return counts
+
+
+def _scenario_2_archetype_summary(
+    archetype, asset_list, recommendations_summary, recommendations_df, property_details_df
+):
+    """Summarise costs, savings and measure counts for the properties of a single archetype."""
+    archetype_uprns = asset_list[asset_list["archetype"] == archetype]["uprn"].values
+
+    summary = recommendations_summary[
+        recommendations_summary["uprn"].astype(str).isin(archetype_uprns)
+    ]
+    details = property_details_df[
+        property_details_df["uprn"].astype(str).isin(archetype_uprns)
+    ]
+
+    # Take the minimum, maximum and mean of each value
+    minimums = summary.min()
+    maximums = summary.max()
+    means = summary.mean()
+
+    measures = (
+        recommendations_df[recommendations_df["uprn"].astype(str).isin(archetype_uprns)]
+        .groupby("type")["id"]
+        .count()
+        .reset_index()
+    )
+
+    return {
+        # Share of the sampled portfolio's CO2 emissions that comes from this archetype
+        "co2_share": details["co2_emissions"].sum() / property_details_df["co2_emissions"].sum(),
+        "total_cost": summary["total_cost"].sum(),
+        "annual_total_co2": summary["total_carbon"].sum(),
+        "annual_total_bills": summary["total_bill_savings"].sum(),
+        "annual_total_energy_savings": summary["adjusted_heat_demand"].sum(),
+        "measures": measures,
+        "text": {
+            metric: f"{round(means[metric], 2)}: {minimums[metric]} - {maximums[metric]}"
+            for metric in _SCENARIO_2_SUMMARY_METRICS
+        },
+    }
+
+
+def scenario_2(sample_uprns=None):
+    """
+    Summarise the default recommendations for the sampled properties of PORTFOLIO_ID_2.
+
+    Parameters:
+    - sample_uprns: Optional list of UPRNs to restrict the analysis to. When omitted, the
+      module-level names archetype_1_sample ... archetype_4_sample are concatenated, which is what
+      this function always did (NOTE(review): those names are not defined in this function, so
+      they must exist at module level for the default to work).
+
+    Returns:
+    - dict with the plans, the per-property recommendations summary, portfolio-wide totals and
+      means, measure counts per group, and one summary per archetype. Previously every result
+      was a local variable that was discarded on return.
+    """
+    # Connect to database
+    session = sessionmaker(bind=db_engine)()
+
+    # The session was previously never closed; release it whatever happens below
+    try:
+        ########################################################################
+        # Get the data we need
+        ########################################################################
+        portfolio_id = PORTFOLIO_ID_2
+
+        # Get the asset list
+        asset_list = read_csv_from_s3(
+            "retrofit-plan-inputs-dev", f"{USER_ID}/67/inputs.csv"
+        )
+        asset_list = pd.DataFrame(asset_list)
+
+        if sample_uprns is None:
+            sample_uprns = archetype_1_sample + archetype_2_sample + archetype_3_sample + archetype_4_sample
+
+        # Filter on sample uprns
+        asset_list = asset_list[asset_list["uprn"].astype(str).isin(sample_uprns)]
+
+        # Get the properties for the portfolio
+        properties = get_properties_with_default_recommendations(session, portfolio_id)
+        properties_df = pd.DataFrame(properties)
+        properties_df = properties_df[properties_df["uprn"].astype(str).isin(sample_uprns)]
+        property_uprns = properties_df[["uprn", "id"]].rename(columns={"id": "property_id"})
+
+        # We now pull the data for the property details
+        property_details = get_property_details_by_portfolio_id(session, portfolio_id)
+        property_details_df = pd.DataFrame(property_details)
+        # Copy so that adding a column below does not write into a slice of the unfiltered frame
+        property_details_df = property_details_df[
+            property_details_df["property_id"].isin(properties_df["id"].values)
+        ].copy()
+        # We estimate bills based on the adjusted_energy_consumption
+        property_details_df["energy_bill"] = property_details_df["adjusted_energy_consumption"].apply(
+            AnnualBillSavings.calculate_annual_bill
+        )
+        # Merge on uprn
+        property_details_df = property_details_df.merge(property_uprns, on="property_id")
+
+        plans = get_plan_by_portfolio_id(session, portfolio_id)
+        plans_df = pd.DataFrame(plans)
+
+        # Unnest the recommendations. Each recommendation is a list of dictionaries
+        recommendations_exploded = properties_df["recommendations"].explode().tolist()
+        recommendations_df = pd.DataFrame([r for r in recommendations_exploded if not pd.isnull(r)])
+        # Add uprn on
+        recommendations_df = recommendations_df.merge(property_uprns, how="left", on="property_id")
+
+        recommendations_summary = create_recommendations_summary(
+            recommendations_df,
+            properties_df,
+            property_details_df,
+            SAP_TARGET_1
+        )
+
+        # Calculate the change in carbon, energy and bills relative to the current values
+        recommendations_summary["carbon_percent_change"] = (
+            recommendations_summary["total_carbon"] / recommendations_summary["current_co2"]
+        )
+        recommendations_summary["energy_percent_change"] = (
+            recommendations_summary["adjusted_heat_demand"] / recommendations_summary["current_energy"]
+        )
+        recommendations_summary["bills_percent_change"] = (
+            recommendations_summary["total_bill_savings"] / recommendations_summary["current_energy_bill"]
+        )
+
+        return {
+            "plans": plans_df,
+            "recommendations_summary": recommendations_summary,
+            # Overview
+            "overview_totals": recommendations_summary.sum(),
+            "overview_means": recommendations_summary.mean(),
+            # Measures
+            "measures": _scenario_2_measure_counts(recommendations_df),
+            # Summary information by each archetype
+            "archetypes": {
+                archetype: _scenario_2_archetype_summary(
+                    archetype, asset_list, recommendations_summary, recommendations_df, property_details_df
+                )
+                for archetype in ("Archetype 1", "Archetype 2", "Archetype 3", "Archetype 4")
+            },
+        }
+    finally:
+        session.close()
diff --git a/etl/customers/goldman/asset_list.py b/etl/customers/goldman/asset_list.py
new file mode 100644
index 00000000..afe3c64c
--- /dev/null
+++ b/etl/customers/goldman/asset_list.py
@@ -0,0 +1,63 @@
+import pandas as pd
+from utils.s3 import read_excel_from_s3
+from utils.s3 import save_csv_to_s3
+
+# Identifiers used to build the S3 key of the uploaded asset list; PORTFOLIO_ID is also sent in the request body
+PORTFOLIO_ID = 75
+USER_ID = 8
+
+
+def app():
+    """
+    Store the pilot asset list in S3 and print a request body that references the stored file.
+    """
+    # (address, postcode) pairs that make up the pilot
+    pilot_addresses = [
+        ("19 Emily Gardens", "B16 0ED"),
+        ("Flat 6 41 Bradford Street", "B5 6HX"),
+        ("197 FIELD LANE", "B32 4HL"),
+        ("FLAT 4 108 SUMMER ROAD", "B23 6DY"),
+        ("1, St. Benedicts Road", "B10 9DP"),
+        ("29 COOKSEY LANE", "B44 9QL"),
+        ("40 TRITTIFORD ROAD", "B13 0HG"),
+    ]
+    asset_list = pd.DataFrame(pilot_addresses, columns=["address", "postcode"])
+
+    # Store the asset list in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
+    save_csv_to_s3(dataframe=asset_list, bucket_name="retrofit-plan-inputs-dev", file_name=filename)
+
+    # Request body for the portfolio.
+    # NOTE(review): the original comment called this the "EPC C" portfolio while goal_value is "B" - confirm
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "B",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": "",
+        "budget": None,
+    }
+    print(body)
diff --git a/etl/customers/goldman/epc_f_g_properties.py b/etl/customers/goldman/epc_f_g_properties.py
new file mode 100644
index 00000000..28197126
--- /dev/null
+++ b/etl/customers/goldman/epc_f_g_properties.py
@@ -0,0 +1,25 @@
+import pandas as pd
+
+
+def app():
+    """
+    Export the EPC F & G rated properties in Birmingham to an Excel file, for Goldman Sachs.
+
+    Only the most recently lodged certificate of each UPRN is considered.
+    """
+    certificates_path = "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv"
+    certificates = pd.read_csv(certificates_path, low_memory=False)
+
+    # Certificates without a UPRN cannot be de-duplicated, so they are left out
+    has_uprn = certificates["UPRN"].notna()
+    certificates = certificates[has_uprn]
+    certificates["UPRN"] = certificates["UPRN"].astype(int).astype(str)
+
+    # LODGEMENT_DATETIME is the proxy for "newest": sort newest first and keep the first row per UPRN
+    certificates["LODGEMENT_DATETIME"] = pd.to_datetime(certificates["LODGEMENT_DATETIME"], format='mixed')
+    newest_first = certificates.sort_values("LODGEMENT_DATETIME", ascending=False)
+    latest_certificates = newest_first.drop_duplicates("UPRN")
+
+    # Get G & F properties and save them as an excel
+    is_f_or_g = latest_certificates["CURRENT_ENERGY_RATING"].isin(["G", "F"])
+    latest_certificates[is_f_or_g].to_excel("Birmingham EPC F & G Properties.xlsx", index=False)
diff --git a/etl/customers/goldman/property_ownership.py b/etl/customers/goldman/property_ownership.py
new file mode 100644
index 00000000..24922f68
--- /dev/null
+++ b/etl/customers/goldman/property_ownership.py
@@ -0,0 +1,407 @@
+import re
+import pandas as pd
+from tqdm import tqdm
+import Levenshtein
+from backend.SearchEpc import SearchEpc
+
+# Average value of a property in the midlands in 2024 was £238,000. Since these are EPC F & G properties, we assume
+# roughly £213,000 since they trade at a discount. This is based on the rightmove study where moving from an EPC
+# F/G -> C has a +15% impact on valuation and D -> C has a +3% impact on valuation.
+# The mode EPC rating is D, so we associate the £238k valuation with an EPC D property
+# Therefore value_of_F * 1.15 = value_of_D * 1.03
+# Therefore value_of_F = value_of_D * 1.03/1.15 = 238k * (1.03/1.15) = 213165
+PROPERTY_VALUE_ESTIMATE = 213_165
+
+
+def aggregate_matches(matching_lookup, company_ownership, properties):
+    """
+    Count the matched properties owned by each company, split by local authority.
+
+    Parameters:
+    - matching_lookup: DataFrame linking "UPRN" to "Title Number".
+    - company_ownership: DataFrame keyed on "Title Number" that carries the owner columns.
+    - properties: DataFrame with "UPRN" and "LOCAL_AUTHORITY_LABEL".
+
+    Returns:
+    - DataFrame with one row per owner, one column per local authority, the owner's total number
+      of properties, an approximate value and a running cumulative value, largest owner first.
+    """
+    owner_cols = ["Company Registration No. (1)", "Proprietor Name (1)"]
+    authority_col = "LOCAL_AUTHORITY_LABEL"
+
+    # Attach the owner and the local authority to every matched property
+    with_owner = matching_lookup.merge(company_ownership, how="left", on="Title Number")
+    df = with_owner.merge(properties[["UPRN", authority_col]], how="left", on="UPRN")
+
+    # Properties per owner and local authority
+    per_authority = df.groupby(owner_cols + [authority_col])["UPRN"].count()
+    per_authority = per_authority.reset_index(name="number_of_properties")
+    per_authority = per_authority.sort_values("number_of_properties", ascending=False)
+
+    # One row per owner, one column per local authority; 0 where the owner holds nothing there
+    pivot_counts = per_authority.pivot_table(
+        index=owner_cols,
+        columns=authority_col,
+        values="number_of_properties",
+        fill_value=0
+    ).reset_index()
+
+    # Properties per owner across all local authorities
+    per_owner = df.groupby(owner_cols)["UPRN"].count().reset_index(name="total_number_of_properties")
+
+    pivot_counts = pivot_counts.merge(per_owner, how="left", on=owner_cols)
+    pivot_counts = pivot_counts.sort_values("total_number_of_properties", ascending=False)
+
+    pivot_counts["approx_value"] = PROPERTY_VALUE_ESTIMATE * pivot_counts["total_number_of_properties"]
+    pivot_counts["cumulative_value"] = pivot_counts["approx_value"].cumsum()
+
+    return pivot_counts
+
+
+def find_f_g_properties(paths, output_path="EPC F & G Properties.xlsx"):
+    """
+    Collect the EPC F & G rated properties from a set of certificate files into one Excel file.
+
+    For each file only the most recently lodged certificate of every UPRN is considered.
+
+    Parameters:
+    - paths: Iterable of paths to certificates CSV files.
+    - output_path (str): Where the combined Excel file is written. Defaults to the file name that
+      was previously hard-coded.
+
+    Returns:
+    - DataFrame: The combined F & G certificates that were written to output_path.
+
+    Raises:
+    - ValueError: If paths yields no files.
+    """
+    data = []
+    for path in tqdm(paths):
+        epc_data = pd.read_csv(path, low_memory=False)
+
+        # Copy the filtered rows so the column assignments below do not write into a slice
+        epc_data = epc_data[~pd.isnull(epc_data["UPRN"])].copy()
+        epc_data["UPRN"] = epc_data["UPRN"].astype(int).astype(str)
+
+        # Get the newest EPC for each UPRN. We use LODGEMENT_DATETIME as a proxy for this
+        epc_data["LODGEMENT_DATETIME"] = pd.to_datetime(epc_data["LODGEMENT_DATETIME"], format='mixed')
+
+        epc_data = epc_data.sort_values("LODGEMENT_DATETIME", ascending=False).drop_duplicates("UPRN")
+
+        # Get G & F properties
+        epc_data = epc_data[epc_data["CURRENT_ENERGY_RATING"].isin(["G", "F"])]
+        data.append(epc_data)
+
+    if not data:
+        # pd.concat raises an opaque "No objects to concatenate" here; say what is actually wrong
+        raise ValueError("find_f_g_properties needs at least one certificates file")
+
+    data = pd.concat(data)
+
+    # Save as an excel
+    data.to_excel(output_path, index=False)
+
+    return data
+
+
+def remove_text_in_brackets(address: str) -> str:
+    """
+    Strips every parenthesised section out of an address.
+
+    The parentheses themselves and any whitespace directly in front of the opening parenthesis
+    are removed as well. An opening parenthesis without a closing one is left untouched.
+
+    Parameters:
+    - address (str): The address string to clean.
+
+    Returns:
+    - str: The address without its parenthesised sections.
+    """
+    bracketed_text = re.compile(r'\s*\([^)]*\)')
+    return bracketed_text.sub('', address)
+
+
+def extract_numeric_part(house_number: str) -> str:
+    """
+    Keeps only the digits of a house number, dropping letters and any other characters.
+
+    Parameters:
+    - house_number (str): The house number string possibly containing letters.
+
+    Returns:
+    - str: The digits of the house number, in their original order (empty if there are none).
+    """
+    non_digits = re.compile(r'\D')
+    return non_digits.sub('', house_number)
+
+
+def levenstein_match(matching_string, df, address_col):
+    """
+    Returns the row of df whose address is closest to matching_string by Levenshtein distance.
+
+    Punctuation and spaces are stripped from both sides before comparing. Previously only the
+    candidate addresses were stripped, so a matching_string that still contained punctuation or
+    spaces was compared unevenly. A matching_string that is already stripped is unaffected.
+
+    Parameters:
+    - matching_string (str): The address (or key) to find the best match for.
+    - df (DataFrame): The candidate rows.
+    - address_col (str): The column of df holding the candidate addresses.
+
+    Returns:
+    - DataFrame: A one-row slice of df with the best match (the first one on ties), or an empty
+      slice when df has no rows.
+    """
+    if df.empty:
+        # Nothing to match against; min() over no distances would raise
+        return df.iloc[0:0]
+
+    def _normalise(text):
+        # Strip out punctuation and spaces
+        return re.sub(r'[^\w\s]', '', text).replace(" ", "")
+
+    match_from = _normalise(matching_string)
+    match_to = [_normalise(x) for x in df[address_col].tolist()]
+
+    # Perform matching between full key and match_to
+    distances = [Levenshtein.distance(match_from, s) for s in match_to]
+    best_match_index = distances.index(min(distances))
+    # We might want to consider a threshold for the distance, however we don't apply one for the moment
+    return df.iloc[best_match_index:best_match_index + 1]
+
+
+def extract_range_from_house_number(house_number_range: str):
+    """
+    Detects if the house number is a numeric range (formatted as 'x-y') and expands it.
+
+    Whitespace around either number is ignored, so '10 - 12' is treated like '10-12'. Strings that
+    contain a hyphen but are not a pair of decimal numbers are not ranges.
+
+    Parameters:
+    - house_number_range (str): The house number string that might contain a range.
+
+    Returns:
+    - list of str: Every number from x to y inclusive if the input is a range (an empty list when
+      x is greater than y); otherwise None. Empty, missing or non-string input gives None.
+    """
+    # Missing values (None, NaN) and other non-string input cannot hold a range
+    if not isinstance(house_number_range, str) or not house_number_range:
+        return None
+
+    parts = [part.strip() for part in house_number_range.split('-')]
+    if len(parts) != 2:
+        # No hyphen present, or more than one
+        return None
+
+    # isdecimal() rather than isdigit(): isdigit() also accepts characters such as superscripts,
+    # which int() then rejects with a ValueError
+    if not all(part.isdecimal() for part in parts):
+        return None
+
+    start, end = map(int, parts)
+    return [str(x) for x in range(start, end + 1)]
+
+
+def is_in_range(row, house_no):
+    """ Tell whether house_no equals one of the numbers in row; an empty or missing row never matches. """
+    if not row:
+        return False
+    for num in row:
+        if house_no == num:
+            return True
+    return False
+
+
+def remove_duplicate_matches(matching_lookup, properties, company_ownership):
+    """
+    Keeps a single UPRN for every title number that was matched to more than one property.
+
+    For each duplicated title, the EPC address closest to the title's "Property Address" (by
+    Levenshtein distance) is kept and the other matches for that title are removed.
+
+    Parameters:
+    - matching_lookup (DataFrame): Matches with "UPRN" and "Title Number" columns.
+    - properties (DataFrame): EPC properties with "UPRN" and "ADDRESS" columns.
+    - company_ownership (DataFrame): Ownership records with "Title Number" and "Property Address".
+
+    Returns:
+    - DataFrame: matching_lookup without the rejected matches. It is returned unchanged when
+      there is nothing to remove.
+    """
+    match_keys = ["UPRN", "Title Number"]
+    duplicated_titles = matching_lookup[matching_lookup["Title Number"].duplicated()]["Title Number"].unique()
+
+    to_drop = []
+    for dupe_title in duplicated_titles:
+        dupe_data = matching_lookup[matching_lookup["Title Number"] == dupe_title].copy()
+        matched_addresses = dupe_data.merge(
+            properties[["UPRN", "ADDRESS"]].rename(columns={"ADDRESS": "epc_address"}),
+            how="left", on="UPRN"
+        ).merge(
+            company_ownership[["Title Number", "Property Address"]],
+            how="left", on="Title Number"
+        )
+        # We perform levenstein to get the best match
+        best_match = levenstein_match(
+            matching_string=matched_addresses["Property Address"].values[0],
+            df=matched_addresses,
+            address_col="epc_address"
+        )
+        matches_to_drop = matched_addresses[
+            ~matched_addresses["UPRN"].isin(best_match["UPRN"].values)
+        ]
+
+        to_drop.append(matches_to_drop[match_keys].copy())
+
+    # Without duplicated titles there is nothing to concatenate (pd.concat raises on an empty list)
+    if not to_drop:
+        return matching_lookup
+
+    # De-duplicate so the merge below cannot multiply rows of matching_lookup
+    to_drop = pd.concat(to_drop).drop_duplicates()
+    if to_drop.empty:
+        return matching_lookup
+
+    # Anti-join: keep the matches that are not listed in to_drop. The filtered frame used to be
+    # computed and thrown away, so the unfiltered merge (with its "_merge" column) was returned.
+    merged = pd.merge(matching_lookup, to_drop, on=match_keys, how='left', indicator=True)
+    return merged[merged['_merge'] == 'left_only'].drop(columns=['_merge'])
+
+
+def app():
+ """
+ This script is for scoping property ownership for EPC F & G rated properties in Birmingam, for Goldman Sachs
+ """
+ # paths = [
+ # "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E08000031-Wolverhampton/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E08000026-Coventry/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E06000016-Leicester/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E06000015-Derby/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E06000021-Stoke-on-Trent/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E06000018-Nottingham/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E07000154-Northampton/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E06000061-North-Northamptonshire/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E06000062-West-Northamptonshire/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E07000152-East-Northamptonshire/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E07000155-South-Northamptonshire/certificates.csv",
+ # #
+ # "local_data/all-domestic-certificates/domestic-E08000027-Dudley/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E08000029-Solihull/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E07000234-Bromsgrove/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E08000030-Walsall/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E08000028-Sandwell/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E06000019-Herefordshire-County-of/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E06000020-Telford-and-Wrekin/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E07000218-North-Warwickshire/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E07000222-Warwick/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E07000237-Worcester/certificates.csv",
+ # # East midlands
+ # "local_data/all-domestic-certificates/domestic-E07000035-Derbyshire-Dales/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E07000038-North-East-Derbyshire/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E07000039-South-Derbyshire/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E06000012-North-East-Lincolnshire/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E06000013-North-Lincolnshire/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E07000138-Lincoln/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E07000134-North-West-Leicestershire/certificates.csv",
+ # "local_data/all-domestic-certificates/domestic-E06000017-Rutland/certificates.csv",
+ # ]
+ # paths = list(set(paths))
+ # find_f_g_properties(paths)
+
+ properties = pd.read_excel("EPC F & G Properties.xlsx")
+ company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_04.csv")
+ company_ownership["is_overseas"] = False
+ overseas_company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_04 2.csv")
+ overseas_company_ownership["is_overseas"] = True
+
+ company_ownership = pd.concat([company_ownership, overseas_company_ownership])
+
+    # Filter on relevant postcodes
+ company_ownership = company_ownership[
+ company_ownership["Postcode"].str.lower().isin(properties["POSTCODE"].str.lower().unique())]
+
+ # Now we filter properties the other way around
+ properties = properties[properties["POSTCODE"].str.lower().isin(company_ownership["Postcode"].str.lower().unique())]
+    # We end up with 7.4k entries on a postcode match, however we now need to do a direct address match
+    # Keep just private rentals and owner-occupied properties (both spellings of each tenure label)
+ properties = properties[
+ properties["TENURE"].isin(["rental (private)", "Rented (private)", "owner-occupied", "Owner-occupied"])
+ ]
+
+    # Remove entries where the address begins with the term "land adjoining", or other records that don't reference
+    # the property itself
+ starting_terms = [
+ "land adjoining", "land on the", "land to the rear of", "land and buildings on the",
+ "garage adjoining", "car park adjoining", "the land adjoining", "land and buildings adjoining",
+ "all royal mines"
+ ]
+ for starting_term in starting_terms:
+ company_ownership = company_ownership[
+ ~company_ownership["Property Address"].str.lower().str.startswith(starting_term)
+ ]
+
+ freehold_matching_lookup = [] # 634
+ leasehold_matching_lookup = [] # 86
+ shared_leasehold_match = []
+ shared_freehold_match = []
+ for _, address in tqdm(properties.iterrows(), total=len(properties)):
+ match_type = "exact"
+ filtered = company_ownership[
+ company_ownership["Postcode"].str.lower() == address["POSTCODE"].lower()
+ ].copy()
+
+ # Remove postcode and remove trailing commas
+ filtered["house_number"] = (
+ filtered["Property Address"]
+ .apply(remove_text_in_brackets)
+ .apply(SearchEpc.get_house_number)
+ .str.lower()
+ .str.replace(",", "")
+ )
+ house_no = SearchEpc.get_house_number(address["ADDRESS1"])
+ if house_no is not None:
+ house_no = house_no.replace(",", "")
+
+ if house_no is None:
+ # It's hard for us to get a reliable match
+ # filtered = filtered[filtered["Property Address"].str.contains(address["ADDRESS1"])]
+ # if filtered.shape[0] > 1:
+ # raise Exception("No valid - maybe we should do levenstein?")
+ continue
+
+ else:
+
+ if house_no not in filtered["house_number"].values:
+ # If this happens, we check house_number for a x-y range of addresses
+ filtered["house_number_range"] = filtered["house_number"].apply(extract_range_from_house_number)
+ # If we have found a house number range, we check if the house number is in the range and if not,
+ # we drop the row
+ filtered['is_in_range'] = filtered['house_number_range'].apply(lambda x: is_in_range(x, house_no))
+
+ if filtered['is_in_range'].any():
+ # If house_no is found in any range, keep only rows where it is in range
+ filtered = filtered[filtered['is_in_range']]
+ else:
+ # If house_no is not found in any range, filter out rows where 'house_number_range' is not None
+ filtered = filtered[filtered['house_number_range'].isnull()]
+
+ # Strip out letters from house_no and house_number
+ house_no = extract_numeric_part(house_no)
+ filtered["house_number"] = filtered["house_number"].astype(str).apply(extract_numeric_part)
+ match_type = "approximate"
+
+ filtered = filtered[filtered["house_number"] == house_no]
+
+ if filtered.empty:
+ continue
+
+ filtered_freehold = filtered[filtered["Tenure"] == "Freehold"]
+ filtered_leasehold = filtered[filtered["Tenure"] == "Leasehold"]
+
+ if filtered_freehold.shape[0] > 1:
+ matched = filtered_leasehold[["Title Number"]].copy()
+ matched.insert(0, "UPRN", address["UPRN"])
+ shared_freehold_match.append(matched)
+ elif not filtered_freehold.empty:
+ freehold_matching_lookup.append(
+ {
+ "UPRN": address["UPRN"],
+ "Title Number": filtered_freehold["Title Number"].values[0],
+ "match_type": match_type,
+ }
+ )
+
+ if filtered_leasehold.shape[0] > 1:
+ matched = filtered_leasehold[["Title Number"]].copy()
+ matched.insert(0, "UPRN", address["UPRN"])
+ shared_leasehold_match.append(matched)
+ elif not filtered_leasehold.empty:
+ leasehold_matching_lookup.append(
+ {
+ "UPRN": address["UPRN"],
+ "Title Number": filtered_leasehold["Title Number"].values[0],
+ "match_type": match_type,
+ }
+ )
+
+ freehold_matching_lookup = pd.DataFrame(freehold_matching_lookup)
+ leasehold_matching_lookup = pd.DataFrame(leasehold_matching_lookup)
+ shared_leasehold_match = pd.concat(shared_leasehold_match)
+
+ # The approximate matches aren't very good
+ freehold_matching_lookup = freehold_matching_lookup[freehold_matching_lookup["match_type"] == "exact"]
+ leasehold_matching_lookup = leasehold_matching_lookup[leasehold_matching_lookup["match_type"] == "exact"]
+
+ # There are some cases where we have duplicates
+ freehold_matching_lookup = remove_duplicate_matches(freehold_matching_lookup, properties, company_ownership)
+ leasehold_matching_lookup = remove_duplicate_matches(leasehold_matching_lookup, properties, company_ownership)
+
+ matched_addresses = freehold_matching_lookup.merge(
+ properties[["UPRN", "ADDRESS"]].rename(columns={"ADDRESS": "epc_address"}),
+ how="left", on="UPRN"
+ ).merge(
+ company_ownership[["Title Number", "Property Address"]],
+ how="left", on="Title Number"
+ )
+
+ # shared_freehold_match = pd.DataFrame(shared_freehold_match)
+    # Store these files
+ freehold_matching_lookup.to_excel("freehold_matching_lookup.xlsx")
+ leasehold_matching_lookup.to_excel("leasehold_matching_lookup.xlsx")
+ shared_leasehold_match.to_excel("shared_leasehold_match.xlsx")
+ # shared_freehold_match.to_excel("shared_freehold_match.xlsx")
+
+ freehold_aggregate = aggregate_matches(freehold_matching_lookup, company_ownership, properties)
+ leasehold_aggregate = aggregate_matches(leasehold_matching_lookup, company_ownership, properties)
+
+ combined_aggregate = aggregate_matches(
+ pd.concat([freehold_matching_lookup, leasehold_matching_lookup]), company_ownership, properties
+ )
+
+ df = pd.concat([freehold_matching_lookup, leasehold_matching_lookup])
+
+ investment_20m = combined_aggregate[combined_aggregate["cumulative_value"] <= 20_500_000]
+ investment_50m = combined_aggregate[combined_aggregate["cumulative_value"] <= 51_000_000]
+
+ properties["WALLS_DESCRIPTION"].value_counts(normalize=True)
+
+
+def company_aggregation():  # Rank (proprietor, registration no.) pairs by number of property rows; export to Excel
+    company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_04.csv")  # NOTE: local path
+    aggregation = (
+        company_ownership
+        .groupby(["Proprietor Name (1)", "Company Registration No. (1)"])  # NaN keys are dropped (pandas default)
+        ["Property Address"]
+        .count()  # counts non-null "Property Address" values per group
+        .reset_index(name="Number of Properties")
+    )
+    aggregation = aggregation.sort_values("Number of Properties", ascending=False)  # largest owners first
+
+    aggregation.to_excel("Company ownership aggregation.xlsx")  # written relative to the current working directory
diff --git a/etl/customers/guiness/route_march.py b/etl/customers/guiness/route_march.py
new file mode 100644
index 00000000..28f350d3
--- /dev/null
+++ b/etl/customers/guiness/route_march.py
@@ -0,0 +1,98 @@
+import os
+
+import pandas as pd
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from utils.s3 import read_excel_from_s3
+from backend.SearchEpc import SearchEpc
+from epc_api.client import EpcClient
+from utils.s3 import save_csv_to_s3
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def app():
+    """
+    Satisfy an ad hoc request to retrieve EPC data for properties owned by Guiness, to help plan the
+    route march. Each asset is looked up with SearchEpc and its newest EPC is joined back onto the asset list.
+
+    These properties were provided to us by Ecosurv.
+    :return: None. The enriched asset list is written to "Guiness EPC data.xlsx" in the working directory.
+    """
+    asset_list = read_excel_from_s3(
+        bucket_name="retrofit-datalake-dev",
+        file_key="customers/guiness/TGP CW Properties PV.xlsx",
+        header_row=0
+    )
+
+    epc_data = []
+    for _, guiness_property in tqdm(asset_list.iterrows(), total=len(asset_list)):
+
+        searcher = SearchEpc(
+            address1=str(guiness_property["Address"]),
+            postcode=guiness_property["POSTCODES"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key="",
+            property_type=None,
+            fast=True
+        )
+        # Force the skipping of estimating the EPC
+        searcher.ordnance_survey_client.property_type = None
+        searcher.ordnance_survey_client.built_form = None
+
+        searcher.find_property(skip_os=True)
+        if searcher.newest_epc is None:
+            continue  # no EPC found: the asset keeps empty EPC columns after the left merge below
+
+        epc = {
+            "asset_list_address": guiness_property["Address"],  # original asset-list values, used as merge keys
+            "asset_list_postcode": guiness_property["POSTCODES"],
+            **searcher.newest_epc.copy()
+        }
+
+        epc_data.append(epc)
+
+    epc_df = pd.DataFrame(epc_data)
+
+    # Retrieve just the data we need. NOTE(review): raises KeyError when epc_data is empty (no columns to select)
+    epc_df = epc_df[
+        [
+            "asset_list_address",
+            "asset_list_postcode",
+            "uprn",
+            "property-type",
+            "built-form",
+            "inspection-date",
+            "current-energy-rating",
+            "current-energy-efficiency",
+            "roof-description",
+            "walls-description",
+            "transaction-type"
+        ]
+    ]
+
+    asset_list = asset_list.merge(
+        epc_df, how="left", left_on=["Address", "POSTCODES"], right_on=["asset_list_address", "asset_list_postcode"]
+    )
+
+    # De-dupe on the address and postcode, since 137 Badger Avenue was duplicated
+    asset_list = asset_list.drop_duplicates(subset=["Address", "POSTCODES"])
+    asset_list = asset_list.drop(columns=["asset_list_address", "asset_list_postcode"])  # merge keys no longer needed
+
+    # Rename the EPC columns to the headings used in the delivered spreadsheet
+    asset_list = asset_list.rename(columns={
+        "property-type": "Property Type",
+        "built-form": "Archetype",
+        "inspection-date": "Last EPC Inspection Date",
+        "current-energy-rating": "Last survey EPC Rating",
+        "current-energy-efficiency": "Last survey SAP Score",
+        "roof-description": "Roof Construction",
+        "walls-description": "Wall Construction",
+        "transaction-type": "Last EPC Reason"
+    })
+
+    # Store as an Excel file in the current working directory (not uploaded to S3)
+    filename = "Guiness EPC data.xlsx"
+    asset_list.to_excel(filename, index=False)
diff --git a/etl/customers/immo/pilot/asset_list.py b/etl/customers/immo/pilot/asset_list.py
new file mode 100644
index 00000000..6329a2be
--- /dev/null
+++ b/etl/customers/immo/pilot/asset_list.py
@@ -0,0 +1,157 @@
+import pandas as pd
+from utils.s3 import read_excel_from_s3
+from utils.s3 import save_csv_to_s3
+
+USER_ID = 8
+PORTFOLIO_ID = 70
+
+council_tax_bands = [
+ {'address': '8 Corporation Road', 'postcode': 'DY2 7PX', 'band': 'A'},
+ {'address': '21 Wells Road', 'postcode': 'DY5 3TB', 'band': 'A'},
+ {'address': '27 Milton Road', 'postcode': 'WV14 8HZ', 'band': 'A'},
+ {'address': '195 Ashenhurst Road', 'postcode': 'DY1 2JB', 'band': 'A'},
+ {'address': '53 Bromley', 'postcode': 'DY5 4PJ', 'band': 'A'},
+ {'address': '91 Osprey Drive', 'postcode': 'DY1 2JS', 'band': 'B'},
+ {'address': '47 Fairfield Road', 'postcode': 'DY8 5UJ', 'band': 'B'},
+ {'address': '150 Huntingtree Road', 'postcode': 'B63 4HP', 'band': 'C'},
+ {'address': '6 Beech Road', 'postcode': 'DY1 4BP', 'band': 'A'},
+ {'address': '5 Oaklands', 'postcode': 'B62 0JA', 'band': 'A'},
+]
+council_tax_bands = pd.DataFrame(council_tax_bands)
+
+# This is information we need to override on the EPC itself, for instance if a new survey has been conducted but
+# has not yet reached the API.
+# For 53 Bromley, the non-invasives found the walls to be partially filled.
+patches = [
+ {
+ 'address': '6 Beech Road', 'postcode': 'DY1 4BP',
+ 'walls-description': 'Cavity wall, filled cavity',
+ 'walls-energy-eff': 'Good',
+ 'roof-description': 'Pitched, 12 mm loft insulation',
+ 'roof-energy-eff': 'Very Poor',
+ 'windows-description': 'Fully double glazed',
+ 'windows-energy-eff': 'Good',
+ 'mainheat-description': 'Room heaters, electric',
+ 'mainheat-energy-eff': 'Very Poor',
+ 'mainheatcont-description': 'Appliance thermostats',
+ 'mainheatc-energy-eff': 'Good',
+ 'lighting-description': 'Low energy lighting in 25% of fixed outlets',
+ 'lighting-energy-eff': 'Good',
+ 'floor-description': 'Solid, no insulation (assumed)',
+ 'secondheat-description': 'None',
+ 'current-energy-efficiency': '32',
+ 'energy-consumption-current': '491',
+ 'co2-emissions-current': '5.0',
+ 'potential-energy-efficiency': '87'
+ },
+ {
+ 'address': '53 Bromley', 'postcode': 'DY5 4PJ',
+ 'walls-description': 'Cavity wall, partial insulation (assumed)',
+ },
+]
+
+# This is information found as a result of the non-invasives, which shows that certain measures have
+# already been installed. To reflect this in the front end, the measure is still included in the
+# recommendation, but its cost is removed and a message is presented saying it is already installed.
+already_installed = [
+ {
+ 'address': '5 Oaklands',
+ 'postcode': 'B62 0JA',
+ "already_installed": ["windows_glazing"]
+ }
+]
+
+non_invasive_recommendations = [
+ {'address': '8 Corporation Road', 'postcode': 'DY2 7PX', 'recommendations': []},
+ {'address': '21 Wells Road', 'postcode': 'DY5 3TB', 'recommendations': ['cavity_extract_and_refill']},
+ {'address': '27 Milton Road', 'postcode': 'WV14 8HZ', 'recommendations': ['cavity_extract_and_refill']},
+ {'address': '195 Ashenhurst Road', 'postcode': 'DY1 2JB', 'recommendations': ['cavity_extract_and_refill']},
+ {'address': '53 Bromley', 'postcode': 'DY5 4PJ', 'recommendations': ['cavity_surveyed_as_filled_is_partial']},
+ {'address': '91 Osprey Drive', 'postcode': 'DY1 2JS', 'recommendations': ['cavity_extract_and_refill']},
+ {'address': '47 Fairfield Road', 'postcode': 'DY8 5UJ', 'recommendations': ['cavity_extract_and_refill']},
+ {'address': '150 Huntingtree Road', 'postcode': 'B63 4HP', 'recommendations': ['cavity_extract_and_refill']},
+ {'address': '6 Beech Road', 'postcode': 'DY1 4BP', 'recommendations': []},
+ {'address': '5 Oaklands', 'postcode': 'B62 0JA', 'recommendations': ['cavity_extract_and_refill']},
+]
+
+
+def app():  # Upload the pilot asset list and its override files to S3, then print the two plan request bodies
+    raw_asset_list = read_excel_from_s3(
+        bucket_name="retrofit-datalake-dev",
+        file_key="customers/Immo/IMMO Sample Assets_Dudley.xlsx",
+        header_row=0
+    )
+    raw_asset_list = raw_asset_list.drop(columns=["Unnamed: 0"])  # presumably the sheet's unnamed index column
+    # Extract address and postcode: the first and last comma-separated parts of "Full Address"
+    raw_asset_list["address"] = raw_asset_list["Full Address"].str.split(",").str[0]
+    raw_asset_list["postcode"] = raw_asset_list["Full Address"].str.split(",").str[-1].str.strip()
+
+    asset_list = raw_asset_list.merge(council_tax_bands, how="left", on=["address", "postcode"])
+
+    # We're provided with number of bathrooms (as "No. of WC's") and number of bedrooms.
+    asset_list = asset_list.rename(
+        columns={
+            "No. of Beds": "n_bedrooms",
+            "No. of WC's": "n_bathrooms"
+        }
+    )
+
+    # Store the asset list in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    # Store overrides in s3. NOTE(review): saved via save_csv_to_s3 under a .json key - confirm the reader's format
+    already_installed_filename = f"{USER_ID}/{PORTFOLIO_ID}/already_installed.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(already_installed),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=already_installed_filename
+    )
+
+    # Store patches in s3 (same save_csv_to_s3 / .json key combination as above)
+    patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(patches),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=patches_filename
+    )
+
+    # Store non-invasive recommendations in S3 (same save_csv_to_s3 / .json key combination as above)
+    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(non_invasive_recommendations),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=non_invasive_recommendations_filename
+    )
+
+    # EPC C portfolio: the request body is only printed here, it is not sent anywhere by this script
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename,
+        "already_installed_file_path": already_installed_filename,
+        "patches_file_path": patches_filename,
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+        "budget": None,
+    }
+    print(body)
+
+    # EPC B portfolio: same input files, next portfolio id (PORTFOLIO_ID + 1); also only printed
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID + 1),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "B",
+        "trigger_file_path": filename,
+        "already_installed_file_path": already_installed_filename,
+        "patches_file_path": patches_filename,
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+        "budget": None,
+    }
+    print(body)
diff --git a/etl/customers/immo/pilot/asset_list_2.py b/etl/customers/immo/pilot/asset_list_2.py
new file mode 100644
index 00000000..52260f57
--- /dev/null
+++ b/etl/customers/immo/pilot/asset_list_2.py
@@ -0,0 +1,152 @@
+import pandas as pd
+from utils.s3 import read_excel_from_s3
+from utils.s3 import save_csv_to_s3
+
+USER_ID = 8
+PORTFOLIO_ID = 72
+
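+# Patches: EPC values to override for specific properties (NOTE(review): the original comment was left unfinished)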
+patches = [
+ {
+ 'address': '116 Parkes Hall Road',
+ 'postcode': 'DY1 3RJ',
+ 'uprn': '90046817',
+ 'walls-description': 'Cavity wall, filled cavity',
+ 'walls-energy-eff': 'Average',
+ 'roof-description': 'Pitched, 270 mm loft insulation',
+ 'roof-energy-eff': 'Good',
+ 'windows-description': 'Fully double glazed',
+ 'windows-energy-eff': 'Good',
+ 'mainheat-description': 'Boiler and radiators, mains gas',
+ 'mainheat-energy-eff': 'Good',
+ 'mainheatcont-description': 'Programmer, room thermostat and TRVs',
+ 'mainheatc-energy-eff': 'Good',
+ 'lighting-description': 'Low energy lighting in 27% of fixed outlets',
+ 'lighting-energy-eff': 'Average',
+ 'floor-description': 'Solid, no insulation (assumed)',
+ 'secondheat-description': 'None',
+ 'current-energy-efficiency': '73',
+ 'current-energy-rating': 'C',
+ 'energy-consumption-current': '184',
+ 'co2-emissions-current': '2.4',
+ 'potential-energy-efficiency': '88',
+ 'total-floor-area': '73',
+ 'construction-age-band': 'England and Wales: 1930-1949',
+ 'property-type': 'House',
+ 'built-form': 'Mid-Terrace',
+ }
+]
+
+# This is information found as a result of the non-invasives, which shows that certain measures have
+# already been installed. To reflect this in the front end, the measure is still included in the
+# recommendation, but its cost is removed and a message is presented saying it is already installed.
+already_installed = [
+ {
+ 'address': '28 Sangwin Road', 'postcode': 'WV14 9EQ', "already_installed": ["loft_insulation"]
+ },
+ {
+ 'address': '51 Hillwood Road', 'postcode': 'B62 8NQ', "already_installed": ["loft_insulation"]
+ },
+ {
+ 'address': '47 Watsons Close', 'postcode': 'DY2 7HL', "already_installed": ["loft_insulation"]
+ },
+ {
+ 'address': '44 Hatfield Road',
+ 'postcode': 'DY9 7LW',
+ "already_installed": ["loft_insulation", "cavity_wall_insulation"]
+ }
+]
+
+non_invasive_recommendations = []
+
+
+def app():  # Upload the second pilot asset list and its override files to S3, then print the two plan request bodies
+    raw_asset_list = read_excel_from_s3(
+        bucket_name="retrofit-datalake-dev",
+        file_key="customers/Immo/Dudley Asset List - Hestia - pilot2.xlsx",
+        header_row=0
+    )
+
+    raw_asset_list = raw_asset_list[raw_asset_list["in_pilot"]].copy()  # assumes in_pilot is boolean - TODO confirm
+
+    # Extract address and postcode: the first and last comma-separated parts of "Full Address"
+    raw_asset_list["address"] = raw_asset_list["Full Address"].str.split(",").str[0]
+    raw_asset_list["postcode"] = raw_asset_list["Full Address"].str.split(",").str[-1].str.strip()
+
+    # We're provided with number of bathrooms (as "No. of WC's") and number of bedrooms.
+    # The UPRNs are not the official ones
+    asset_list = raw_asset_list.rename(
+        columns={
+            "No. of Beds": "n_bedrooms",
+            "No. of WC's": "n_bathrooms",
+            'Property Type': 'property_type',
+            'Architype': 'built_form'
+        }
+    )
+
+    # Remap the built-form values. NOTE: any value not listed below becomes NaN (Series.map with a dict)
+    asset_list["built_form"] = asset_list["built_form"].map({
+        "SEMI DETACHED": "Semi-Detached",
+        "MID TERRACE": "Mid-Terrace",
+        "END TERRACE": "End-Terrace",
+    })
+
+    # Store the asset list in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
+    save_csv_to_s3(
+        dataframe=asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    # Store overrides in s3. NOTE(review): saved via save_csv_to_s3 under a .json key - confirm the reader's format
+    already_installed_filename = f"{USER_ID}/{PORTFOLIO_ID}/already_installed.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(already_installed),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=already_installed_filename
+    )
+
+    # Store patches in s3 (same save_csv_to_s3 / .json key combination as above)
+    patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(patches),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=patches_filename
+    )
+
+    # Store non-invasive recommendations in S3 (the list is empty in this file, so an empty frame is saved)
+    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.json"
+    save_csv_to_s3(
+        dataframe=pd.DataFrame(non_invasive_recommendations),
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=non_invasive_recommendations_filename
+    )
+
+    # EPC C portfolio: the request body is only printed here, it is not sent anywhere by this script
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename,
+        "already_installed_file_path": already_installed_filename,
+        "patches_file_path": patches_filename,
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+        "budget": None,
+    }
+    print(body)
+
+    # EPC B portfolio: same input files, next portfolio id (PORTFOLIO_ID + 1); also only printed
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID + 1),
+        "housing_type": "Private",
+        "goal": "Increase EPC",
+        "goal_value": "B",
+        "trigger_file_path": filename,
+        "already_installed_file_path": already_installed_filename,
+        "patches_file_path": patches_filename,
+        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
+        "budget": None,
+    }
+    print(body)
diff --git a/etl/customers/immo/pilot/non_invasive.py b/etl/customers/immo/pilot/non_invasive.py
new file mode 100644
index 00000000..6dc22c62
--- /dev/null
+++ b/etl/customers/immo/pilot/non_invasive.py
@@ -0,0 +1,210 @@
+# import extract_msg
+from datetime import datetime
+from sqlalchemy.orm import sessionmaker
+from backend.app.db.connection import db_engine
+from backend.app.db.functions.non_intrusive_surveys import upload_non_intrusive_survey_notes
+
+
+def parse_msg_body(text):
+    # Turn a message body into {"Info1": line, "Info2": line, ...}; lines are split on CRLF ('\r\n') only
+    lines = text.split('\r\n')
+
+    # Dictionary to hold the parsed data
+    data = {}
+
+    # Process each line
+    for line in lines:
+        # Remove all asterisks and surrounding whitespace
+        clean_line = line.replace('*', '').strip()
+
+        if clean_line:  # Ensure the line is not empty after cleaning
+            # Rewrite "key = value" as "key: value" (only ' = ' with spaces); the line is kept whole, not split
+            if '=' in clean_line:
+                clean_line = clean_line.replace(' = ', ': ')
+
+            # Every non-empty line (with or without '=') is stored as a value under a
+            # sequential key: Info1, Info2, ...
+            data[f"Info{len(data) + 1}"] = clean_line
+
+    return data
+
+
+def app():
+    """
+    Uploads the hand-written non-invasive survey notes for the pilot via upload_non_intrusive_survey_notes,
+    using a SQLAlchemy session bound to db_engine; nothing in this function writes to S3. :return: None
+    """
+
+    # filepath = ("/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/5 Oaklands B62 "
+    #             "0JA/Immo - 5 Oaklands Halesowen B62 0JA.msg")
+    # filepath = ("/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/6 Beech Rd DY1 "
+    #             "4BP/IMMO - 6 Beech Road Dudley DY1 4BP.msg")
+    # filepath = (
+    #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/8 Corporation Rd DY2 "
+    #     "7PX/IMMO - 8 Corporation Road Dudley DY2 7PX.msg"
+    # )
+    # filepath = (
+    #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/21 Wells Rd DY5 3TB/"
+    #     "IMMO - 21 Wells Road Brierley Hill DY5 3TB.msg"
+    # )
+    # filepath = (
+    #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/47 Fairfield Rd DY8 "
+    #     "5UJ/IMMO - 47 Fairfield Road Wordsley Stourbridge DY8 5UJ.msg"
+    # )
+    # filepath = (
+    #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/91 Osprey Drive DY1 "
+    #     "2JS/IMMO - 91 Osprey Drive Dudley DY1 2JS.msg"
+    # )
+    # filepath = (
+    #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/195 Ashenhurst Rd DY1 "
+    #     "2JB/IMMO - 195 Ashenhurst Road Dudley DY1 2JB.msg"
+    # )
+    # filepath = (
+    #     "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data/27 Milton Rd DY1 2JB/IMMO "
+    #     "- 27 Milton Road Coseley Bilston WV14 8HZ.msg"
+    # )
+    #
+    # with extract_msg.Message(filepath) as msg:
+    #     body = msg.body
+    #
+    # from pprint import pprint
+    # pprint(parse_msg_body(body))
+
+    # We manually create the non-invasive notes for the pilot; keys besides uprn/surveyor/survey_date vary per property
+    non_invasive_notes = [
+        {
+            'uprn': 90028499,
+            # 'address': '5 Oaklands',
+            # 'postcode': 'B62 0JA',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
+            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
+                               'CIGA check and extracting the cavity, replacing with bead insulation. '
+                               'There is a shared alleyway with the neighbour, that is a solid brick wall.',
+            'Wall Render': 'Partial render between top of ground floor window and bottom of 1st floor window',
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Front house direction: North East, Back house direction: South West',
+            'Access to mains?': 'Property has access to the mains',
+        },
+        {
+            'uprn': 90055152,
+            # 'address': '6 Beech Road',
+            # 'postcode': 'DY1 4BP',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
+            'Wall Insulation': '1st floor is solid brick with external wall insulation. 2nd floor is cavity, '
+                               'retro drilled, containing loose fibre insulation. Consider getting a '
+                               'CIGA check and extracting the cavity, replacing with bead insulation.',
+            'Wall Render': None,
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Side house direction: North East',
+            'Access to mains?': 'Property has access to the mains',
+        },
+        {
+            'uprn': 90070461,
+            # 'address': '8 Corporation Road',
+            # 'postcode': 'DY2 7PX',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
+            'Wall Insulation': "External wall insulation",
+            'Wall Render': "Render finish throughout",
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Front house direction: North East, Back house direction: South West',
+            'Access to mains?': None,
+        },
+        {
+            'uprn': 90022227,
+            # 'address': '21 Wells Road',
+            # 'postcode': 'DY5 3TB',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
+            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
+                               'CIGA check and extracting the cavity, replacing with bead insulation.',
+            'Wall Render': None,
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Front house direction: East, Back house direction: West',
+            'Access to mains?': 'Property has access to the mains',
+        },
+        {
+            'uprn': 90077535,
+            # 'address': '47 Fairfield Road',
+            # 'postcode': 'DY8 5UJ',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
+            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
+                               'CIGA check and extracting the cavity, replacing with bead insulation.',
+            'Wall Render': None,
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Front house direction: East, Back house direction: West',
+            'Access to mains?': 'Property has access to the mains',
+        },
+        {
+            'uprn': 90060989,
+            # 'address': '53 Bromley',
+            # 'postcode': 'DY5 4PJ',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
+            'Wall Insulation': "Filled at build, partially filled - celotex/king board, 50mm cavity remaining - "
+                               "recommends a cavity wall fill",
+            "Roof": "Hipped roof",
+            'Existing solar PV': 'No existing solar',
+            'Orientation': "Front house direction: North, Back house direction: South, Side house direction: West",
+            'Access to mains?': 'Property has access to the mains',
+        },
+        {
+            'uprn': 90048026,
+            # 'address': '91 Osprey Drive',
+            # 'postcode': 'DY1 2JS',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
+            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
+                               'CIGA check and extracting the cavity, replacing with bead insulation.',
+            'Wall Render': 'Tile hung front and rear of property',
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Side house direction: East',
+            'Access to mains?': 'Property has access to the mains',
+        },
+        {
+            'uprn': 90093693,
+            # 'address': '150 Huntingtree Road',
+            # 'postcode': 'B63 4HP',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
+            'Heating': 'Electric (storage heaters)',
+            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
+                               'CIGA check and extracting the cavity, replacing with bead insulation.',
+            "Roof": "Hipped roof",
+            'Existing solar PV': 'No existing solar',
+            'Orientation': "Front house direction: North West, Back house direction: South East, Side house direction: "
+                           "North East",
+        },
+        {
+            'uprn': 90051858,
+            # 'address': '195 Ashenhurst Road',
+            # 'postcode': 'DY1 2JB',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
+            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
+                               'CIGA check and extracting the cavity, replacing with bead insulation.',
+            'Wall Render': "Solid render front and rear of property",
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Front house direction: South, Back house direction: North',
+            'Access to mains?': 'Property has access to the mains',
+        },
+        {
+            'uprn': 90106884,
+            # 'address': '27 Milton Road',
+            # 'postcode': 'WV14 8HZ',
+            'surveyor': 'Carl Fitzgerald - The Warmfront Team',
+            'survey_date': datetime.strptime('2024-04-11', '%Y-%m-%d'),
+            'Wall Insulation': 'Cavity wall, retro drilled, containing loose fibre insulation. Consider getting a '
+                               'CIGA check and extracting the cavity, replacing with bead insulation.',
+            'Wall Render': "Solid render front and rear of property",
+            'Existing solar PV': 'No existing solar',
+            'Orientation': 'Front house direction: South East, Back house direction: North West',
+            'Access to mains?': 'Property has access to the mains',
+        },
+    ]
+
+    session = sessionmaker(bind=db_engine)()  # NOTE(review): never closed here - confirm the helper commits/closes
+    upload_non_intrusive_survey_notes(session=session, non_invasive_notes=non_invasive_notes, batch_size=500)
diff --git a/etl/customers/immo/pilot/requirements.txt b/etl/customers/immo/pilot/requirements.txt
new file mode 100644
index 00000000..4673ab35
--- /dev/null
+++ b/etl/customers/immo/pilot/requirements.txt
@@ -0,0 +1 @@
+extract-msg
diff --git a/etl/customers/livewest/route_march.py b/etl/customers/livewest/route_march.py
new file mode 100644
index 00000000..9e69fd43
--- /dev/null
+++ b/etl/customers/livewest/route_march.py
@@ -0,0 +1,134 @@
+import os
+
+import pandas as pd
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from utils.s3 import read_excel_from_s3
+from backend.SearchEpc import SearchEpc
+from epc_api.client import EpcClient
+from utils.s3 import save_csv_to_s3
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def route_march_may_2024():
+ """
+ This code pulls supplementary data for a route march that is expected to happen in May 2024. This code
+ was authored on the 30th April 2024.
+ """
+
+ asset_list = read_excel_from_s3(
+ bucket_name="retrofit-datalake-dev",
+ file_key="customers/Livewest/Livewest proposed route march Apr-May 2024.xlsx",
+ header_row=0
+ )
+
+ epc_data = []
+ for _, unit in tqdm(asset_list.iterrows(), total=len(asset_list)):
+
+ lst = [unit["NO"], unit["ADDRESS 1"], unit["ADDRESS 2"], unit["ADDRESS 3"], unit["POSTCODE"]]
+ lst = [str(x).strip() for x in lst if not pd.isnull(x)]
+
+ full_address = ", ".join(lst)
+
+ searcher = SearchEpc(
+ address1=str(unit["NO"]),
+ postcode=unit["POSTCODE"],
+ auth_token=EPC_AUTH_TOKEN,
+ os_api_key="",
+ property_type=None,
+ fast=True,
+ full_address=full_address
+ )
+ # Force the skipping of estimating the EPC
+ searcher.ordnance_survey_client.property_type = None
+ searcher.ordnance_survey_client.built_form = None
+
+ searcher.find_property(skip_os=True)
+ if searcher.newest_epc is None:
+ # We try with a different address 1
+ add1 = str(unit["NO"]).lower()
+ add1 = (
+ add1
+ .replace("flat", "")
+ .replace("ft", "")
+ .replace("t", "").strip()
+ )
+
+ searcher = SearchEpc(
+ address1=add1,
+ postcode=unit["POSTCODE"],
+ auth_token=EPC_AUTH_TOKEN,
+ os_api_key="",
+ property_type=None,
+ fast=True,
+ full_address=full_address
+ )
+ # Force the skipping of estimating the EPC
+ searcher.ordnance_survey_client.property_type = None
+ searcher.ordnance_survey_client.built_form = None
+
+ searcher.find_property(skip_os=True)
+
+ if searcher.newest_epc is None:
+ continue
+
+ epc = {
+ "asset_list_house_no": unit["NO"],
+ "asset_list_address1": unit["ADDRESS 1"],
+ "asset_list_postcode": unit["POSTCODE"],
+ **searcher.newest_epc.copy()
+ }
+
+ epc_data.append(epc)
+
+ epc_df = pd.DataFrame(epc_data)
+
+ #
+
+ # Retrieve just the data we need
+ epc_df = epc_df[
+ [
+ "asset_list_house_no",
+ "asset_list_address1",
+ "asset_list_postcode",
+ "uprn",
+ "address",
+ "property-type",
+ "built-form",
+ "inspection-date",
+ "current-energy-rating",
+ "current-energy-efficiency",
+ "roof-description",
+ "walls-description",
+ "transaction-type"
+ ]
+ ].rename(columns={"address": "Matched EPC Address"})
+
+ asset_list = asset_list.merge(
+ epc_df,
+ how="left",
+ left_on=["NO", "ADDRESS 1", "POSTCODE"],
+ right_on=["asset_list_house_no", "asset_list_address1", "asset_list_postcode"]
+ )
+
+ asset_list = asset_list.drop_duplicates(subset=["NO", "ADDRESS 1", "POSTCODE"])
+ asset_list = asset_list.drop(columns=["asset_list_house_no", "asset_list_address1", "asset_list_postcode"])
+
+ # Rename the columns
+ asset_list = asset_list.rename(columns={
+ "property-type": "Property Type",
+ "built-form": "Archetype",
+ "inspection-date": "Last EPC Inspection Date",
+ "current-energy-rating": "Last survey EPC Rating",
+ "current-energy-efficiency": "Last survey SAP Score",
+ "roof-description": "Roof Construction",
+ "walls-description": "Wall Construction",
+ "transaction-type": "Last EPC Reason"
+ })
+
+ # Store as an Excel file
+ filename = "Livewest EPC data.xlsx"
+ asset_list.to_excel(filename, index=False)
diff --git a/etl/customers/places_for_people/route_march.py b/etl/customers/places_for_people/route_march.py
new file mode 100644
index 00000000..c38c71d3
--- /dev/null
+++ b/etl/customers/places_for_people/route_march.py
@@ -0,0 +1,137 @@
+import os
+
+import pandas as pd
+from tqdm import tqdm
+
+from dotenv import load_dotenv
+from utils.s3 import read_excel_from_s3
+from backend.SearchEpc import SearchEpc
+from epc_api.client import EpcClient
+from utils.s3 import save_csv_to_s3
+
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+
+
+def app():
+ """
+ This app satisfies an ad hoc request to retrieve EPC data for properties owned by Places For People, to help
+ plan the route march
+
+ These properties were provided to us by Ecosurv
+ :return:
+ """
+ asset_list = read_excel_from_s3(
+ bucket_name="retrofit-datalake-dev",
+ file_key="customers/Places For People/PFP ROUTE MARCH PHASE 1.xlsx",
+ header_row=1
+ )
+
+ epc_data = []
+ for _, pfp_property in tqdm(asset_list.iterrows(), total=len(asset_list)):
+
+ lst = [
+ pfp_property["ADDRESS"],
+ pfp_property["ADDRESS.1"],
+ pfp_property["ADDRESS.2"],
+ pfp_property["POSTCODE"]
+ ]
+ lst = [str(x).strip() for x in lst if not pd.isnull(x)]
+
+ full_address = ", ".join(lst)
+
+ searcher = SearchEpc(
+ address1=str(pfp_property["ADDRESS"]),
+ postcode=pfp_property["POSTCODE"],
+ auth_token=EPC_AUTH_TOKEN,
+ os_api_key="",
+ property_type=None,
+ fast=True,
+ full_address=full_address
+ )
+ # Force the skipping of estimating the EPC
+ searcher.ordnance_survey_client.property_type = None
+ searcher.ordnance_survey_client.built_form = None
+
+ searcher.find_property(skip_os=True)
+ if searcher.newest_epc is None:
+ # We try with a different address 1
+ add1 = str(pfp_property["ADDRESS"]).lower()
+ add1 = add1.replace("ft", "").replace("t", "").strip()
+
+ searcher = SearchEpc(
+ address1=add1,
+ postcode=pfp_property["POSTCODE"],
+ auth_token=EPC_AUTH_TOKEN,
+ os_api_key="",
+ property_type=None,
+ fast=True,
+ full_address=full_address
+ )
+ # Force the skipping of estimating the EPC
+ searcher.ordnance_survey_client.property_type = None
+ searcher.ordnance_survey_client.built_form = None
+
+ searcher.find_property(skip_os=True)
+
+ if searcher.newest_epc is None:
+ continue
+
+ epc = {
+ "asset_list_address": pfp_property["ADDRESS"],
+ "asset_list_address1": pfp_property["ADDRESS.1"],
+ "asset_list_postcode": pfp_property["POSTCODE"],
+ **searcher.newest_epc.copy()
+ }
+
+ epc_data.append(epc)
+
+ epc_df = pd.DataFrame(epc_data)
+
+ # 702
+
+ # Retrieve just the data we need
+ epc_df = epc_df[
+ [
+ "asset_list_address",
+ "asset_list_address1",
+ "asset_list_postcode",
+ "uprn",
+ "address",
+ "property-type",
+ "built-form",
+ "inspection-date",
+ "current-energy-rating",
+ "current-energy-efficiency",
+ "roof-description",
+ "walls-description",
+ "transaction-type"
+ ]
+ ].rename(columns={"address": "Matched EPC Address"})
+
+ asset_list = asset_list.merge(
+ epc_df,
+ how="left",
+ left_on=["ADDRESS", "ADDRESS.1", "POSTCODE"],
+ right_on=["asset_list_address", "asset_list_address1", "asset_list_postcode"]
+ )
+
+ # De-dupe on the address and postcode, since 137 Badger Avenue was duplicated
+ asset_list = asset_list.drop_duplicates(subset=["ADDRESS", "ADDRESS.1", "POSTCODE"])
+ asset_list = asset_list.drop(columns=["asset_list_address", "asset_list_address1", "asset_list_postcode"])
+
+ # Rename the columns
+ asset_list = asset_list.rename(columns={
+ "property-type": "Property Type",
+ "built-form": "Archetype",
+ "inspection-date": "Last EPC Inspection Date",
+ "current-energy-rating": "Last survey EPC Rating",
+ "current-energy-efficiency": "Last survey SAP Score",
+ "roof-description": "Roof Construction",
+ "walls-description": "Wall Construction",
+ "transaction-type": "Last EPC Reason"
+ })
+
+ # Store as an Excel file
+ filename = "Places For People EPC data.xlsx"
+ asset_list.to_excel(filename, index=False)
diff --git a/etl/customers/slide_utils.py b/etl/customers/slide_utils.py
index d1efce47..9170ab17 100644
--- a/etl/customers/slide_utils.py
+++ b/etl/customers/slide_utils.py
@@ -246,7 +246,7 @@ def create_powerpoint(data, save_location):
prs.save(save_location)
-def create_recommendations_summary(recommendations_df, properties_df, sap_target):
+def create_recommendations_summary(recommendations_df, properties_df, property_details_df, sap_target):
# Aggregate the impact of the recommendations
# We want:
# Total number of sap points
@@ -259,13 +259,15 @@ def create_recommendations_summary(recommendations_df, properties_df, sap_target
total_valuation_impact=("property_valuation_increase", "sum"),
total_bill_savings=("energy_cost_savings", "sum"),
total_cost=("estimated_cost", "sum"),
- total_carbon=("co2_equivalent_savings", "sum")
+ total_carbon=("co2_equivalent_savings", "sum"),
+ adjusted_heat_demand=("adjusted_heat_demand", "sum")
).reset_index()
- # Merge on current sap points
+ # Merge on current sap points (current CO2, energy and annual bill are merged below from property_details_df)
recommendations_summary = recommendations_summary.merge(
properties_df[["id", "uprn", "current_sap_points"]].rename(columns={"id": "property_id"}), on="property_id",
how="left"
)
+
recommendations_summary["expected_sap_points"] = (
recommendations_summary["current_sap_points"] + recommendations_summary["total_sap_points"]
)
@@ -274,4 +276,18 @@ def create_recommendations_summary(recommendations_df, properties_df, sap_target
)
recommendations_summary["sap_difference"] = sap_target - recommendations_summary["expected_sap_points"]
+ if property_details_df is not None:
+ recommendations_summary = recommendations_summary.merge(
+ property_details_df[["uprn", "co2_emissions", "adjusted_energy_consumption", "energy_bill"]].rename(
+ columns={
+ "id": "property_id",
+ "co2_emissions": "current_co2",
+ "adjusted_energy_consumption": "current_energy",
+ "energy_bill": "current_energy_bill"
+ }
+ ),
+ on="uprn",
+ how="left"
+ )
+
return recommendations_summary
diff --git a/etl/customers/vander_elliot/__init__.py b/etl/customers/vander_elliot/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/etl/customers/vander_elliot/single_property_pilot.py b/etl/customers/vander_elliot/single_property_pilot.py
new file mode 100644
index 00000000..99624dfc
--- /dev/null
+++ b/etl/customers/vander_elliot/single_property_pilot.py
@@ -0,0 +1,56 @@
+import pandas as pd
+from utils.s3 import read_excel_from_s3
+from utils.s3 import save_csv_to_s3
+
+PORTFOLIO_ID = 77
+USER_ID = 8
+
+patches = [
+ {
+ "address": "79 Perryn Road",
+ "postcode": "W3 7LT",
+ "roof-description": "Pitched, no insulation (assumed)"
+ }
+]
+
+
+def app():
+ asset_list = [
+ {
+ 'uprn': 12103117,
+ "address": "79 Perryn Road",
+ "postcode": "W3 7LT",
+ },
+
+ ]
+
+ asset_list = pd.DataFrame(asset_list)
+
+ # Store the asset list in s3
+ filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
+ save_csv_to_s3(
+ dataframe=asset_list,
+ bucket_name="retrofit-plan-inputs-dev",
+ file_name=filename
+ )
+
+ # Store patches in s3
+ patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json"
+ save_csv_to_s3(
+ dataframe=pd.DataFrame(patches),
+ bucket_name="retrofit-plan-inputs-dev",
+ file_name=patches_filename
+ )
+
+ body = {
+ "portfolio_id": str(PORTFOLIO_ID),
+ "housing_type": "Private",
+ "goal": "Increase EPC",
+ "goal_value": "B",
+ "trigger_file_path": filename,
+ "already_installed_file_path": "",
+ "patches_file_path": patches_filename,
+ "non_invasive_recommendations_file_path": "",
+ "budget": None,
+ }
+ print(body)
diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py
index 906ff594..b594579f 100644
--- a/etl/eligibility/Eligibility.py
+++ b/etl/eligibility/Eligibility.py
@@ -145,6 +145,7 @@ class Eligibility:
"reason": None,
"thickness_classification": thickness_classification
}
+ return
# Insulation is already thick enough
self.loft = {
@@ -164,8 +165,10 @@ class Eligibility:
"""
is_cavity = self.walls["is_cavity_wall"]
- is_empty = (not self.walls["is_filled_cavity"]) or (
+ is_empty = (not self.walls["is_filled_cavity"])
+ is_as_built = (
self.walls["is_as_built"] and self.walls["insulation_thickness"] not in ["average", "above average"]
+ and self.walls["is_assumed"]
)
is_partial_filled = "partial" in self.walls["clean_description"].lower()
# We look for potentially under performing cavities - anything that is assumed, as built and insulated
@@ -175,6 +178,7 @@ class Eligibility:
is_unfilled_cavity = is_cavity and (is_empty and not is_partial_filled)
is_partial_filled_cavity = is_cavity and is_partial_filled
+ is_assumed_filled_cavity = is_cavity and is_as_built
is_underperforming_cavity = is_cavity and is_underperforming
# Check if it has internal or external wall insulation
@@ -195,6 +199,13 @@ class Eligibility:
}
return
+ if is_assumed_filled_cavity:
+ self.cavity = {
+ "suitability": True,
+ "type": "as built assumed",
+ }
+ return
+
if is_partial_filled_cavity:
self.cavity = {
"suitability": True,
@@ -340,13 +351,35 @@ class Eligibility:
# Check if the property is suitable for cavity wall
self.cavity_insulation()
- self.loft_insulation()
- self.gbis_warmfront = (self.cavity["suitability"]) and (
- int(self.epc["current-energy-efficiency"]) <= 68
- )
+ current_sap = int(self.epc["current-energy-efficiency"])
+ # We have a strict suitability check and a non-strict check
- def check_eco4_warmfront(self, post_retrofit_sap=None):
+ # Perfect strictness
+ if (self.cavity["type"] == "empty") and (current_sap < 69):
+ self.gbis_warmfront = {
+ "eligible": True,
+ "strict": True,
+ "message": "Perfect suitability",
+ }
+ return
+
+ # Near perfect
+ if self.cavity["suitability"] and (current_sap < 69):
+ self.gbis_warmfront = {
+ "eligible": True,
+ "strict": True,
+ "message": "Near perfect suitability",
+ }
+ return
+
+ self.gbis_warmfront = {
+ "eligible": False,
+ "strict": False,
+ "message": "All conditions fail",
+ }
+
+ def check_eco4_warmfront(self):
"""
This funciton will check if the property is eligible for funding under the ECO4 scheme
@@ -378,49 +411,121 @@ class Eligibility:
self.cavity_insulation()
self.loft_insulation()
- # make sure conditions 2 and 3 are true
- is_eligible = self.cavity["suitability"] & self.loft["suitability"]
+ # We put in a placeholder when the roof is not a loft
+ if self.loft["reason"] == "roof not loft":
+ self.loft["thickness"] = 999
- if current_sap >= 69:
+ # Case 1: No conditions met
+ if not self.cavity["suitability"] and (self.loft["thickness"] > 100) and current_sap >= 55:
self.eco4_warmfront = {
"eligible": False,
- "message": "sap too high",
+ "strict": False,
+ "message": "All conditions fail",
"cavity_type": self.cavity["type"],
"loft_type": self.loft["thickness_classification"]
}
return
- if post_retrofit_sap is None:
-
- if current_sap >= 55:
- message = "Possibly eligible but property currently EPC D"
- else:
- message = "subject to post retrofit sap" if is_eligible else "not eligible"
-
- # Update the message to flag properties that failed just because of a full cavity.
- # We need to double check that the wall is a cavity, that the loft is suitable and that the
- # sap is within reason
- # We can then estimate the age of the cavity fill
- if not is_eligible and (current_sap < 69) and self.loft["suitability"] and self.walls["is_cavity_wall"]:
- message = "Failed due to full cavity - check cavity age"
-
+ # Case 2 - perfect match
+ if (self.cavity["type"] == "empty") and (self.loft["thickness"] <= 100) and (current_sap < 55):
self.eco4_warmfront = {
- "eligible": is_eligible,
- "message": message,
+ "eligible": True,
+ "strict": True,
+ "message": "Perfect suitability",
"cavity_type": self.cavity["type"],
"loft_type": self.loft["thickness_classification"]
}
return
- is_eligible = is_eligible & (post_retrofit_sap >= 69)
+ # Case 2.5 - near perfect match - but we would not recommend this using the model
+ if self.cavity["suitability"] and (self.loft["thickness"] <= 100) and (current_sap < 55):
+ self.eco4_warmfront = {
+ "eligible": True,
+ "strict": True,
+ "message": "Near perfect suitability",
+ "cavity_type": self.cavity["type"],
+ "loft_type": self.loft["thickness_classification"]
+ }
+ return
- self.eco4_warmfront = {
- "eligible": is_eligible,
- "message": None,
- "cavity_type": self.cavity["type"],
- "loft_type": self.loft["thickness_classification"]
- }
- return
+ # Case 3 - cavity is suitable, loft is within 150mm, sap is good
+ if self.cavity["suitability"] and (self.loft["thickness"] <= 150) and (current_sap < 55):
+ self.eco4_warmfront = {
+ "eligible": True,
+ "strict": False,
+ "message": "Meets cavity, loft borderline, meets sap",
+ "cavity_type": self.cavity["type"],
+ "loft_type": self.loft["thickness_classification"]
+ }
+ return
+
+ # Case 3.5 - cavity is suitable, loft is over 150mm, sap is good
+ if self.cavity["suitability"] and (self.loft["thickness"] > 150) and (current_sap < 55):
+ self.eco4_warmfront = {
+ "eligible": True,
+ "strict": False,
+ "message": "Meets cavity and sap",
+ "cavity_type": self.cavity["type"],
+ "loft_type": self.loft["thickness_classification"]
+ }
+ return
+
+ # Case 4 - cavity is not suitable, loft is, sap is good - we say this is not eligible
+ if not self.cavity["suitability"] and (self.loft["thickness"] <= 100) and (current_sap < 55):
+ self.eco4_warmfront = {
+ "eligible": False,
+ "strict": False,
+ "message": "failed fabric check",
+ "cavity_type": self.cavity["type"],
+ "loft_type": self.loft["thickness_classification"]
+ }
+ return
+
+ # Case 5 - cavity and loft suitable, sap too high
+ if self.cavity["suitability"] and (self.loft["thickness"] <= 150) and (current_sap >= 55):
+ self.eco4_warmfront = {
+ "eligible": True,
+ "strict": False,
+ "message": "Meets fabric, fails SAP check",
+ "cavity_type": self.cavity["type"],
+ "loft_type": self.loft["thickness_classification"]
+ }
+ return
+
+ # Case 6 - meets just cavity
+ if self.cavity["suitability"] and (self.loft["thickness"] > 100) and (current_sap >= 55):
+ self.eco4_warmfront = {
+ "eligible": True,
+ "strict": False,
+ "message": "Meets just cavity",
+ "cavity_type": self.cavity["type"],
+ "loft_type": self.loft["thickness_classification"]
+ }
+ return
+
+ # Case 7 - fails cavity and loft, but meets sap
+ if not self.cavity["suitability"] and (self.loft["thickness"] > 100) and (current_sap < 55):
+ self.eco4_warmfront = {
+ "eligible": False,
+ "strict": False,
+ "message": "Fails cavity and loft, meets SAP",
+ "cavity_type": self.cavity["type"],
+ "loft_type": self.loft["thickness_classification"]
+ }
+ return
+
+ # Case 8 - fails cavity, meets loft, fails sap
+ if not self.cavity["suitability"] and (self.loft["thickness"] <= 100) and (current_sap >= 55):
+ self.eco4_warmfront = {
+ "eligible": False,
+ "strict": False,
+ "message": "Fails cavity, meets loft, fails SAP",
+ "cavity_type": self.cavity["type"],
+ "loft_type": self.loft["thickness_classification"]
+ }
+ return
+
+ raise ValueError("Implement me")
def check_gbis(self):
diff --git a/etl/eligibility/ha_15_32/app.py b/etl/eligibility/ha_15_32/app.py
index a68bf272..378a0e83 100644
--- a/etl/eligibility/ha_15_32/app.py
+++ b/etl/eligibility/ha_15_32/app.py
@@ -387,17 +387,19 @@ def prepare_model_data_row(
}
simulations = [
- [cavity_simulation],
- [loft_simulation]
+ cavity_simulation,
+ loft_simulation
]
- p.adjust_difference_record_with_recommendations(simulations)
+ recommendation_record = p.base_difference_record.df.to_dict("records")[0].copy()
+ scoring_dict = p.create_recommendation_scoring_data(
+ property_id=p.id,
+ recommendation_record=recommendation_record,
+ recommendations=simulations,
+ primary_recommendation_id=cavity_simulation["recommendation_id"]
+ )
- # Make sure we definitely have the correct data
- cavity_scoring = [x for x in p.recommendations_scoring_data if "cavity" in x["id"]][0]
- loft_scoring = [x for x in p.recommendations_scoring_data if "loft" in x["id"]][0]
-
- return [cavity_scoring, loft_scoring]
+ return [scoring_dict]
def get_ha_32data(ha_data, cleaned, cleaning_data, created_at):
diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 92956337..f99c7b1a 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -1,11 +1,15 @@
import os
+import re
import openpyxl
+import Levenshtein
from pathlib import Path
import msgpack
from datetime import datetime
import pandas as pd
import numpy as np
-from utils.s3 import read_from_s3, read_dataframe_from_s3_parquet, save_pickle_to_s3, read_pickle_from_s3
+from utils.s3 import (
+ read_from_s3, read_dataframe_from_s3_parquet, save_pickle_to_s3, read_pickle_from_s3, save_dataframe_to_s3_parquet
+)
from utils.logger import setup_logger
from dotenv import load_dotenv
from tqdm import tqdm
@@ -15,6 +19,10 @@ from etl.eligibility.ha_15_32.app import prepare_model_data_row
from backend.ml_models.api import ModelApi
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from recommendations.recommendation_utils import calculate_cavity_age
+from etl.epc.Record import EPCRecord
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+from etl.epc.DataProcessor import EPCDataProcessor
+from datetime import datetime
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
@@ -23,6 +31,486 @@ DATA_FOLDER = Path(__file__).parent / "local_data" / "ha_data"
logger = setup_logger()
load_dotenv(ENV_FILE)
+PROPERTY_TYPE_LOOKUP = {
+ "HA1": {
+ "built_form": {
+ 'Mid Terrace': 'Mid-Terrace',
+ 'Semi-Detached': 'Semi-Detached',
+ 'End Terrace': 'End-Terrace',
+ 'Detached': 'Detached',
+ 'Enclosed Mid': 'Mid-Terrace',
+ 'Detached Local Connect': 'Detached',
+ }
+ },
+ "HA2": {
+ 'HOUSE': 'House',
+ 'FLAT': 'Flat',
+ 'SHELTERED': None,
+ 'BUNGALOW': 'Bungalow',
+ 'BED-SIT': None,
+ 'MAISONETTE': "Maisonette",
+ 'HOSTEL': None
+ },
+ "HA5": {
+ "House": "House",
+ "Flat": "Flat",
+ "Bungalow": "Bungalow",
+ "Bedsit": None
+ },
+ "HA6": {
+ "property_type": {
+ 'HOUSE': "House",
+ 'GROUND FLOOR FLAT': "Flat",
+ 'UPPER FLOOR FLAT': "Flat",
+ 'MAISONETTE': "Maisonette",
+ 'BUNGALOW': "Bungalow",
+ 'WARDEN BUNGALOW': "Bungalow",
+ 'WARDEN FLAT': "Flat",
+ 'EXTRACARE SCHEME': "Flat",
+ }
+ },
+ "HA7": {
+ "property_type": {
+ "House": "House",
+ "Flat": "Flat",
+ "Bungalow": "Bungalow",
+ "Maisonette": "Maisonette",
+ },
+ "built_form": {
+ "Semi Detached": "Semi-Detached",
+ "Mid Terrace": "Mid-Terrace",
+ "End Terrace": "End-Terrace",
+ "Detached": "Detached",
+ "End Terraced": "End-Terrace",
+ }
+ },
+ "HA8": {
+ "House": "House",
+ "Flat": "Flat",
+ "Bungalow": "Bungalow",
+ "Maisonette": "Maisonette",
+ "Bedsit": None,
+ "Room": None,
+ "Other": None,
+ "Commerical": None
+ },
+ "HA11": {
+ "Flat": "Flat",
+ "House": "House",
+ "Semi-Det House": "House",
+ "Bedsit": None,
+ "End-Terr House": "House",
+ "Mid-Terr House": "House",
+ "Bungalow": "Bungalow",
+ "Maisonette": "Maisonette",
+ "End Terr Flat": "Flat",
+ "Mid Terr Flat": "Flat",
+ "Detached Flat": "Flat",
+ },
+ "HA12": {
+ "House": "House",
+ "Flat": "Flat",
+ "Bungalow": "Bungalow",
+ "Maisonette": "Maisonette",
+ "Bedsit": None,
+ },
+ "HA13": {
+ 'House': "House",
+ 'Flat': "Flat",
+ 'House MT': "House",
+ 'House SD': "House",
+ 'House ET': "House",
+ 'Bungalow MT': "Bungalow",
+ 'Bungalow ET': "Bungalow",
+ 'ii': None,
+ },
+ "HA14": {
+ "property_type": {
+ "House": "House",
+ "Flat": "Flat",
+ "Bungalow": "Bungalow",
+ "Maisonette": "Maisonette",
+ }
+ },
+ "HA15": {
+ 'House': 'House',
+ 'Flat': 'Flat',
+ 'Bungalow': 'Bungalow',
+ 'Maisonette': 'Maisonette',
+ 'Flat over garage': 'Flat',
+ },
+ "HA16": {
+ 'Semi Detached Bungalow': {"property-type": "Bungalow", "built-form": "Semi-Detached"},
+ 'Mid Terraced House': {"property-type": "House", "built-form": "Mid-Terrace"},
+ 'End Terraced House': {"property-type": "House", "built-form": "End-Terrace"},
+ 'Low Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+ 'Semi-Detached House': {"property-type": "House", "built-form": "Semi-Detached"},
+ 'Detached Bungalow': {"property-type": "Bungalow", "built-form": "Detached"},
+ 'End Terraced Bungalow': {"property-type": "Bungalow", "built-form": "End-Terrace"},
+ 'Mid Terraced Bungalow': {"property-type": "Bungalow", "built-form": "Mid-Terrace"},
+ 'Medium Rise Flat': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+ 'Detached House': {"property-type": "House", "built-form": "Detached"},
+ 'Cottage Flat': {"property-type": "Flat", "built-form": "Semi-Detached"},
+ 'Maisonette Medium Rise': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+ 'Maisonette Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+ 'End Terraced Town House': {"property-type": "House", "built-form": "End-Terrace"},
+ 'Flat Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"},
+ 'Mid Terraced Town House': {"property-type": "House", "built-form": "Mid-Terrace"},
+ },
+ "HA18": {
+ "House": "House",
+ "Flat": "Flat",
+ "Bungalow": "Bungalow",
+ "Maisonette": "Maisonette",
+ "Bedsit": None,
+ "Shop": None,
+ "Hostel": None,
+ "Block": None,
+ },
+ "HA20": {
+ "House": "House",
+ "Flat": "Flat",
+ 'Sheltered Flat': "Flat",
+ 'Maisonette': 'Maisonette',
+ 'Bungalow': 'Bungalow',
+ 'House. SD': 'House',
+ 'House. MT': 'House',
+ 'House. ET': 'House',
+ 'Sheltered Bungalow': 'Bungalow',
+ 'Guest Accomodation': None,
+ 'Sheltered House': 'House',
+ 'House. MT ': 'House',
+ 'House. D': 'House'
+ },
+ "HA24": {
+ '01 HOUSE': 'House',
+ '02 FLAT': 'Flat',
+ '03 BUNGALOW': 'Bungalow',
+ '10 PBUNGALOW': 'Bungalow',
+ '01 HOUSE MID': 'House',
+ '13 SBUNGALOW': 'Bungalow',
+ '12 SBEDSIT': None, # BEDSIT does not match the specified property types
+ '14 SFLAT': 'Flat',
+ '05 BEDSIT': None,
+ '04 MAISONETTE': 'Maisonette',
+ '11 PFLAT': 'Flat',
+ '09 PBEDSIT': None
+ },
+ "HA25": {
+ 'Flat': 'Flat',
+ 'Mid Terrace House': 'House',
+ 'Semi Detached House': 'House',
+ 'End Terrace House': 'House',
+ 'House': 'House',
+ 'Semi Detached Bung': 'Bungalow',
+ 'Bungalow': 'Bungalow',
+ 'End Terrace Bungalow': 'Bungalow',
+ 'Maisonnette': 'Maisonette',
+ 'Mid Terrace Bungalow': 'Bungalow',
+ 'Bedspace': None,
+ 'Detached House': 'House',
+ 'Bedsit': 'Flat',
+ 'Coach House': 'House',
+ 'Detached Bungalow': 'Bungalow',
+ 'Office Buildings': None,
+ 'Guest Room': None,
+ 'Mid Terrace Housekeeping ': 'House',
+ 'End Terrace Housex': 'House'
+ },
+ "HA28": {
+ 'Flat': 'Flat',
+ 'Semi detached house': 'House',
+ 'Terraced house': 'House',
+ 'Maisonette flat': 'Maisonette',
+ 'Sheltered bedsit': None,
+ 'APD flat': 'Flat',
+ 'Bungalow terraced': 'Bungalow',
+ 'Flat with partition': 'Flat',
+ 'Bungalow semi detached': 'Bungalow',
+ 'APD Bungalow': 'Bungalow',
+ 'Sheltered flat': 'Flat',
+ 'Bedsit Flat': 'Flat',
+ 'Bedsit bungalow semi detached': 'Bungalow',
+ 'Sheltered bungalow terraced': 'Bungalow',
+ 'Sheltered bedsit disabled': None,
+ 'Bedsit bungalow terraced': 'Bungalow',
+ 'Sheltered bungalow semi detached': 'Bungalow',
+ 'Sheltered warden flat': 'Flat',
+ 'Bungalow detached': 'Bungalow',
+ 'Block': None, # Does not match the specified property types
+ 'End Terraced House': 'House',
+ 'Mid Terraced House': 'House',
+ '#N/A': None, # Assuming this is an invalid or missing entry
+ 0: None # Assuming 0 is also an invalid or missing entry
+ },
+ "HA30": {
+ 'House': 'House',
+ 'Flat': 'Flat',
+ 'Bungalow': 'Bungalow',
+ 'House with Attached Garage': 'House',
+ 'Bed Space': None, # Assuming this does not fit the specified property types
+ 'House with Garage': 'House',
+ 'Bungalow with Wheelchair Access': 'Bungalow',
+ 'Maisonette': 'Maisonette',
+ 'Flat with Wheelchair Access': 'Flat',
+ 'Bedsit': None, # Assuming this does not fit the specified property types
+ 'Flat w Wheelchair Access & Car Park': 'Flat',
+ 'House with Wheelchair Access': 'House',
+ 'Bungalow w Wheelchair Access & Car ': 'Bungalow'
+ },
+ "HA32": {
+ 'Bungalow': 'Bungalow',
+ 'Flat': 'Flat',
+ 'Bungalow Disabled': 'Bungalow', # "Disabled" properties categorized with their base type
+ 'House': 'House',
+ 'Dormer Bungalow': 'Bungalow',
+ 'Pop-In': None, # Does not fit the specified property types
+ 'Flat Disabled': 'Flat',
+ 'Laundry': None, # Does not fit the specified property types
+ 'Bedsit': None, # Excluded from the given categories
+ 'Shed': None, # Does not fit the specified property types
+ 'Store Room': None # Does not fit the specified property types
+ },
+ "HA34": {
+ 'Flat': 'Flat',
+ 'House': 'House',
+ 'Bungalow': 'Bungalow',
+ 'Maisonette': 'Maisonette',
+ 'ND': None,
+ },
+ "HA35": {
+ "Flat": "Flat",
+ "Maisonette": "Maisonette",
+ "House": "House",
+ "Bedsit": None,
+ "2 Bedroom Unknown": None,
+ "1 Bedroom Unknown": None,
+ "3 Bedroom Unknown": None,
+ "4 Bedroom Unknown": None,
+ },
+ "HA37": {
+ "FLT": "Flat",
+ "HSE": "House",
+ "BNW": "Bungalow",
+ "MAS": "Maisonette",
+ "HSL": None
+ },
+ "HA39": {
+ "Semi house": {"property_type": "House", "built_form": "Semi-Detached"},
+ "1st floor flat": {"property_type": "Flat", "built_form": None},
+ "Mid terrace house": {"property_type": "House", "built_form": "Mid-Terrace"},
+ "Ground floor flat": {"property_type": "Flat", "built_form": None},
+ "End terrace house": {"property_type": "House", "built_form": "End-Terrace"},
+ "Semi bungalow": {"property_type": "Bungalow", "built_form": "Semi-Detached"},
+ "End terrace bungalow": {"property_type": "Bungalow", "built_form": "End-Terrace"},
+ "2nd floor flat": {"property_type": "Flat", "built_form": None},
+ "Mid terrace bungalow": {"property_type": "Bungalow", "built_form": "Mid-Terrace"},
+ "3rd floor flat": {"property_type": "Flat", "built_form": None},
+ "Detached bungalow": {"property_type": "Bungalow", "built_form": "Detached"},
+ "Maisonette": {"property_type": "Maisonette", "built_form": None},
+ "Detached house": {"property_type": "House", "built_form": "Detached"},
+ "Lower ground floor flat": {"property_type": "Flat", "built_form": None},
+ "Dormer bungalow": {"property_type": "Bungalow", "built_form": None},
+ "Basement flat": {"property_type": "Flat", "built_form": None},
+ "Cluster House": {"property_type": "House", "built_form": "Detached"},
+ "2nd/3rd floor duplex flat": {"property_type": "Flat", "built_form": None},
+ "Ground floor flat with study": {"property_type": "Flat", "built_form": None},
+ "4th floor flat": {"property_type": "Flat", "built_form": None},
+ "1st floor flat with study room": {"property_type": "Flat", "built_form": None},
+ "2nd floor flat with study": {"property_type": "Flat", "built_form": None},
+ },
+ "HA41": {
+ 'Garage': None,
+ 'House 1919-1945': 'House',
+ 'House 1946-1964': 'House',
+ 'Flats & Maisonettes post 1974': 'Flat',
+ 'Non traditional houses': 'House',
+ 'Sheltered': None,
+ 'Flats & Maisonettes 1965-1974': 'Flat',
+ 'House post 1974': 'House',
+ 'Block': None,
+ 'Flats & Maisonettes 1946-1964': 'Flat',
+ 'House 1965-1974': 'House',
+ 'Non traditional flats': 'Flat',
+ 'Bungalow 1965-1974': 'Bungalow',
+ 'PIMSS EMPTY': None,
+ 'Bungalow post 1974': 'Bungalow',
+ 'Bungalow 1946-1964': 'Bungalow',
+ 'Flats & Maisonettes 1919-1945': 'Flat',
+ 'House pre 1919': 'House',
+ 'Flats & Maisonettes pre 1919': 'Flat',
+ 'Bungalow 1919-1945': 'Bungalow',
+ 'Office': None
+ },
+ "HA42": {
+ 'Flat': 'Flat',
+ 'House': 'House',
+ 'Flat Basement': 'Flat',
+ 'Room': None,
+ 'Bedsit Flat': 'Flat',
+ 'Maisonette': 'Maisonette',
+ 'Scheme Office': None,
+ 'Scheme Lounge': None,
+ 'Bungalow': 'Bungalow',
+ 'Garage': None,
+ 'Scheme Sleep Room': None,
+ 'Cluster': None,
+ 'Scheme Room': None
+ },
+ "HA45": {
+ 'Large block of flats': 'Flat',
+ 'Small block of flats/dwelling converted in to flats': 'Flat',
+ 'Semi-detached house': 'House',
+ 'Mid-terraced house': 'House',
+ 'End-terraced house': 'House',
+ 'Block of flats': 'Flat',
+ 'Detached house': 'House',
+ 'Flat in mixed use building': 'Flat',
+ },
+ "HA48": {
+ "House": "House",
+ "Flat": "Flat",
+ "Bungalow": "Bungalow",
+ "Maisonette": "Maisonette",
+ "Unit": None
+ },
+ "HA50": {
+ 'House': 'House',
+ 'Bungalow': 'Bungalow',
+ 'Flat': 'Flat',
+ 'House SD': 'House',
+ 'House MT': 'House',
+ 'House ET': 'House',
+ 'Bungalow ET': 'Bungalow',
+ 'House SD ': 'House',
+ 'House. SD': 'House',
+ 'Bungalow SD': 'Bungalow',
+ 'Bungalow MT': 'Bungalow',
+ 'Bungalow D': 'Bungalow',
+ 'House D': 'House',
+ 'House. MT': 'House',
+ 'House ': 'House',
+ 'House ET ': 'House',
+ ' ': None,
+ 'Flat?': 'Flat',
+ 'Bungalow ': 'Bungalow'
+ },
+ "HA51": {
+ 'FLAT': 'Flat',
+ 'HOUSE': 'House',
+ 'MAISONETTE': 'Maisonette',
+ 'BEDSIT': None, # Considering as a non-specific residential category here
+ 'BUNGALOW': 'Bungalow',
+ },
+ "HA52": {
+ 'House - Mid Terrace': 'House',
+ 'Flat - First Floor': 'Flat',
+ 'Flat - Ground Floor': 'Flat',
+ 'House - Semi-Detached': 'House',
+ 'House - End Terrace': 'House',
+ 'Flat - Second Floor': 'Flat',
+ 'Bedsit': None, # Considering as a non-specific residential category here
+ 'Bungalow - Semi-Detached': 'Bungalow',
+ 'Bungalow - Mid Terrace': 'Bungalow',
+ 'Bungalow - End Terrace': 'Bungalow',
+ 'House - Detached': 'House',
+ 'Flat - Third Floor': 'Flat',
+ 'House attached to flats': 'House',
+ 'Flat - Fourth Floor': 'Flat',
+ 'Bungalow - Detached': 'Bungalow'
+ },
+ "HA56": {
+ 'House Non Specific': 'House',
+ 'HOUSE TERRACED': 'House',
+ 'HOUSE - SEMI DETACHD': 'House',
+ 'Bungalow': 'Bungalow',
+ 'House - End Terraced': 'House',
+ 'Block': None,
+ 'Block with Communal': None,
+ 'Bungalow - Terraced': 'Bungalow',
+ 'Bungalow - Semi Dtch': 'Bungalow',
+ 'Block House with rooms': None,
+ 'Bungalow - End Terr': 'Bungalow',
+ 'House - Mid Terraced': 'House',
+ 'Bungalow - Detached': 'Bungalow',
+ 'House - Detached': 'House',
+ 'HOUSE THREE STOREY': 'House',
+ 'Maisonette': 'Maisonette',
+ 'Communal Block': None,
+ 'Scheme': None
+ },
+ "HA63": {
+ 'Flat': 'Flat',
+ 'House - Semi detached': 'House',
+ 'House - Detached': 'House',
+ 'House - End Terrace': 'House',
+ 'House - Mid Terrace': 'House',
+ 'Bungalow - Semi detached': 'Bungalow',
+ 'Bungalow': 'Bungalow',
+ 'Bedsit': None, # Considering as a non-specific residential category here
+ 'Maisonette': 'Maisonette',
+ 'Bungalow - End Terrace': 'Bungalow',
+ 'Bungalow - Detached': 'Bungalow',
+ 'Maisonette - Mid Terrace': 'Maisonette',
+ 'Maisonette - End Terrace': 'Maisonette',
+ 'Studio Flat': 'Flat',
+ 'Maisonette - Detached': 'Maisonette',
+ 'Bungalow - Mid Terrace': 'Bungalow',
+ 'Bedsit - Mid Terrace': None,
+ 'Bedsit - End Terrace': None,
+ 'Amenity Block - Semi detached': None, # Assuming non-residential
+ 'Maisonette - Semi Detached': 'Maisonette',
+ 'Amenity Block - Detached': None, # Assuming non-residential
+ 'Hostel': None, # Typically not considered a standard residential property for this context
+ 'Bungalow - Attached': 'Bungalow',
+ 'Unknown': None, # Not enough information to categorize
+ 'Studio Flat - Mid Terrace': 'Flat',
+ 'Chalet - Wheelchair': None # Specialized type, not categorized here
+ },
+ "HA107": {
+ "property_type": {
+ "HOUSE": "House",
+ "BUNGALOW": "Bungalow",
+ "GRD FLOOR FLAT": "Flat",
+ "FIRST FLOOR FLAT": "Flat",
+ "SHELTERED BUNGALOW": "Bungalow",
+ "MAISONETTE": "Maisonette",
+ "SECOND FLOOR FLAT": "Flat",
+ "SHELTERED FIRST FLR": "Flat",
+ "SHELTERED GROUND FLR": "Flat",
+ "GRD FLOOR BED SIT": "House"
+ },
+ "built_form": {
+ "Semi Detached": "Semi-Detached",
+ "Mid Terrace": "Mid-Terrace",
+ "End Terrace": "End-Terrace",
+ "Detached": "Detached",
+ "Detatched": "Detached",
+ }
+ },
+ "HA117": {
+ "Flat": "Flat",
+ "House": "House",
+ "Bungalow": "Bungalow",
+ "Flat over garage/underpass": "Flat",
+ },
+ "HAXXX": {
+ 'mid terraced house': 'House',
+ 'semi detached house': 'House',
+ '1st fl 4 in a block': 'Flat',
+ 'G/F 4 in a block': 'Flat',
+ 'end terraced house': 'House',
+ '1st floor flat': 'Flat',
+ 'G/F floor flat': 'Flat',
+ 'semi detached bungalow': 'Bungalow',
+ '2nd floor flat': 'Flat',
+ 'mid terrace bungalow': 'Bungalow',
+ 'detached bungalow': 'Bungalow',
+ 'end terrace bungalow': 'Bungalow',
+ 'Staff accommodation': None # Marked as None due to its special nature
+ }
+}
+
class DataLoader:
COLUMN_CONFIG = {
@@ -30,35 +518,256 @@ class DataLoader:
"address": "Address",
"postcode": "Address - Postcode"
},
+ "HA5": {
+ "address": "Address",
+ "postcode": "matching_postcode"
+ },
"HA6": {
"address": "propertyaddress",
"postcode": "address" # The 'address' column actually contains postcode
+ },
+ "HA12": {
+ "address": "Full Address",
+ "postcode": "Postcode"
+ },
+ "HA16": {
+ "address": "Address",
+ "postcode": "Postcode"
+ },
+ "HA24": {
+ "address": "Address",
+ "postcode": "Postcode"
+ },
+ "HA25": {
+ "address": "T1_Address",
+ "postcode": "matching_postcode"
+ },
+ "HA30": {
+ "address": "A_Address",
+ "postcode": "A_Postcode"
+ },
+ "HA31": {
+ "address": "A_Address",
+ "postcode": "matching_postcode"
+ },
+ "HA45": {
+ "address": "Full postal address",
+ "postcode": "Postcode"
+ },
+ "HA48": {
+ "address": "Full Address",
+ "postcode": "Postcode"
+ },
+ "HA49": {
+ "address": "Property Address Full",
+ "postcode": "Property Postcode"
+ },
+ "HA52": {
+ "address": "Postal Address",
+ "postcode": "POSTCODE"
+ },
+ "HA54": {
+ "address": "Postal Address",
+ "postcode": "matching_postcode"
}
}
- def __init__(self, directories, use_cache):
+ UNMATCHED_CIGA = {
+ "HA2": 0,
+ "HA6": 117,
+ "HA9": 0,
+ "HA12": 6,
+ "HA13": 119,
+ "HA14": 3,
+ "HA15": 3,
+ "HA16": 7,
+ "HA24": 12,
+ "HA50": 4,
+ "HA63": 15,
+ "HA107": 51,
+ "HA48": 0,
+ "HA45": 0,
+ "HA52": 5,
+ "HA20": 6
+ }
+
+ UNMATCHED_ECO3 = {
+ "HA25": 154,
+ "HA41": 26,
+ "HA50": 5,
+ "HA56": 320,
+ "HA63": 0,
+ "HA117": 4,
+ "HA51": 24
+ }
+
+ def __init__(self, directories, december_figures_filepath, use_cache, rebuild):
self.directories = directories
self.use_cache = use_cache
+ self.december_figures_filepath = december_figures_filepath
+ self.rebuild = rebuild
self.data = {}
+ self.december_figures = None
+ self.facts_and_figures = None
def create_asset_list_matching_address(self, ha_name, asset_list):
- if ha_name in ["HA1", "HA6"]:
+ if ha_name in [
+ "HA1", "HA5", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA45", "HA48", "HA49", "HA52", "HA54"
+ ]:
asset_list["matching_address"] = asset_list[
self.COLUMN_CONFIG[ha_name]["address"]
- ].str.lower().str.strip()
+ ].astype(str).str.lower().str.strip()
asset_list["matching_postcode"] = asset_list[
self.COLUMN_CONFIG[ha_name]["postcode"]
- ].str.lower().str.strip()
+ ].astype(str).str.lower().str.strip()
+ elif ha_name == "HA2":
+ # Create matching_address by concatenating Address Line 1, Address Line 2, Postcode
+ asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Postcode"].astype(str).str.lower().str.strip()
+ asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+ elif ha_name == "HA7":
+ # Create matching_address by concatenating Address, Address2, Address3, Postcode
+ asset_list["matching_address"] = asset_list["Address"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Address2"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Address3"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Postcode"].astype(str).str.lower().str.strip()
+ asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+ elif ha_name == "HA8":
+ asset_list["matching_address"] = asset_list["AddressLine1"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["AddressLine2"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Postcode"].astype(str).str.lower().str.strip()
+ asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+ elif ha_name == "HA9":
+ asset_list["matching_address"] = asset_list["House Number"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Address Line 3"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Address Line 4"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Postcode"].astype(str).str.lower().str.strip()
+ asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+ elif ha_name == "HA11":
+ asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Address 2"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Address 3"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Post Code"].astype(str).str.lower().str.strip()
+ asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip()
+ elif ha_name == "HA13":
+ asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["address 2"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Address 3"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Postcode"].astype(str).str.lower().str.strip()
+ asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
elif ha_name == "HA14":
# Create matching_address by concatenating Address 1, Address 2, Address 3, Address 4, Postcode
- asset_list["matching_address"] = asset_list["Address 1"].str.lower().str.strip() + ", " + \
- asset_list["Address 2"].str.lower().str.strip() + ", " + \
- asset_list["Address 3"].str.lower().str.strip() + ", " + \
- asset_list["Address 4"].str.lower().str.strip() + ", " + \
- asset_list["Postcode"].str.lower().str.strip()
- asset_list["matching_postcode"] = asset_list["Postcode"].str.lower().str.strip()
+ asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Address 2"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Address 3"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Address 4"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Postcode"].astype(str).str.lower().str.strip()
+ asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+ elif ha_name == "HA15":
+ asset_list["matching_address"] = (
+ asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " +
+ asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " +
+ asset_list["Address Line 3"].astype(str).str.lower().str.strip() + ", " +
+ asset_list["Address Line 4"].astype(str).str.lower().str.strip() + ", " +
+ asset_list["Postcode"].astype(str).str.lower().str.strip()
+ )
+ asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+ elif ha_name == "HA18":
+ asset_list["matching_address"] = (
+ asset_list["Address"].astype(str).str.lower().str.strip() + ", " +
+ asset_list["Post Code"].astype(str).str.lower().str.strip()
+ )
+ asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip()
+ elif ha_name == "HA19":
+ asset_list["matching_address"] = (
+ asset_list["Address1"].astype(str).str.lower().str.strip() + ", " +
+ asset_list["Address2"].astype(str).str.lower().str.strip() + ", " +
+ asset_list["Address3"].astype(str).str.lower().str.strip() + ", " +
+ asset_list["Postcode"].astype(str).str.lower().str.strip()
+ )
+ asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+ elif ha_name == "HA20":
+ asset_list["matching_address"] = (
+ asset_list["House Name"].astype(str).str.lower().str.strip() + ", " +
+ asset_list["Block"].astype(str).str.lower().str.strip() + ", " +
+ asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " +
+ asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " +
+ asset_list["Address Line 3"].astype(str).str.lower().str.strip() + ", " +
+ asset_list["Address Line 4"].astype(str).str.lower().str.strip() + ", " +
+ asset_list["Postcode"].astype(str).str.lower().str.strip()
+ )
+ asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+ elif ha_name == "HA21":
+ asset_list["matching_address"] = (
+ asset_list["Address"].astype(str).str.lower().str.strip() + ", " +
+ asset_list["PostCode"].astype(str).str.lower().str.strip()
+ )
+ asset_list["matching_postcode"] = asset_list["PostCode"].astype(str).str.lower().str.strip()
+ elif ha_name == "HA25":
+ asset_list["matching_address"] = asset_list[
+ self.COLUMN_CONFIG[ha_name]["address"]
+ ].astype(str).str.lower().str.strip()
+
+ asset_list["matching_postcode"] = asset_list['matching_address'].apply(
+ lambda x: ' '.join(x.split()[-2:]) if pd.notnull(x) else x
+ )
+ elif ha_name == "HA27":
+ asset_list["matching_address"] = (
+ asset_list[" Address"].astype(str).str.lower().str.strip() + ", " +
+ asset_list[" Postcode"].astype(str).str.lower().str.strip()
+ )
+ asset_list["matching_postcode"] = asset_list[" Postcode"].astype(str).str.lower().str.strip()
+ elif ha_name == "HA28":
+ asset_list["matching_address"] = (
+ asset_list["House Number"].astype(str).str.lower().str.strip() + ", " +
+ asset_list["Street 1"].astype(str).str.lower().str.strip() + ", " +
+ asset_list["Postcode"].astype(str).str.lower().str.strip()
+ )
+ asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+ elif ha_name == "HA32":
+ asset_list["matching_address"] = (
+ asset_list["Dwelling num"].astype(str).str.lower().str.strip() + ", " +
+ asset_list["Street"].astype(str).str.lower().str.strip() + ", " +
+ asset_list["Postcode"].astype(str).str.lower().str.strip()
+ )
+ asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+ elif ha_name == "HA33":
+ asset_list["matching_address"] = (
+ asset_list["ADDRESS"].astype(str).str.lower().str.strip() + ", " +
+ asset_list["POST CODE"].astype(str).str.lower().str.strip()
+ )
+ asset_list["matching_postcode"] = asset_list["POST CODE"].astype(str).str.lower().str.strip()
+ elif ha_name == "HA34":
+ asset_list["matching_address"] = (
+ asset_list[" Address"].astype(str).str.lower().str.strip() + ", " +
+ asset_list[" Postcode"].astype(str).str.lower().str.strip()
+ )
+ asset_list["matching_postcode"] = asset_list[" Postcode"].astype(str).str.lower().str.strip()
+ elif ha_name == "HA35":
+ asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Address Line 3"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Address Line 4"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Address Post Code"].astype(str).str.lower().str.strip()
+ asset_list["matching_postcode"] = asset_list["Address Post Code"].astype(str).str.lower().str.strip()
+ elif ha_name == "HA37":
+ asset_list["matching_address"] = asset_list["ADDRESS LINE 1"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["ADDRESS LINE 2"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["ADDRESS LINE 3"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["POSTCODE"].astype(str).str.lower().str.strip()
+ asset_list["matching_postcode"] = asset_list["POSTCODE"].astype(str).str.lower().str.strip()
+ elif ha_name == "HA38":
+ asset_list["matching_address"] = asset_list["House_Number"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Address_Line_1"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Address_Line_2"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Address_Line_3"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Postcode"].astype(str).str.lower().str.strip()
+ asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
elif ha_name == "HA39":
# Create matching_address by concatenating add_1, add_2, add_3, add_4, add_5, post_code
asset_list["matching_address"] = asset_list["add_1"].astype(str).str.lower().str.strip() + ", " + \
@@ -67,26 +776,146 @@ class DataLoader:
asset_list["add_4"].astype(str).str.lower().str.strip() + ", " + \
asset_list["add_5"].astype(str).str.lower().str.strip() + ", " + \
asset_list["post_code"].astype(str).str.lower().str.strip()
- asset_list["matching_postcode"] = asset_list["post_code"].str.lower().str.strip()
+ asset_list["matching_postcode"] = asset_list["post_code"].astype(str).str.lower().str.strip()
+ elif ha_name == "HA41":
+ asset_list["matching_address"] = asset_list["AddressLine1"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["AddressLine2"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["AddressLine3"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["AddressLine4"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["AddressLine5"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Postcode"].astype(str).str.lower().str.strip()
+ asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+ elif ha_name == "HA42":
+ asset_list["matching_address"] = asset_list["Dwelling Number"].astype(str).str.lower().str.strip() + " " + \
+ asset_list["Street"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Locality"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Town"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Postcode"].astype(str).str.lower().str.strip()
+ asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+ elif ha_name == "HA44":
+ asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Address 2"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Postal Code"].astype(str).str.lower().str.strip()
+ asset_list["matching_postcode"] = asset_list["Postal Code"].astype(str).str.lower().str.strip()
+ elif ha_name == "HA50":
+ asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Post Code"].astype(str).str.lower().str.strip()
+ asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip()
+ elif ha_name == "HA51":
+ asset_list["matching_address"] = asset_list["Address Line 1"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Address Line 2"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Address Line 3"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Postcode"].astype(str).str.lower().str.strip()
+ asset_list["matching_address"] = np.where(
+ asset_list["Block"].str.strip().str.len() > 0,
+ asset_list["Block"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["matching_address"],
+ asset_list["matching_address"]
+ )
+ asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+ elif ha_name == "HA56":
+ asset_list["matching_address"] = asset_list["Address 1"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Address 2"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Address 3"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Post Code"].astype(str).str.lower().str.strip()
+ asset_list["matching_postcode"] = asset_list["Post Code"].astype(str).str.lower().str.strip()
+ elif ha_name == "HA63":
+ asset_list["matching_address"] = asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["POSTCODE"].astype(str).str.lower().str.strip()
+ asset_list["matching_postcode"] = asset_list["POSTCODE"].astype(str).str.lower().str.strip()
+ elif ha_name == "HA70":
+ asset_list["matching_address"] = asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["POSTCODE"].astype(str).str.lower().str.strip()
+ asset_list["matching_postcode"] = asset_list["POSTCODE"].astype(str).str.lower().str.strip()
elif ha_name == "HA107":
# Create matching_address by concatenating House No, Street, Town, District, Postcode
asset_list["matching_address"] = asset_list["House No"].astype(str).str.lower().str.strip() + ", " + \
- asset_list["Street"].str.lower().str.strip() + ", " + \
- asset_list["Town"].str.lower().str.strip() + ", " + \
- asset_list["District"].str.lower().str.strip() + ", " + \
- asset_list["Postcode"].str.lower().str.strip()
- asset_list["matching_postcode"] = asset_list["Postcode"].str.lower().str.strip()
+ asset_list["Street"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Town"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["District"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Postcode"].astype(str).str.lower().str.strip()
+ asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
+ elif ha_name == "HA117":
+ asset_list["matching_address"] = asset_list["Address1"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["Address2"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["PostCode"].astype(str).str.lower().str.strip()
+ asset_list["matching_postcode"] = asset_list["PostCode"].astype(str).str.lower().str.strip()
+ elif ha_name == "HAXX":
+ asset_list["matching_address"] = asset_list["Address"].astype(str).str.lower().str.strip() + ", " + \
+ asset_list["PostCode"].astype(str).str.lower().str.strip()
+ asset_list["matching_postcode"] = asset_list["PostCode"].astype(str).str.lower().str.strip()
+ elif ha_name == "HAXXX":
+ asset_list["matching_address"] = (
+ asset_list["Combined Address"].astype(str).str.lower().str.strip() + ", " +
+ asset_list["Postcode"].astype(str).str.lower().str.strip()
+ )
+ asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
else:
raise NotImplementedError("implement me")
return asset_list
+ @staticmethod
+ def extract_property_info_ha107(properties):
+ property_types = {
+ "House": "House",
+ "Flat": "Flat",
+ "Bungalow": "Bungalow",
+ "Maisonette": "Maisonette",
+ "Bedsit": None
+ }
+
+ built_forms = {
+ "Detached": "Detached",
+ "Semi Detached": "Semi-Detached",
+ "End Terrace": "End-Terrace",
+ "Mid Terrace": "Mid-Terrace"
+ }
+
+ # Function to extract property type and built form from a description
+ def extract_from_description(description):
+ property_type = None
+ built_form = None
+
+ for key in property_types:
+ if key in description:
+ property_type = property_types[key]
+ break
+
+ for key in built_forms:
+ if key in description:
+ built_form = built_forms[key]
+ break
+
+ return property_type, built_form
+
+ # Process each property in the list
+ results = []
+ for property_description in properties:
+ property_type, built_form = extract_from_description(property_description)
+ results.append(
+ {
+ "Property type": property_description,
+ "property_type": property_type,
+ "built_form": built_form
+ }
+ )
+ results = pd.DataFrame(results)
+
+ return results
+
def append_asset_list_built_form(self, ha_name, asset_list):
# Finally, we process property_type or built form, where needed
if ha_name == "HA6":
asset_list["built_form"] = asset_list["Property Type"].apply(self.identify_built_form_ha6)
+ if ha_name == "HA107":
+ mapped_df = self.extract_property_info_ha107(asset_list["Property type"].unique())
+ asset_list = asset_list.merge(
+ mapped_df, how="left", on="Property type"
+ )
+
return asset_list
@staticmethod
@@ -96,52 +925,237 @@ class DataLoader:
:return:
"""
- if ha_name in ["HA107"]:
+ if ha_name == "HA107":
asset_list["HouseNo"] = asset_list["House No"].copy()
+ elif ha_name == "HA32":
+ asset_list["HouseNo"] = asset_list["Dwelling num"].copy()
+ elif ha_name == "HA28":
+ asset_list["HouseNo"] = asset_list["House Number"].copy()
+ elif ha_name == "HA38":
+ asset_list["HouseNo"] = asset_list["House_Number"].copy()
+ elif ha_name == "HA9":
+ asset_list["HouseNo"] = asset_list["House Number"].copy()
+ elif ha_name == "HAXXX":
+ asset_list["HouseNo"] = asset_list["Door Number"].copy()
else:
split_addresses = asset_list['matching_address'].str.split(',', expand=True)
house_numbers = split_addresses[0].str.split(' ', expand=True)
+ # If we have "flat" or "valley" as the house number, then the house number is actually in the second column
+ house_numbers[0] = np.where(house_numbers[0].isin(["flat", "valley"]), house_numbers[1], house_numbers[0])
+
# THe first column should be HouseNo - we aren't interested in the other columns, but we don't know how
# many columns there might be
house_numbers = house_numbers.iloc[:, 0:1]
house_numbers.columns = ['HouseNo']
+ # Remove trailing punctuation such as , or ;
+ house_numbers["HouseNo"] = house_numbers["HouseNo"].str.rstrip(',;')
+
asset_list = pd.concat([asset_list, house_numbers[["HouseNo"]]], axis=1)
return asset_list
@staticmethod
- def create_ciga_list_house_no(ha_name, ciga_list):
+ def create_ciga_list_house_no(ciga_list):
"""
This function will append the House number onto the asset list
:return:
"""
- if ha_name in ["HA6"]:
- split_addresses = ciga_list['Matched Address'].str.split(',', expand=True)
- house_numbers = split_addresses[0].str.split(' ', expand=True)
- # THe first column should be HouseNo - we aren't interested in the other columns, but we don't know how
- # many columns there might be
- house_numbers = house_numbers.iloc[:, 0:1]
- house_numbers.columns = ['HouseNo']
+ split_addresses = ciga_list['Matched Address'].str.split(',', expand=True)
+ house_numbers = split_addresses[0].str.split(' ', expand=True)
+ # The first column should be HouseNo - we aren't interested in the other columns, but we don't know how
+ # many columns there might be
+ house_numbers = house_numbers.iloc[:, 0:1]
+ house_numbers.columns = ['HouseNo']
- ciga_list = pd.concat([ciga_list, house_numbers[["HouseNo"]]], axis=1)
- else:
- raise NotImplementedError("Implement me")
+ ciga_list = pd.concat([ciga_list, house_numbers[["HouseNo"]]], axis=1)
return ciga_list
+ @staticmethod
+ def dedupe_ciga_list(ciga_list):
+ ciga_list["unique_key"] = ciga_list["Matched Address"] + ciga_list["Matched Postcode"]
+ # Remove spaces from the unique key
+ ciga_list["unique_key"] = ciga_list["unique_key"].str.replace(" ", "")
+ # Remove punctuation from the unique key
+ ciga_list["unique_key"] = ciga_list["unique_key"].str.replace(r'[^\w\s]', '')
+ # Drop duplicated keys
+ ciga_list = ciga_list[~ciga_list["unique_key"].duplicated()]
+ return ciga_list
+
+ @staticmethod
+ def get_asset_sheetname(workbook):
+ if "Asset List" in workbook.sheetnames:
+ return "Asset List"
+ elif "Asset list" in workbook.sheetnames:
+ return "Asset list"
+ elif "Asset" in workbook.sheetnames and "Assets" not in workbook.sheetnames:
+ return "Asset"
+ elif "Decent Homes Stock" in workbook.sheetnames:
+ return "Decent Homes Stock"
+ elif "Report" in workbook.sheetnames:
+ return "Report"
+ else:
+ return "Assets"
+
+ @staticmethod
+ def get_ciga_sheetname(workbook):
+
+ if "CIGA Checks" in workbook.sheetnames:
+ return "CIGA Checks"
+ elif "CIGA checks" in workbook.sheetnames:
+ return "CIGA checks"
+ elif "CIGA check" in workbook.sheetnames:
+ return "CIGA check"
+ elif "CIGA Check" in workbook.sheetnames:
+ return "CIGA Check"
+ elif "CIGA requested" in workbook.sheetnames:
+ return "CIGA requested"
+ else:
+ return "CIGA"
+
+ @staticmethod
+ def get_survey_sheetname(workbook):
+ if "ECO Surveys" in workbook.sheetnames:
+ return "ECO Surveys"
+ elif "ECO Survey" in workbook.sheetnames:
+ return "ECO Survey"
+ elif "ECO 4 Surveys completed" in workbook.sheetnames:
+ return "ECO 4 Surveys completed"
+ elif "ECO4 Surveys" in workbook.sheetnames:
+ return "ECO4 Surveys"
+ else:
+ return "ECO surveys"
+
+ @staticmethod
+ def correct_ha51_asset_list(asset_list):
+ # Where the matching address contains "61 wandle bank", use the lower-cased Block value as the house number
+ asset_list["HouseNo"] = np.where(
+ asset_list["matching_address"].str.contains("61 wandle bank"),
+ asset_list["Block"].str.lower(),
+ asset_list["HouseNo"]
+ )
+
+ return asset_list
+
+ def prepare_ha17(self, workbook):
+ blocks_sheet = workbook["Blocks List - Cavity Wall only"]
+ blocks_data = []
+ blocks_colnames = [cell.value for cell in blocks_sheet[2]]
+ for row in blocks_sheet.iter_rows(min_row=4, values_only=False):
+ row_data = [cell.value for cell in row] # This will get you the cell values
+ blocks_data.append(row_data)
+
+ blocks_df = pd.DataFrame(blocks_data, columns=blocks_colnames)
+
+ blocks_df["matching_address"] = (
+ blocks_df["Block Name\n[as per Naming Convention procedure]"].astype(str).str.lower().str.strip() + ", " +
+ blocks_df["Block Street Name"].astype(str).str.lower().str.strip() + ", " +
+ blocks_df["Postcode"].astype(str).str.lower().str.strip()
+ )
+ blocks_df["matching_postcode"] = blocks_df["Postcode"].astype(str).str.lower().str.strip()
+ blocks_df["property_type"] = "Flat"
+
+ street_properties_sheet = workbook["Street Properties - Cavity Wall"]
+ street_properties_data = []
+ street_properties_colnames = [cell.value for cell in street_properties_sheet[2]]
+ for row in street_properties_sheet.iter_rows(min_row=3, values_only=False):
+ row_data = [cell.value for cell in row] # This will get you the cell values
+ street_properties_data.append(row_data)
+
+ street_properties_df = pd.DataFrame(street_properties_data, columns=street_properties_colnames)
+
+ street_properties_df["matching_address"] = (
+ street_properties_df["Block Name\n[as per Naming Convention procedure]"].astype(
+ str).str.lower().str.strip() + ", " +
+ street_properties_df["Postcode"].astype(str).str.lower().str.strip()
+ )
+ street_properties_df["matching_postcode"] = street_properties_df["Postcode"].astype(str).str.lower().str.strip()
+ street_properties_df["property_type"] = street_properties_df[
+ "Block typology based on dwelling type\n[defined list]"
+ ]
+
+ asset_list_compressed = pd.concat(
+ [
+ blocks_df[["matching_address", "matching_postcode", "property_type", "ECO Eligibility"]],
+ street_properties_df[["matching_address", "matching_postcode", "property_type", "ECO Eligibility"]]
+ ],
+ axis=0
+ )
+ # We expand addresses of the form "<start> to <end> <rest of address>" into one row per house number
+ range_pattern = r"(\d+)\s+to\s+(\d+)\s+(.*)"
+ asset_list = []
+ for _, row in tqdm(asset_list_compressed.iterrows(), total=len(asset_list_compressed)):
+ if row["ECO Eligibility"] == "Not Eligible":
+ asset_list.append(row.to_dict())
+ continue
+
+ # Detect a house number range
+ match = re.search(range_pattern, row["matching_address"])
+
+ if not match:
+ asset_list.append(row.to_dict())
+ continue
+
+ # Extracting the start and end of the range
+ start_number = int(match.group(1))
+ end_number = int(match.group(2))
+ rest_of_address = match.group(3)
+
+ # Generating the list of house numbers
+ house_numbers = list(range(start_number, end_number + 1))
+ data_to_extend = []
+ for house_number in house_numbers:
+ new_adress = f"{house_number} {rest_of_address}"
+
+ entry = row.to_dict().copy()
+ entry.update({"matching_address": new_adress})
+
+ data_to_extend.append(entry)
+
+ asset_list.extend(data_to_extend)
+
+ asset_list = pd.DataFrame(asset_list)
+
+ # Add in asset_list_row_id
+ asset_list["asset_list_row_id"] = ["HA17" + str(i) for i in range(0, len(asset_list))]
+
+ # Add on house number
+ asset_list = self.create_asset_list_house_no(ha_name="HA17", asset_list=asset_list)
+
+ return asset_list
+
def load_asset_list(self, filepath, ha_name):
workbook = openpyxl.load_workbook(filepath)
- asset_sheet = workbook["Assets"]
+ if ha_name == "HA17":
+ asset_list = self.prepare_ha17(workbook)
+ return asset_list, pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
+ else:
+ asset_sheetname = self.get_asset_sheetname(workbook)
+
+ asset_sheet = workbook[asset_sheetname]
asset_sheet_colnames = [cell.value for cell in asset_sheet[1]]
+ if ha_name == "HA25":
+ asset_sheet_colnames[11] = "matching_postcode"
+
+ if ha_name == "HA31":
+ asset_sheet_colnames[2] = "matching_postcode"
+
+ if ha_name == "HA54":
+ asset_sheet_colnames[10] = "matching_postcode"
+
+ if ha_name == "HA5":
+ asset_sheet_colnames[2] = "matching_postcode"
rows_data = []
+
for row in asset_sheet.iter_rows(min_row=2, values_only=False):
row_data = [cell.value for cell in row] # This will get you the cell values
rows_data.append(row_data)
asset_list = pd.DataFrame(rows_data, columns=asset_sheet_colnames)
+
asset_list = asset_list.loc[:, asset_list.columns.notnull()]
# Remove entirely empty rows - consider all rows apart from row_color
@@ -157,39 +1171,89 @@ class DataLoader:
asset_list = self.append_asset_list_built_form(ha_name=ha_name, asset_list=asset_list)
- # We check if there is a survey list
- survey_list = pd.DataFrame()
- if "ECO Surveys" in workbook.sheetnames:
- survey_sheet = workbook["ECO Surveys"]
- survey_rows = []
- for row in survey_sheet.iter_rows(min_row=2, values_only=False): # Assuming the first row is headers
- row_data = [cell.value for cell in row] # This will get you the cell values
- survey_rows.append(row_data)
+ # We correct the asset list if it needs it: the correction is applied only when this class
+ # defines a method named correct_<ha_name>_asset_list (ha_name lower-cased)
+ correction_function_name = f"correct_{ha_name.lower()}_asset_list"
+ if hasattr(self, correction_function_name):
+ asset_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_asset_list")
+ asset_list = asset_list_correction_function(asset_list)
- survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]])
+ # For HA1 and HA27, there is an exception in the structure of the data. We don't have any survey or ciga
+ # lists, and so
+ # we can return the asset list now
+ if ha_name in ["HA1", "HA27"]:
+ return asset_list, pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
+
+ # If we have ECO3 surveys, we need to match them, because any properties treated under ECO3 won't be
+ # suitable under ECO4, since their walls will be filled
+ eco3_list = pd.DataFrame()
+ sheetnames_lower = [x.lower() for x in workbook.sheetnames]
+ eco3_sheetname_index = [i for i, x in enumerate(sheetnames_lower) if "eco3" in x.replace(" ", "")]
+ if eco3_sheetname_index:
+ eco3_sheetname = workbook.sheetnames[eco3_sheetname_index[0]]
+ eco3_sheet = workbook[eco3_sheetname]
+ eco3_rows = []
+ for row in eco3_sheet.iter_rows(min_row=2, values_only=False): # Assuming the first row is headers
+ row_data = [cell.value for cell in row] # This will get you the cell values
+ eco3_rows.append(row_data)
+
+ eco3_list = pd.DataFrame(eco3_rows, columns=[cell.value for cell in eco3_sheet[1]])
# Remove columns that are None
- survey_list = survey_list.loc[:, survey_list.columns.notnull()]
- survey_list["survey_list_row_id"] = [ha_name + "_survey_" + str(i) for i in range(0, len(survey_list))]
- # Perform survey list merge
+ eco3_list = eco3_list.loc[:, eco3_list.columns.notnull()]
+ # Remove rows that are completely empty
+ eco3_list = eco3_list.loc[eco3_list.loc[:, eco3_list.columns].notnull().any(axis=1)]
+ eco3_list["eco3_list_row_id"] = [ha_name + "_Eco3_" + str(i) for i in range(0, len(eco3_list))]
+
+ # Perform the eco3 merge
+ if not eco3_list.empty:
+ eco3_list = self.merge_eco3_to_assets(asset_list, eco3_list, ha_name)
+
+ if ha_name in ["HA25"]:
+ # Accommodate HA25's unique structure: only the asset list and the ECO3 list are returned
+ return asset_list, pd.DataFrame(), pd.DataFrame(), eco3_list
+
+ # We load the survey list from the survey sheet (the sheet name varies between workbooks)
+ survey_sheetname = self.get_survey_sheetname(workbook)
+ survey_sheet = workbook[survey_sheetname]
+ survey_rows = []
+ for row in survey_sheet.iter_rows(min_row=2, values_only=False): # Assuming the first row is headers
+ row_data = [cell.value for cell in row] # This will get you the cell values
+ survey_rows.append(row_data)
+
+ survey_list = pd.DataFrame(survey_rows, columns=[cell.value for cell in survey_sheet[1]])
+ # Remove columns that are None
+ survey_list = survey_list.loc[:, survey_list.columns.notnull()]
+ # Remove rows that are completely empty
+ survey_list = survey_list.loc[survey_list.loc[:, survey_list.columns].notnull().any(axis=1)]
+ survey_list["survey_list_row_id"] = [ha_name + "_survey_" + str(i) for i in range(0, len(survey_list))]
+
+ # Perform survey list merge
+ if not survey_list.empty:
survey_list = self.merge_surveys_to_assets(asset_list, survey_list, ha_name)
# We check if there are CIGA checks
- ciga_list = pd.DataFrame()
- if "CIGA Checks" in workbook.sheetnames:
- ciga_sheet = workbook["CIGA Checks"]
- ciga_rows = []
- for row in ciga_sheet.iter_rows(min_row=2, values_only=False):
- row_data = [cell.value for cell in row] # This will get you the cell values
- ciga_rows.append(row_data)
+ ciga_sheetname = self.get_ciga_sheetname(workbook)
+ ciga_sheet = workbook[ciga_sheetname]
+ ciga_rows = []
+ for row in ciga_sheet.iter_rows(min_row=2, values_only=False):
+ row_data = [cell.value for cell in row] # This will get you the cell values
+ ciga_rows.append(row_data)
- ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]])
- # Remove columns that are None
- ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()]
- ciga_list = self.create_ciga_list_house_no(ha_name, ciga_list)
- # Perform ciga list merge
+ ciga_list = pd.DataFrame(ciga_rows, columns=[cell.value for cell in ciga_sheet[1]])
+ # Remove columns that are None
+ ciga_list = ciga_list.loc[:, ciga_list.columns.notnull()]
+ # Remove rows that are completely None
+ ciga_list = ciga_list.loc[ciga_list.loc[:, ciga_list.columns].notnull().any(axis=1)]
+ # Perform ciga list merge
+ if not ciga_list.empty:
+ # Remove rows with missing postcode which happens in a small number of cases
+ ciga_list = ciga_list[~pd.isnull(ciga_list["Matched Postcode"])]
+ ciga_list["ciga_list_row_id"] = [ha_name + "_ciga_" + str(i) for i in range(0, len(ciga_list))]
+ ciga_list = self.create_ciga_list_house_no(ciga_list)
+ ciga_list = self.dedupe_ciga_list(ciga_list)
ciga_list = self.merge_ciga_to_assets(asset_list, ciga_list, ha_name)
- return asset_list, survey_list, ciga_list
+ return asset_list, survey_list, ciga_list, eco3_list
@staticmethod
def correct_ha6_asset_list(asset_list):
@@ -208,6 +1272,241 @@ class DataLoader:
return asset_list
+ @staticmethod
+ def correct_ha56_asset_list(asset_list):
+ # CH1 4JR has already been surveyed, but it's listed in the asset list
+ # as a single row, when it's actually 32 units, so we just set this
+ # as ineligible
+ asset_list["ECO Eligibility"] = np.where(
+ asset_list["Post Code"] == "CH1 4JR",
+ "Not eligible",
+ asset_list["ECO Eligibility"]
+ )
+
+ # Same for CW8 3EU
+ asset_list["ECO Eligibility"] = np.where(
+ asset_list["Post Code"] == "CW8 3EU",
+ "Not eligible",
+ asset_list["ECO Eligibility"]
+ )
+
+ asset_list["ECO Eligibility"] = np.where(
+ asset_list["Post Code"] == "CW1 3HP",
+ "Not eligible",
+ asset_list["ECO Eligibility"]
+ )
+
+ asset_list["ECO Eligibility"] = np.where(
+ asset_list["Post Code"] == "WA4 2PH",
+ "Not eligible",
+ asset_list["ECO Eligibility"]
+ )
+
+ asset_list["ECO Eligibility"] = np.where(
+ asset_list["Post Code"] == "BD6 1QJ",
+ "Not eligible",
+ asset_list["ECO Eligibility"]
+ )
+
+ asset_list["ECO Eligibility"] = np.where(
+ asset_list["Post Code"] == "L39 1RS",
+ "Not eligible",
+ asset_list["ECO Eligibility"]
+ )
+
+ asset_list["ECO Eligibility"] = np.where(
+ asset_list["Post Code"] == "WA10 2DE",
+ "Not eligible",
+ asset_list["ECO Eligibility"]
+ )
+
+ # Already surveyed under ECO4
+ asset_list["ECO Eligibility"] = np.where(
+ asset_list["Post Code"] == "SK17 6NR",
+ "Not eligible",
+ asset_list["ECO Eligibility"]
+ )
+
+ asset_list["ECO Eligibility"] = np.where(
+ ((asset_list["Post Code"] == "WA5 0EN") &
+ (asset_list["Address 1"] == "Block 17-26 Tavlin Avenue")),
+ "Not eligible",
+ asset_list["ECO Eligibility"]
+ )
+
+ return asset_list
+
+ @staticmethod
+ def correct_ha14_asset_list(asset_list):
+
+ # For 5 Queens Court, DE72 3NP, the postcode is actually DE72 3QZ
+ asset_list.loc[
+ (asset_list["Address 1"] == "5 Queens Court") &
+ (asset_list["Postcode"].str.strip() == "DE72 3NP"),
+ "matching_postcode"
+ ] = "DE72 3QZ"
+
+ # We then correct the matching_address
+ asset_list.loc[
+ (asset_list["Address 1"] == "5 Queens Court") &
+ (asset_list["Postcode"].str.strip() == "DE72 3NP"),
+ "matching_address"
+ ] = "5 queens court, garfield avenue, draycott, derby, de72 3qz"
+
+ return asset_list
+
+ @staticmethod
+ def correct_ha15_asset_list(asset_list):
+ asset_list["matching_postcode"] = np.where(
+ asset_list["Address Line 1"] == "103 Priory Crescent",
+ "hp19 9ny",
+ asset_list["matching_postcode"]
+ )
+ return asset_list
+
+ @staticmethod
+ def correct_ha32_asset_list(asset_list):
+ asset_list["Postcode"] = np.where(
+ (asset_list["Street"] == "Norton Grove") & (asset_list["Postcode"] == "HU4 6HQ") & (
+ asset_list["Dwelling num"] == "7"),
+ "hu4 6hg",
+ asset_list["Postcode"]
+ )
+ return asset_list
+
+ @staticmethod
+ def correct_ha38_asset_list(asset_list):
+ # For Kingsford court, the house number is at the end of the address
+ def rearrange_address_if_flat(address):
+ if '/flat' in address.lower():
+ parts = address.split('/flat', 1)
+ return f"FLAT{parts[1]}, {parts[0]}"
+ return address
+
+ def extract_house_no_if_flat(address):
+ if '/flat' in address.lower():
+ # Attempt to extract the house number following "/flat"
+ try:
+ house_no = address.split('/flat ')[1].split(' ')[0]
+ # Remove trailing comma
+ house_no = house_no.replace(",", "")
+ except IndexError:
+ house_no = None
+ return house_no
+ return None
+
+ asset_list['ExtractedHouseNo'] = asset_list['matching_address'].apply(extract_house_no_if_flat)
+ asset_list.loc[asset_list['ExtractedHouseNo'].notnull(), 'HouseNo'] = asset_list['ExtractedHouseNo']
+ asset_list['matching_address'] = asset_list['matching_address'].apply(rearrange_address_if_flat)
+
+ # We update a few specific rows
+ asset_list["HouseNo"] = np.where(
+ (asset_list["Address_Line_1"].isin(
+ [
+ "10 SOUTH VIEW/ROOM A1",
+ "10 SOUTH VIEW/ROOM A2",
+ "10 SOUTH VIEW/ROOM A3",
+ ]
+ )),
+ "10A",
+ asset_list["HouseNo"]
+ )
+
+ asset_list["matching_address"] = np.where(
+ (asset_list["Address_Line_1"].isin(
+ [
+ "10 SOUTH VIEW/ROOM A1",
+ ]
+ )),
+ "10a, 10 south view/room a1, spennymoor, co. durham, dl16 7df'",
+ asset_list["matching_address"]
+ )
+
+ asset_list["HouseNo"] = np.where(
+ (asset_list["Address_Line_1"].isin(
+ [
+ "10 SOUTH VIEW/ROOM B1",
+ "10 SOUTH VIEW/ROOM B2",
+ "10 SOUTH VIEW/ROOM B3",
+ "10 SOUTH VIEW/ROOM B4",
+ ]
+ )),
+ "10B",
+ asset_list["HouseNo"]
+ )
+
+ asset_list["matching_address"] = np.where(
+ (asset_list["Address_Line_1"].isin(
+ [
+ "10 SOUTH VIEW/ROOM B1",
+ ]
+ )),
+ "10b, 10 south view/room b1, spennymoor, co. durham, dl16 7df",
+ asset_list["matching_address"]
+ )
+
+ asset_list["HouseNo"] = np.where(
+ (asset_list["Address_Line_1"].isin(
+ [
+ "10 SOUTH VIEW/FLAT C",
+ ]
+ )),
+ "10C",
+ asset_list["HouseNo"]
+ )
+
+ asset_list["matching_address"] = np.where(
+ (asset_list["Address_Line_1"].isin(
+ [
+ "10 SOUTH VIEW/FLAT C",
+ ]
+ )),
+ "FLAT c, spennymoor, co. durham, dl16 7df, 10c, 10 south view",
+ asset_list["matching_address"]
+ )
+
+ asset_list["HouseNo"] = np.where(
+ (asset_list["Address_Line_1"].isin(
+ [
+ "10 SOUTH VIEW/FLAT D",
+ ]
+ )),
+ "10D",
+ asset_list["HouseNo"]
+ )
+
+ asset_list["matching_address"] = np.where(
+ (asset_list["Address_Line_1"].isin(
+ [
+ "10 SOUTH VIEW/FLAT D",
+ ]
+ )),
+ "FLAT d, spennymoor, co. durham, dl16 7df, 10d, 10 south view",
+ asset_list["matching_address"]
+ )
+
+ asset_list["HouseNo"] = np.where(
+ (asset_list["Address_Line_1"].isin(
+ [
+ "10 SOUTH VIEW/FLAT E",
+ ]
+ )),
+ "10E",
+ asset_list["HouseNo"]
+ )
+
+ asset_list["matching_address"] = np.where(
+ (asset_list["Address_Line_1"].isin(
+ [
+ "10 SOUTH VIEW/FLAT E",
+ ]
+ )),
+ 'FLAT e, spennymoor, co. durham, dl16 7df, 10e, 10 south view',
+ asset_list["matching_address"]
+ )
+
+ return asset_list
+
@staticmethod
def correct_ha6_survey_list(survey_list):
@@ -335,24 +1634,1167 @@ class DataLoader:
"Post Code"
] = "ST5 7BY"
+ # PERFORM ADDITIONAL DROPS
+ # Dropping rows based on multiple conditions
+ conditions_to_drop = [
+ (survey_list['Street / Block Name'] == "Bedford Crescent") & (survey_list['Post Code'] == "ST5 3EH") & (
+ survey_list['NO.'] == 23) & (survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")),
+ (survey_list['Street / Block Name'] == "Hereford Avenue") & (survey_list['Post Code'] == "ST5 3EJ") & (
+ survey_list['NO.'] == 92) & (survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")),
+ (survey_list['Street / Block Name'] == "Seabridge Lane") & (survey_list['Post Code'] == "ST5 3EX") & (
+ survey_list['NO.'].isin([16, 18, 42])) & (
+ survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")),
+ (survey_list['Street / Block Name'] == "ESKDALE PLACE") & (survey_list['Post Code'] == "ST5 3QW") & (
+ survey_list['NO.'] == 5) & (survey_list['SUBMISSION DATE'].astype(str) == "2023-03-06 00:00:00"),
+ (survey_list['Street / Block Name'] == "Birch House road") & (survey_list['Post Code'] == "ST6 2LS") & (
+ survey_list['NO.'].isin([56, 58])),
+ (survey_list['Street / Block Name'] == "Blackthorn Place") & (survey_list['Post Code'] == "ST6 2LS") & (
+ survey_list['NO.'].isin([37, 39])),
+ (survey_list['Street / Block Name'] == "Whitethorn Way") & (survey_list['Post Code'] == "ST5 7BT") & (
+ survey_list['NO.'].isin([17, 6])),
+ (survey_list['Street / Block Name'] == "Lion Grove") & (survey_list['Post Code'] == "ST5 7HQ") & (
+ survey_list['NO.'].isin([10, 12])) & (
+ survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")),
+ (survey_list['Street / Block Name'] == "DENRY CRESCENT") & (survey_list['Post Code'] == "ST5 8JW") & (
+ survey_list['NO.'] == 87) & (survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")),
+ (survey_list['Street / Block Name'] == "HOLLINS CRESCENT") & (survey_list['Post Code'] == "ST7 1JW") & (
+ survey_list['NO.'] == 19)
+ ]
+
+ # Combine all conditions with an OR "|"
+ combined_condition = np.logical_or.reduce(conditions_to_drop)
+
+ # Drop rows that meet the combined condition
+ survey_list = survey_list[~combined_condition]
+
+ # Making replacements using np.where
+ survey_list['Post Code'] = np.where(
+ (survey_list['Street / Block Name'] == "Whitethorn Way") & (survey_list['Post Code'] == "ST5 3EH") & (
+ survey_list['NO.'] == 17),
+ "ST5 7BT",
+ survey_list['Post Code']
+ )
+
+ survey_list['Post Code'] = np.where(
+ (survey_list['Street / Block Name'] == "Whitethorn Way") & (survey_list['Post Code'] == "ST5 3ED") & (
+ survey_list['NO.'] == 6),
+ "ST5 7BT",
+ survey_list['Post Code']
+ )
+
+ # Maple avenue (stoke on trent, not newcastle) should be st7 1jw
+ survey_list["Post Code"] = np.where(
+ (survey_list["Street / Block Name"].str.lower().str.contains("maple avenue")) & (
+ survey_list["Post Code"].str.lower() == "st7 1jx"
+ ),
+ "st7 1jw",
+ survey_list["Post Code"]
+ )
+
+ # Hollins Crescent should be st7 1jx
+ survey_list["Post Code"] = np.where(
+ (survey_list["Street / Block Name"].str.lower().str.contains("hollins crescent")) & (
+ survey_list["Post Code"].str.lower() == "st7 1jw"
+ ),
+ "st7 1jx",
+ survey_list["Post Code"]
+ )
+
+ # Additional drops as the above misses some:
+ survey_list = survey_list[
+ ~((survey_list["NO."].astype(str).isin(["18", "42"])) &
+ (survey_list["Street / Block Name"] == "Seabridge Lane") &
+ (survey_list["Post Code"] == "ST5 3EY") &
+ (survey_list["SUBMISSION DATE"].astype(str) == "24.07.2023") &
+ (survey_list["INSTALLED OR CANCELLED"].str.contains("NO UPDATE YET")))
+ ]
+
return survey_list
+ @staticmethod
+ def correct_ha14_survey_list(survey_list):
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "Godfrey Road", "Godfrey Drive"
+ )
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "Oiliver Road", "Oliver Road"
+ )
+
+ # For postcodes DE7 4FB, DE7 4EZ, it's actually spelled WINDERMERE AVENUE, not WINDEREMERE AVENUE (without the
+ # extra e)
+ survey_list.loc[
+ (survey_list["Street / Block Name"] == "WINDEREMERE AVENUE") &
+ (survey_list["Post Code"].isin(["DE7 4FB", "DE7 4EZ"])),
+ "Street / Block Name"
+ ] = "WINDERMERE AVENUE"
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "MACDONALD SQAURE", "MACDONALD SQUARE"
+ )
+
+ return survey_list
+
+ @staticmethod
+ def correct_ha15_survey_list(survey_list):
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "Mary Mac Manus Drive, Milton Keynes", "Mary Mac Manus Drive"
+ )
+
+ return survey_list
+
+ @staticmethod
+ def correct_ha16_survey_list(survey_list):
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("/", ", ")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.lower()
+ survey_list["Street / Block Name"] = np.where(
+ survey_list["Street / Block Name"] == "REEDS RD",
+ "Reeds ROAD",
+ survey_list["Street / Block Name"]
+ )
+ # Replace the standalone word "rd" with "road"
+ survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(r'\brd\b', 'road',
+ regex=True)
+
+ # Replace " , " with ", "
+ survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(
+ " , ", ', ',
+ )
+ # Fix "{place} ,{place}" with "{place}, {place}"
+ survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.replace(r'\s*,\s*', ', ',
+ regex=True)
+ # Strip whitespace
+ survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.strip()
+
+ # Correct errors
+ survey_list["Post Code"] = np.where(
+ survey_list["Post Code"] == "M38 0SA",
+ "M38 9SA",
+ survey_list["Post Code"]
+ )
+
+ survey_list["Post Code"] = np.where(
+ (survey_list["Street / Block Name"] == "nelson drive") & (survey_list["Post Code"] == "M44 5JE"),
+ "M44 5JF",
+ survey_list["Post Code"]
+ )
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("eccels", "eccles")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("chatley, road",
+ "chatley road")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("vaughen", "Vaughan")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("cresent", "crescent")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("plantation road",
+ "plantation avenue")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("how clough drive",
+ "howclough drive")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("brockhurst lane",
+ "brookhurst lane")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("biirch road",
+ "birch road")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hadson road",
+ "hodson road")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("harbonne avennue",
+ "narbonne avenue")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "cumberland road, cadishead",
+ "cumberland avenue, cadishead")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("aston field drive",
+ "ashton field drive")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("wedgewood road",
+ "wedgwood road")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hamilton close",
+ "hamilton avenue")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "lichens crescent, fitton hill",
+ "lichens crescent")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("south croft, fitton hill",
+ "south croft")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(", fitton hill", "")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("firtree dr",
+ "fir tree avenue")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hawthorne road",
+ "hawthorn crescent")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("rein lee avenue",
+ "reins lee avenue")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("westerhill road",
+ "wester hill road")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("st martins road",
+ "saint martins road")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("timperley avenue",
+ "timperley close")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("eastwood road",
+ "eastwood avenue")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("new road", "new street")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("grassmere road",
+ "grasmere road")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("hulton road",
+ "hulton avenue")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("beechfield avenue",
+ "beechfield road")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("princess avenue",
+ "princes avenue")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("edge ford crecent",
+ "edge fold crescent")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("conniston avenue",
+ "coniston avenue")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("blackthorne crescent",
+ "blackthorn crescent")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("wellstock road",
+ "wellstock lane")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("brackley avenue",
+ "brackley street")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("brook avenue swinton",
+ "brook avenue, swinton")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("green avenue swinton",
+ "green avenue, swinton")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("grasmere avenue wardley",
+ "grasmere avenue, wardley")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("mardale avenue wardle",
+ "mardale avenue, wardle")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("carleach grove",
+ "cartleach Grove")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("arbour grove",
+ "arbor Grove")
+
+ # Replacement for clively avenue 66-68
+ survey_list["NO."] = np.where(
+ survey_list["NO."] == "66-68",
+ "66",
+ survey_list["NO."]
+ )
+
+ # Delete some duplicated entries
+ survey_list = survey_list[
+ ~((survey_list["Street / Block Name"] == "york road") &
+ (survey_list["NO."].astype(str) == "12") &
+ (survey_list["Post Code"] == "M44 5HU") &
+ (survey_list["SUBMISSION DATE"].astype(str) == "45229"))
+ ]
+
+ survey_list = survey_list[
+ ~((survey_list["Street / Block Name"] == "peatfield avenue") &
+ (survey_list["NO."].astype(str) == "23") &
+ (survey_list["Post Code"] == "M27 9XG") &
+ (survey_list["SUBMISSION DATE"].astype(str) == "45236"))
+ ]
+
+ return survey_list
+
+ @staticmethod
+ def correct_ha24_survey_list(survey_list):
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace("/", ", ")
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.lower()
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.strip()
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "council house, nidds lane", "nidds lane"
+ )
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "wirral avenue", "wirrall avenue"
+ )
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "st ives road", "st. ives crescent"
+ )
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "sundringham road", "sandringham road"
+ )
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "milton avenue", "milton road"
+ )
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "st ives crescent", "st. ives crescent"
+ )
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "council house, waterbelly lane", "waterbelly lane"
+ )
+ # Generally remove "council house, " wherever it appears in the street name
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "council house, ", ""
+ )
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "st. leodegars close", "st leodegars close"
+ )
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "montgomery crescent", "montgomery road"
+ )
+
+ return survey_list
+
+ @staticmethod
+ def correct_ha28_survey_list(survey_list):
+ # Rename the "NO " column (note the trailing space) to "NO." to align with the other survey sheets
+ survey_list = survey_list.rename(columns={"NO ": "NO."})
+
+ survey_list["Post Code"] = np.where(
+ survey_list["Post Code"] == "ME75HA",
+ "ME7 5HA",
+ survey_list["Post Code"]
+ )
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "ANDREW MANOR/BRITTON ST", "ANDREW MANOR"
+ )
+
+ survey_list["Post Code"] = np.where(
+ survey_list["Post Code"] == "ME75TW",
+ "ME7 5TW",
+ survey_list["Post Code"]
+ )
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "ST MARKS HOUSE/SAXON ST", "ST MARKS HOUSE"
+ )
+
+ return survey_list
+
+ @staticmethod
+ def correct_ha38_survey_list(survey_list):
+ # Rename the "NO " column (note the trailing space) to "NO." to align with the other survey sheets
+ survey_list = survey_list.rename(columns={"NO ": "NO."})
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ 'Kingsford Court, Coombe Valley Road', 'Kingsford Court'
+ )
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ 'LESLIE TEW COURT/DERWENT ROAD', 'LESLIE TEW COURT'
+ )
+
+ # There is no 18A LESLIE TEW COURT in the asset list
+ survey_list = survey_list[
+ ~((survey_list["Street / Block Name"] == "LESLIE TEW COURT") &
+ (survey_list["Post Code"] == "TN10 3TX") &
+ (survey_list["NO."] == "18A"))
+ ]
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ 'Brindley House, Wellbeck Road', 'Brindley House'
+ )
+
+ # Try taking just the first part of the string, splitting on a /
+ survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.split('/').str[0].str.strip()
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ 'HUNTSMAN WAY', 'HUNTSMANS WAY'
+ )
+
+ # Try taking just the first part of the string, splitting on a ,
+ survey_list['Street / Block Name'] = survey_list['Street / Block Name'].str.split(',').str[0].str.strip()
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "McCLAREN COURT", "MCLAREN COURT"
+ )
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "ST JAMES CLOISTERS", "ST. JAMES'S CLOISTERS"
+ )
+
+ survey_list["Street / Block Name"] = np.where(
+ ((survey_list["NO."].isin(
+ [
+ "FLAT 1 22",
+ "FLAT 2 22",
+ "FLAT 3 22",
+ "FLAT 4 22",
+ "FLAT 5 22",
+ "FLAT 6 22",
+ ]
+ )) &
+ (survey_list["Street / Block Name"] == "MELTON ROAD")),
+ "22 MELTON ROAD",
+ survey_list["Street / Block Name"]
+ )
+
+ survey_list["Street / Block Name"] = np.where(
+ ((survey_list["NO."].isin(
+ [
+ "FLAT 1 24",
+ "FLAT 2 24",
+ "FLAT 3 24",
+ "FLAT 4 24",
+ "FLAT 5 24",
+ "FLAT 6 24",
+ ]
+ )) &
+ (survey_list["Street / Block Name"] == "MELTON ROAD")),
+ "24 MELTON ROAD",
+ survey_list["Street / Block Name"]
+ )
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "TURRETT GREEN COURT SILENT STREET", "TURRET GREEN COURT"
+ )
+
+ # Turret green court flat 1 doesn't exist in the asset list
+ survey_list = survey_list[
+ ~((survey_list["Street / Block Name"] == "TURRET GREEN COURT") &
+ (survey_list["NO."] == 1))
+ ]
+ # 3, 45 raywell street doesn't exist in the asset list
+ survey_list = survey_list[
+ ~((survey_list["Street / Block Name"] == "45 RAYWELL STREET") &
+ (survey_list["NO."] == 3))
+ ]
+
+ # 40 Avondale drive doesn't exist in the asset list
+ survey_list = survey_list[
+ ~((survey_list["Street / Block Name"] == "Avondale Drive") &
+ (survey_list["NO."] == 40))
+ ]
+ # 17A beech road has the wrong postcode
+ survey_list["Post Code"] = np.where(
+ (survey_list["Street / Block Name"] == "BEECH ROAD") &
+ (survey_list["Post Code"] == "DH6 1JD"),
+ "DH6 1JB",
+ survey_list["Post Code"]
+ )
+
+ survey_list["Street / Block Name"] = np.where(
+ (survey_list["Street / Block Name"] == "SOUTHVIEW") &
+ (survey_list["Post Code"] == "DL16 7DF"),
+ "SOUTH VIEW",
+ survey_list["Street / Block Name"]
+ )
+
+ survey_list["Post Code"] = np.where(
+ (survey_list["Street / Block Name"] == "BEECH ROAD") &
+ (survey_list["Post Code"] == "DH6 1JD"),
+ "DH6 1JB",
+ survey_list["Post Code"]
+ )
+
+ return survey_list
+
+ @staticmethod
+ def correct_ha32_survey_list(survey_list):
+ survey_list["Street / Block Name"] = np.where(
+ survey_list["Street / Block Name"] == "Coxwold",
+ "Coxwold Grove",
+ survey_list["Street / Block Name"]
+ )
+
+ # Correct the misspelled "Barringhton Avenue" to "Barrington Avenue"
+ survey_list["Street / Block Name"] = np.where(
+ survey_list["Street / Block Name"] == "Barringhton Avenue",
+ "Barrington Avenue",
+ survey_list["Street / Block Name"]
+ )
+
+ # Update how the Rustenburg addresses are listed in the identified addresses
+ survey_list["Street / Block Name"] = np.where(
+ survey_list["Street / Block Name"] == "Rustenburg",
+ "Rustenburg Street",
+ survey_list["Street / Block Name"]
+ )
+
+ # Update how the MALIN LODGE, RONALDSWAY CLOSE addresses are listed in the identified addresses
+ survey_list["Street / Block Name"] = np.where(
+ survey_list["Street / Block Name"] == "MALIN LODGE, RONALDSWAY CLOSE",
+ "Malin Lodge",
+ survey_list["Street / Block Name"]
+ )
+
+ # Correct the misspelled "Feroes Close" to "Faroes Close"
+ survey_list["Street / Block Name"] = np.where(
+ survey_list["Street / Block Name"] == "Feroes Close",
+ "Faroes Close",
+ survey_list["Street / Block Name"]
+ )
+
+ survey_list["Street / Block Name"] = np.where(
+ survey_list["Street / Block Name"] == 'FORESTER WAY',
+ 'FORESTER WAY',
+ survey_list["Street / Block Name"]
+ )
+
+ survey_list["Street / Block Name"] = np.where(
+ survey_list["Street / Block Name"] == '6 Zeigfeld',
+ 'Ziegfeld Court',
+ survey_list["Street / Block Name"]
+ )
+
+ # Malin Lodge, Ronaldsway Close
+ survey_list["Street / Block Name"] = np.where(
+ survey_list["Street / Block Name"] == 'Malin Lodge, Ronaldsway Close',
+ 'Malin Lodge',
+ survey_list["Street / Block Name"]
+ )
+
+ return survey_list
+
+ @staticmethod
+ def correct_ha50_survey_list(survey_list):
+
+ survey_list["Post Code"] = np.where(
+ (survey_list["Street / Block Name"] == 'COSELEY STREET') &
+ (survey_list["Post Code"] == 'ST16 1LR'),
+ "ST6 1JU",
+ survey_list["Post Code"]
+ )
+
+ # Remove some of COSELEY STREET, as we have surveys done outside of the asset list
+ survey_list = survey_list[
+ ~((survey_list["Street / Block Name"] == "COSELEY STREET") &
+ (survey_list["Post Code"] == "ST6 1JU") &
+ (survey_list["NO."].isin([96])))
+ ]
+
+ survey_list["Post Code"] = survey_list["Post Code"].str.replace("ST33JZ", "ST3 3JZ")
+
+ # Remove some of Jesmond drive as we have surveys done outside of the asset list
+ survey_list = survey_list[
+ ~((survey_list["Street / Block Name"] == "Jesmond Drive") &
+ (survey_list["Post Code"] == "ST3 3JZ") &
+ (survey_list["NO."].isin([29])))
+ ]
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "BRUNDELL OVAL", "BRUNDALL OVAL"
+ )
+
+ # Remove 4 Linden Place
+ survey_list = survey_list[
+ ~((survey_list["Street / Block Name"] == "Linden Place") &
+ (survey_list["Post Code"] == "ST3 3AT") &
+ (survey_list["NO."].isin([4])))
+ ]
+
+ # Remove 11 Tilehurst Place
+ survey_list = survey_list[
+ ~((survey_list["Street / Block Name"] == "Tilehurst Place") &
+ (survey_list["Post Code"] == "ST3 3AP") &
+ (survey_list["NO."].isin([11])))
+ ]
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "deavile road", "DEAVILLE ROAD"
+ )
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "WOOLISCROFT ROAD", "WOOLLISCROFT ROAD"
+ )
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "Leak Road", "Leek Road"
+ )
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "Springfield road", "Springfields road"
+ )
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "MILLWARD RD", "MILLWARD ROAD"
+ )
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "REPINGTON RD", "REPINGTON ROAD"
+ )
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "ECCELSTONE PLACE", "ECCLESTONE PLACE"
+ )
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "St. James Place", "St James Place"
+ )
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "CHELL HEATH RD", "CHELL HEATH ROAD"
+ )
+ # Correct postcode
+ survey_list["Post Code"] = np.where(
+ (survey_list["Street / Block Name"] == 'CHELL HEATH ROAD') &
+ (survey_list["Post Code"] == 'ST6 6HU'),
+ "ST6 6HJ",
+ survey_list["Post Code"]
+ )
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "Franklin Rd", "Franklin Road"
+ )
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "Lodge Rd", "Lodge Road"
+ )
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "St Matthews Street", "St Matthew Street"
+ )
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "Grove Bank Road", "Grovebank Road"
+ )
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "OVERSLEY RD", "OVERSLEY ROAD"
+ )
+
+ # Replace all of the " RD" with " ROAD"
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ " RD", " ROAD"
+ )
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "St. Georges Crescent", "St Georges Crescent"
+ )
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "Tewson Road", "Tewson Green"
+ )
+
+ # Remove 55 Seabridge Lane
+ survey_list = survey_list[
+ ~((survey_list["Street / Block Name"] == "Seabridge Lane") &
+ (survey_list["Post Code"] == "ST5 4AG") &
+ (survey_list["NO."].isin([55])))
+ ]
+
+ survey_list = survey_list[
+ ~((survey_list["Street / Block Name"] == "Tyne Way") &
+ (survey_list["Post Code"] == "ST5 4AX") &
+ (survey_list["NO."].isin([56])))
+ ]
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "St.Bernards Place", "St Bernard Place"
+ )
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "Penarth Road", "Penarth Grove"
+ )
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "St. Marys Road", "St Marys Road"
+ )
+
+ survey_list["Street / Block Name"] = survey_list["Street / Block Name"].str.replace(
+ "Larch Drive", "Larch Grove"
+ )
+
+ # Drop 31 Lauder place north, as there is a duplicate. This version also has a wrong postcode
+ survey_list = survey_list[
+ ~((survey_list["Street / Block Name"] == "LAUDER PLACE NORTH") &
+ (survey_list["Post Code"] == "ST20QS") &
+ (survey_list["NO."].isin([31])))
+ ]
+
+ # Handle dropping of dupes
+ survey_list["street_pruner"] = survey_list["Street / Block Name"].str.lower().str.replace(" ", "")
+ survey_list["postcode_pruner"] = survey_list["Post Code"].str.lower().str.replace(" ", "")
+
+ # Should go to 18
+ survey_list = survey_list.drop_duplicates(["NO.", "street_pruner", "postcode_pruner"])
+ survey_list = survey_list.drop(columns=["street_pruner", "postcode_pruner"])
+
+ return survey_list
+
+ @staticmethod
+ def correct_ha107_survey_list(survey_list):
+     """
+     Apply the known street-name corrections to the HA107 survey list.
+
+     :param survey_list: survey dataframe with a "Street / Block Name" column; updated in place
+     :return: the same dataframe with the corrected street names
+     """
+     street_column = "Street / Block Name"
+     corrections = (
+         # Misspelt place and street names
+         ("Front Street, East Stockham", "Front Street, East Stockwith"),
+         ("HONEYHOLE L;ANE", "HONEYHOLES LANE"),
+         # Missing comma between the street and the village
+         ("Croft Lane Cherry Willingham, Lincoln", "Croft Lane, Cherry Willingham, Lincoln"),
+         ("Snelland Road Wickenby, Lincoln", "Snelland Road, Wickenby, Lincoln"),
+         ("Reasby Road Snelland, Lincoln", "Reasby Road, Snelland, Lincoln"),
+         ("Silver Street Bardney, Lincoln", "Silver Street, Bardney, Lincoln"),
+         ("Manor Close Bardney, Lincoln", "Manor Close, Bardney, Lincoln"),
+         ("Ferry Road Southrey, Lincoln", "Ferry Road, Southrey, Lincoln"),
+         ("Harvey Kent Gardens Bardney, Lincoln", "Harvey Kent Gardens, Bardney, Lincoln"),
+         ("Wragby Road Bardney, Lincoln", "Wragby Road, Bardney, Lincoln"),
+         # Misspelt street name
+         ("SPRINKHILL ROAD", "SPINKHILL ROAD"),
+     )
+     for wrong, right in corrections:
+         survey_list[street_column] = survey_list[street_column].str.replace(wrong, right)
+
+     return survey_list
+
+ @staticmethod
+ def correct_ha41_survey_list(survey_list):
+ """
+ Return the HA41 survey list unchanged; no corrections are applied.
+
+ The method exists so that merge_surveys_to_assets can resolve a correction function by name for HA41.
+ """
+ return survey_list
+
+ @staticmethod
+ def correct_ha12_survey_list(survey_list):
+     """
+     Apply the known street-name and postcode corrections to the HA12 survey list.
+
+     :param survey_list: survey dataframe with "Street / Block Name" and "Post Code" columns; updated in place
+     :return: the same dataframe with the corrected values
+     """
+     corrections_by_column = {
+         "Street / Block Name": (
+             ("Henstone Road", "Hanstone Road"),
+             ("Lindern avenue", "Linden Avenue"),
+             ("priness way", "Princess Way"),
+             ("Worth Crecesent", "Worth Crescent"),
+             ("Adderbrook Crescent", "Addenbrooke Crescent"),
+             ("Kinver Road", "Kinver Avenue"),
+         ),
+         # Postcodes recorded without the separating space
+         "Post Code": (
+             ("DY117HA", "DY11 7HA"),
+             ("DY117HF", "DY11 7HF"),
+         ),
+     }
+     for column, corrections in corrections_by_column.items():
+         for wrong, right in corrections:
+             survey_list[column] = survey_list[column].str.replace(wrong, right)
+
+     return survey_list
+
+ @staticmethod
+ def correct_ha13_survey_list(survey_list):
+     """
+     Apply the known corrections to the HA13 survey list.
+
+     Street-name and postcode typos are fixed, then one duplicated record is removed. Every replacement is
+     literal (regex=False): the postcode typo "HP2 5SF+" ends in a regex quantifier, so on pandas versions
+     where str.replace treats the pattern as a regular expression the trailing "+" would be left in place.
+
+     :param survey_list: survey dataframe with "NO.", "Street / Block Name", "Post Code" and
+         "INSTALLED OR CANCELLED" columns; the text columns are updated in place
+     :return: the corrected dataframe without the duplicated record
+     """
+     street_column = "Street / Block Name"
+     street_corrections = (
+         ("Woodfarm Road", "WOOD FARM ROAD"),
+         ("ALLANDALE ROAD", "ALLANDALE"),
+         ("NEWFIELDS LANE", "NEWFIELD LANE"),
+         ("BROADFIELDS ROAD", "BROADFIELD ROAD"),
+         ("PESCOTT HILL", "PESCOT HILL"),
+     )
+     for wrong, right in street_corrections:
+         survey_list[street_column] = survey_list[street_column].str.replace(wrong, right, regex=False)
+
+     # Must be matched literally, "+" is a regex metacharacter
+     survey_list["Post Code"] = survey_list["Post Code"].str.replace("HP2 5SF+", "HP2 5SF", regex=False)
+
+     # This is a duplicate record
+     is_duplicate = (
+         (survey_list["NO."] == 33) &
+         (survey_list[street_column] == "Turners Hill") &
+         (survey_list["Post Code"] == "HP2 4LH") &
+         (survey_list["INSTALLED OR CANCELLED"] == "NO UPDATE - CHECKED 18.12.23")
+     )
+
+     return survey_list[~is_duplicate]
+
+ @staticmethod
+ def correct_ha18_survey_list(survey_list):
+ """
+ Return the HA18 survey list unchanged; no corrections are applied.
+
+ The method exists so that merge_surveys_to_assets can resolve a correction function by name for HA18.
+ """
+ return survey_list
+
+ @staticmethod
+ def correct_ha35_survey_list(survey_list):
+ """
+ Return the HA35 survey list unchanged; no corrections are applied.
+
+ The method exists so that merge_surveys_to_assets can resolve a correction function by name for HA35.
+ """
+ return survey_list
+
+ @staticmethod
+ def correct_ha34_survey_list(survey_list):
+     """
+     Apply the known corrections to the HA34 survey list.
+
+     The steps run in a fixed order because some row removals rely on street names that were corrected
+     by an earlier step.
+
+     :param survey_list: survey dataframe with "NO.", "Street / Block Name" and "Post Code" columns
+     :return: the corrected dataframe with unmatched and duplicated rows removed
+     """
+     street_column = "Street / Block Name"
+     number_column = "NO."
+     postcode_column = "Post Code"
+
+     def rename_streets(frame, corrections):
+         # Apply each (wrong, right) street-name replacement in order
+         for wrong, right in corrections:
+             frame[street_column] = frame[street_column].str.replace(wrong, right)
+         return frame
+
+     def drop_address(frame, street_name, house_number):
+         # Remove the rows for one house on one street
+         unwanted = (frame[street_column] == street_name) & (frame[number_column] == house_number)
+         return frame[~unwanted]
+
+     # Rows for postcode L5 3SS are removed (see the note in the asset list)
+     survey_list = survey_list[survey_list[postcode_column] != "L5 3SS"]
+
+     survey_list[postcode_column] = survey_list[postcode_column].str.replace("L177DR", "L17 7DR")
+
+     survey_list = rename_streets(
+         survey_list,
+         (
+             ("PENVALLEY CRESENT", "Penvalley Crescent"),
+             ("PENLINKEN DRIVE", "Penlinken Drive"),
+         ),
+     )
+
+     # There's no 32 Penlinken Drive and no 30 Gwent Street in the asset sheet
+     survey_list = drop_address(survey_list, "Penlinken Drive", 32)
+     survey_list = drop_address(survey_list, "GWENT ST", 30)
+
+     survey_list = rename_streets(
+         survey_list,
+         (
+             ("POULTON RD", "Poulton Road"),
+             ("ST PAULS RD", "St Pauls Road"),
+             ("BROAD LANE, KIRKBY", "BROAD LANE"),
+             ("BULLENS RD, KIRKBY", "Bullens Road"),
+         ),
+     )
+
+     # There's no 219 NORTH HILL ST in the asset sheet
+     survey_list = drop_address(survey_list, "NORTH HILL ST", 219)
+
+     survey_list = rename_streets(
+         survey_list,
+         (
+             ("CROSLAND RD, KIRKBY", "CROSLAND ROAD"),
+             ("PARK BROW DRIVE, KIRKBY", "Park Brow Drive"),
+             ("CELTIC TREET", "Celtic Street"),
+             ("BUCKLAND ROAD", "Buckland Street"),
+         ),
+     )
+
+     # Exact duplicates
+     survey_list = survey_list.drop_duplicates([street_column, number_column, postcode_column])
+
+     # This is a duplicate with wrong postcode
+     wrong_postcode_duplicate = (
+         (survey_list[street_column] == "CLARIBEL STREET") &
+         (survey_list[number_column] == 7) &
+         (survey_list[postcode_column] == "L8 8AF")
+     )
+     survey_list = survey_list[~wrong_postcode_duplicate]
+
+     # Normalise the house number "187 A" to "187A" for postcode L32 6QF
+     needs_compact_number = (
+         (survey_list[number_column] == "187 A") &
+         (survey_list[postcode_column] == "L32 6QF")
+     )
+     survey_list[number_column] = np.where(needs_compact_number, "187A", survey_list[number_column])
+
+     return survey_list
+
+ @staticmethod
+ def correct_ha56_survey_list(survey_list):
+     """
+     Apply the known corrections to the HA56 survey list.
+
+     :param survey_list: survey dataframe with "NO.", "Street / Block Name" and "Post Code" columns
+     :return: the corrected dataframe with the unmatched rows removed
+     """
+     street_column = "Street / Block Name"
+
+     # Not in asset list
+     not_in_assets = (
+         (survey_list[street_column] == "Samual Street") &
+         (survey_list["NO."].isin([22, 24])) &
+         (survey_list["Post Code"] == "WA5 1BB")
+     )
+     survey_list = survey_list[~not_in_assets]
+
+     # Expand the abbreviated "RD" street names
+     abbreviated_streets = (
+         ("STOURTON RD", "Stourton Road"),
+         ("BIRKIN RD", "Birkin Road"),
+         ("PORTLAND RD", "Portland Road"),
+     )
+     for wrong, right in abbreviated_streets:
+         survey_list[street_column] = survey_list[street_column].str.replace(wrong, right)
+
+     # We remove a row, because two rows match to a block listing
+     block_listing_clash = (
+         (survey_list[street_column] == "Tavlin Avenue") &
+         (survey_list["NO."] == 17) &
+         (survey_list["Post Code"] == "WA5 0EN")
+     )
+
+     return survey_list[~block_listing_clash]
+
+ @staticmethod
+ def correct_ha30_survey_list(survey_list):
+     """
+     Apply the known corrections to the HA30 survey list.
+
+     Rows without a postcode are dropped, the street is cut at the first "/", and a fixed sequence of
+     renames and row removals follows. The order is kept because several removals act on a street name
+     produced by the rename immediately before them.
+
+     :param survey_list: survey dataframe with "NO.", "Street / Block Name" and "Post Code" columns
+     :return: the corrected dataframe with the unmatched rows removed
+     """
+     street_column = "Street / Block Name"
+     number_column = "NO."
+
+     def rename_street(frame, wrong, right):
+         # Replace one street spelling with another
+         frame[street_column] = frame[street_column].str.replace(wrong, right)
+         return frame
+
+     def drop_numbers(frame, street_name, house_numbers):
+         # Remove the listed house numbers on one street
+         unwanted = (frame[street_column] == street_name) & (frame[number_column].isin(house_numbers))
+         return frame[~unwanted]
+
+     survey_list = survey_list[~pd.isnull(survey_list["Post Code"])]
+
+     # Split on / and take the first half
+     survey_list[street_column] = survey_list[street_column].str.split("/").str[0]
+
+     # Not in the asset list
+     absent_addresses = (
+         ("Horsebridge Road", 286),
+         ("DUTTON WAY", 9),
+         ("PAYTHORNE CLOSE", 10),
+         ("MARCHWOOD ROAD", 11),
+         ("Otterburn Close", 4),
+         ("Blossom Court", 5),
+     )
+     for street_name, house_number in absent_addresses:
+         unwanted = (survey_list[street_column] == street_name) & (survey_list[number_column] == house_number)
+         survey_list = survey_list[~unwanted]
+
+     survey_list = rename_street(survey_list, "St LUKES CLOSE , HUNTINGDON", "St. Lukes Close")
+     survey_list = drop_numbers(survey_list, "St. Lukes Close", [4, 7, 8])
+
+     survey_list = rename_street(survey_list, "ROMAN WAY , GODMANCHESTER , HUNTINGDON", "Roman Way")
+     survey_list = drop_numbers(survey_list, "Roman Way", [58])
+
+     survey_list = rename_street(survey_list, "HEADLANDS , FENSTANTON , HUNTINGDON", "Headlands Fenstanton")
+     survey_list = drop_numbers(survey_list, "Headlands Fenstanton", [126, 134])
+
+     survey_list = rename_street(survey_list, "WALLACE COURT , HUNTINGDON", "Wallace Court")
+     survey_list = rename_street(survey_list, "CRICKETERS WAY , CHATTERIS", "Cricketers Way")
+     survey_list = rename_street(survey_list, "Jubilee Gardens", "Jubilee Green")
+
+     survey_list = drop_numbers(survey_list, "Harrow Road", [10])
+
+     # Upper-case entries that carry no town suffix
+     survey_list = rename_street(survey_list, "ST LUKES CLOSE", "St. Lukes Close")
+
+     return survey_list
+
+ @staticmethod
+ def correct_ha49_survey_list(survey_list):
+ """
+ Return the HA49 survey list unchanged; no corrections are applied.
+
+ The method exists so that merge_surveys_to_assets can resolve a correction function by name for HA49.
+ """
+ return survey_list
+
+ @staticmethod
+ def correct_ha8_survey_list(survey_list):
+     """
+     Apply the known corrections to the HA8 survey list.
+
+     :param survey_list: survey dataframe with "Street / Block Name" and "Post Code" columns
+     :return: the corrected dataframe without the rows that have no postcode
+     """
+     street_column = "Street / Block Name"
+
+     # Split on / and take the first half
+     survey_list[street_column] = survey_list[street_column].str.split("/").str[0]
+
+     street_corrections = (
+         ("WESTONIA COURT HOUSE", "Westonia Court"),
+         ("Hillesdon Avenue", "Hillesden Avenue"),
+         ("Weston Street", "Western Street"),
+     )
+     for wrong, right in street_corrections:
+         survey_list[street_column] = survey_list[street_column].str.replace(wrong, right)
+
+     # Remove placeholder rows where postcode is missing
+     postcode_missing = pd.isnull(survey_list["Post Code"])
+
+     return survey_list[~postcode_missing]
+
+ @staticmethod
+ def correct_ha11_survey_list(survey_list):
+     """
+     Remove 39 HOLLYWOOD WAY from the HA11 survey list, as it's not in the asset list.
+
+     :param survey_list: survey dataframe with "NO." and "Street / Block Name" columns
+     :return: the dataframe without the unmatched row
+     """
+     on_hollywood_way = survey_list["Street / Block Name"] == "HOLLYWOOD WAY"
+     is_number_39 = survey_list["NO."] == 39
+
+     return survey_list[~(on_hollywood_way & is_number_39)]
+
+ @staticmethod
+ def correct_ha42_survey_list(survey_list):
+     """
+     Blank out two street names in the HA42 survey list.
+
+     The original asset list has nothing in the street for these entries, so the names are removed
+     from the survey list as well.
+
+     :param survey_list: survey dataframe with a "Street / Block Name" column; updated in place
+     :return: the same dataframe with the two street names replaced by an empty string
+     """
+     street_column = "Street / Block Name"
+     for street_name in ("Turnstone Terrace", "Pegasus place"):
+         survey_list[street_column] = survey_list[street_column].str.replace(street_name, "")
+
+     return survey_list
+
+ @staticmethod
+ def correct_ha45_survey_list(survey_list):
+     """
+     Rename "Norwich Road" to "Norwich Avenue" in the HA45 survey list.
+
+     :param survey_list: survey dataframe with a "Street / Block Name" column; updated in place
+     :return: the same dataframe with the corrected street name
+     """
+     streets = survey_list["Street / Block Name"]
+     survey_list["Street / Block Name"] = streets.str.replace("Norwich Road", "Norwich Avenue")
+
+     return survey_list
+
+ @staticmethod
+ def correct_ha51_survey_list(survey_list):
+     """
+     Apply the known corrections to the HA51 survey list.
+
+     The house number column is headed "NO " in this list, so it is renamed to "NO." before the street
+     typo is fixed.
+
+     :param survey_list: survey dataframe with "NO " and "Street / Block Name" columns
+     :return: a dataframe with the renamed column and the corrected street name
+     """
+     renamed = survey_list.rename(columns={"NO ": "NO."})
+
+     streets = renamed["Street / Block Name"]
+     renamed["Street / Block Name"] = streets.str.replace("Autum Close", "Autumn Close")
+
+     return renamed
+
+ @staticmethod
+ def correct_ha52_survey_list(survey_list):
+     """
+     Apply the known street-name corrections to the HA52 survey list, then drop duplicated rows.
+
+     :param survey_list: survey dataframe with "NO.", "Street / Block Name" and "Post Code" columns
+     :return: the corrected dataframe, de-duplicated on house number, street and postcode
+     """
+     street_column = "Street / Block Name"
+     street_corrections = (
+         ("Mardalle Avenue", "Mardale Avenue"),
+         ("Ollerton Close, Grappenhall", "Ollerton Close"),
+         ("Bradshaw Road, Grappenhall", "Bradshaw Lane"),
+     )
+     for wrong, right in street_corrections:
+         survey_list[street_column] = survey_list[street_column].str.replace(wrong, right)
+
+     # Drop a bunch of dupes
+     return survey_list.drop_duplicates(["NO.", street_column, "Post Code"])
+
+ @staticmethod
+ def correct_ha5_survey_list(survey_list):
+ """
+ Return the HA5 survey list unchanged; no corrections are applied.
+
+ The method exists so that merge_surveys_to_assets can resolve a correction function by name for HA5.
+ """
+ return survey_list
+
+ @staticmethod
+ def correct_ha20_survey_list(survey_list):
+     """
+     Apply the known street-name corrections to the HA20 survey list.
+
+     :param survey_list: survey dataframe with a "Street / Block Name" column; updated in place
+     :return: the same dataframe with the corrected street names
+     """
+     street_column = "Street / Block Name"
+     street_corrections = (
+         ("Abbot Close", "ABBOTS CLOSE"),
+         ("Downbarns Road", "DOWN BARNS ROAD"),
+         ("Austin Lane", "AUSTINS LANE"),
+         ("South Park Way", "SOUTHPARK WAY"),
+         ("OAKLAND ROAD", "OAKWOOD ROAD"),
+         ("ACRE WAY/NORTHWOOD", "ACRE WAY"),
+     )
+     for wrong, right in street_corrections:
+         survey_list[street_column] = survey_list[street_column].str.replace(wrong, right)
+
+     return survey_list
+
+ @staticmethod
+ def levenstein_match(matching_string, df):
+     """
+     Keep the single candidate row whose address is closest to the matching string.
+
+     Each "matching_address" value has its punctuation and spaces removed and is compared with
+     matching_string using the Levenshtein edit distance. On a tie the first candidate wins.
+
+     :param matching_string: the key to match, expected to be already stripped of spaces by the caller
+     :param df: candidate rows with a "matching_address" column
+     :return: a one-row dataframe holding the closest candidate, or df itself when it has no rows
+     """
+     # With no candidates min() would raise an opaque ValueError; hand the empty frame back so the
+     # caller's own "exactly one row" check reports the failure instead
+     if df.empty:
+         return df
+
+     # Strip out punctuation and spaces
+     candidates = [
+         re.sub(r'[^\w\s]', '', address).replace(" ", "")
+         for address in df["matching_address"].tolist()
+     ]
+
+     # Perform matching between full key and the cleaned candidates
+     distances = [Levenshtein.distance(matching_string, candidate) for candidate in candidates]
+     best_match_index = distances.index(min(distances))
+
+     # We might want to consider a threshold for the distance, however we don't apply one for the moment
+     return df.iloc[best_match_index:best_match_index + 1]
+
def merge_surveys_to_assets(self, asset_list, survey_list, ha_name):
- # Correct the asset list
- asset_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_asset_list")
- asset_list = asset_list_correction_function(asset_list)
# Correct the survey list
survey_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_survey_list")
survey_list = survey_list_correction_function(survey_list)
missed_postcodes = []
- if ha_name == "HA6":
+ if ha_name in ["HA6", "HA34"]:
missed_postcodes = [
postcode.lower() for postcode in survey_list["Post Code"] if
postcode.lower() not in asset_list["matching_postcode"].values
]
+ if ha_name == "HA13":
+ missed_postcodes = ["hp17 8le"]
+
+ if ha_name == "HA56":
+ # Multiple properties are listed as blocks, which is a problem for matching
+ missed_postcodes = ["sk17 6nr", "wa5 0en"]
+
matching_lookup = []
for _, row in tqdm(survey_list.iterrows(), total=len(survey_list)):
@@ -365,12 +2807,44 @@ class DataLoader:
asset_list["matching_address"].str.contains(row["Street / Block Name"].lower().strip())
].copy()
+ if not any(df["matching_address"].str.contains(str(house_number))):
+ if "flat" in str(house_number):
+ house_number = house_number.split("flat")[1].strip()
+
+ # We check if we had an instance of flat x, y
+ if "," in str(house_number):
+ house_number = house_number.split(",")[0].strip()
+
+ # We may also have a space for an instance of flat x y
+ if " " in str(house_number):
+ house_number = house_number.split(" ")[0].strip()
+
df = df[df["matching_address"].str.contains(str(house_number))]
+
+ if df.empty:
+
+ postcode_lower = row["Post Code"].lower()
+ if postcode_lower in missed_postcodes:
+ matching_lookup.append(
+ {
+ "survey_list_row_id": row["survey_list_row_id"],
+ "asset_list_row_id": None,
+ }
+ )
+ continue
+
+ print(row["Street / Block Name"])
+ print(house_number)
+ print(row["Post Code"])
+ raise ValueError("Investigate")
+
if df.shape[0] != 1:
- df = df[df["HouseNo"] == str(house_number)]
+ df = df[df["HouseNo"].astype(str).str.lower() == str(house_number)]
if df.shape[0] != 1:
- df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower())]
- if df.shape[0] != 1:
+ df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower().strip())]
+
+ if df.empty:
+
postcode_lower = row["Post Code"].lower()
if postcode_lower in missed_postcodes:
matching_lookup.append(
@@ -381,10 +2855,23 @@ class DataLoader:
)
continue
- print(row["Street / Block Name"])
- print(house_number)
- print(row["Post Code"].lower())
- raise ValueError("Investigate")
+ if df.shape[0] != 1:
+ if "Town/Area" not in row.keys():
+ full_key = (str(row["NO."]).lower().strip() + row["Street / Block Name"].lower().strip() +
+ row["Post Code"].lower().strip())
+ else:
+ full_key = str(row["NO."]).lower().strip() + row["Street / Block Name"].lower().strip() + \
+ row["Town/Area"].lower().strip() + row["Post Code"].lower().strip()
+ # Remove any spaces from the full key
+ full_key = full_key.replace(" ", "")
+
+ df = self.levenstein_match(full_key, df)
+
+ if df.shape[0] != 1:
+ print(row["Street / Block Name"])
+ print(house_number)
+ print(row["Post Code"])
+ raise ValueError("Investigate")
matching_lookup.append(
{
@@ -395,13 +2882,318 @@ class DataLoader:
matching_lookup = pd.DataFrame(matching_lookup)
+ if matching_lookup.shape[0] != survey_list.shape[0]:
+ raise ValueError("Mismatch in the number of survey rows and matching lookup rows")
+
+ matching_lookup = matching_lookup[~pd.isnull(matching_lookup["asset_list_row_id"])]
+
+ if matching_lookup["asset_list_row_id"].duplicated().sum():
+ raise ValueError("Duplicated matches in survey list")
+
# Merge onto the survey list
survey_list = survey_list.merge(matching_lookup, how='left', on="survey_list_row_id")
return survey_list
+ @staticmethod
+ def correct_ha25_eco3_list(eco3_list):
+     """
+     Apply the known corrections to the HA25 ECO3 list.
+
+     :param eco3_list: eco3 dataframe with "NO ", "Street / Block Name" and "Post Code" columns
+     :return: the corrected dataframe with unmatched, postcode-less and duplicated rows removed
+     """
+     street_column = "Street / Block Name"
+     postcode_column = "Post Code"
+
+     # NEADS DRIVE, postcode with bs305dt, is not found in the asset list
+     is_neads_drive = eco3_list[postcode_column] == "BS305DT"
+     eco3_list = eco3_list[~is_neads_drive]
+
+     # Drop rows with a missing postcode
+     postcode_missing = pd.isnull(eco3_list[postcode_column])
+     eco3_list = eco3_list[~postcode_missing]
+
+     # We have a bunch of genuine duplicates
+     eco3_list = eco3_list.drop_duplicates(["NO ", street_column, postcode_column])
+
+     street_corrections = (
+         ("HALWILL MEADOOW", "HALWILL MEADOW"),
+         ("Hall Road", "Hall Rd"),
+         ("SPRINGFIELD WAY SAINT DAY", "SPRINGFIELD WAY ST DAY"),
+         ("BOND SPEAR COURT", "BOND-SPEAR COURT"),
+         ("ST.MARYS HILL", "ST MARYS HILL"),
+     )
+     for wrong, right in street_corrections:
+         eco3_list[street_column] = eco3_list[street_column].str.replace(wrong, right)
+
+     # Correct the postcode for edmund road
+     wrong_edmund_postcode = (
+         (eco3_list[street_column] == "EDMUND ROAD") &
+         (eco3_list[postcode_column] == "TR14 8QJ")
+     )
+     eco3_list[postcode_column] = np.where(wrong_edmund_postcode, "TR15 1BY", eco3_list[postcode_column])
+
+     return eco3_list
+
+ @staticmethod
+ def correct_ha50_eco3_list(eco3_list):
+ """
+ Return the HA50 ECO3 list unchanged; no corrections are applied.
+
+ The method exists so that merge_eco3_to_assets can resolve a correction function by name for HA50.
+ """
+ return eco3_list
+
+ @staticmethod
+ def correct_ha41_eco3_list(eco3_list):
+ """
+ Return the HA41 ECO3 list unchanged; no corrections are applied.
+
+ The method exists so that merge_eco3_to_assets can resolve a correction function by name for HA41.
+ """
+ return eco3_list
+
+ @staticmethod
+ def correct_ha63_eco3_list(eco3_list):
+     """
+     Apply the known corrections to the HA63 ECO3 list.
+
+     :param eco3_list: eco3 dataframe with "NO ", "Street / Block Name" and "Post Code" columns
+     :return: the corrected dataframe with postcode-less and unmatched rows removed
+     """
+     street_column = "Street / Block Name"
+     number_column = "NO "
+
+     def rename_streets(frame, corrections):
+         # Apply each (wrong, right) street-name replacement in order
+         for wrong, right in corrections:
+             frame[street_column] = frame[street_column].str.replace(wrong, right)
+         return frame
+
+     postcode_missing = pd.isnull(eco3_list["Post Code"])
+     eco3_list = eco3_list[~postcode_missing]
+
+     # Some postcodes that aren't in the asset list
+     unknown_postcode = eco3_list["Post Code"].isin(["NR32 15X", "NR30 2BT"])
+     eco3_list = eco3_list[~unknown_postcode]
+
+     eco3_list = rename_streets(
+         eco3_list,
+         (
+             ("POUND COTTAGES - BLOOMSBERRY CLOSE", "POUND COTTAGES"),
+             ("FREDRICK ROAD", "Frederick Road"),
+         ),
+     )
+
+     # For denmark street, remove the space from the house number
+     on_denmark_street = eco3_list[street_column] == "DENMARK STREET"
+     eco3_list[number_column] = np.where(
+         on_denmark_street,
+         eco3_list[number_column].str.replace(" ", ""),
+         eco3_list[number_column]
+     )
+
+     eco3_list = rename_streets(
+         eco3_list,
+         (
+             ("OLD HOSPITAL MEWS HOSPITAL WALK", "Old Hospital Mews"),
+             ("Portland House, Portland Street", "Portland House"),
+             ("MIDDLE MARKET STREET", "Middle Market Road"),
+         ),
+     )
+
+     return eco3_list
+
+ @staticmethod
+ def correct_ha117_eco3_list(eco3_list):
+     """
+     Apply the known corrections to the HA117 ECO3 list.
+
+     :param eco3_list: eco3 dataframe with "Street / Block Name" and "Post Code" columns
+     :return: the corrected dataframe without the rows that have no postcode
+     """
+     # Delete rows where postcode is null - there are some placeholder rows where this happens
+     postcode_missing = pd.isnull(eco3_list["Post Code"])
+     eco3_list = eco3_list[~postcode_missing]
+
+     # Prefix the street with the number 155
+     streets = eco3_list["Street / Block Name"]
+     eco3_list["Street / Block Name"] = streets.str.replace("TARRING ROAD", "155 TARRING ROAD")
+
+     return eco3_list
+
+ @staticmethod
+ def correct_ha56_eco3_list(eco3_list):
+     """
+     Apply the known corrections to the HA56 ECO3 list.
+
+     :param eco3_list: eco3 dataframe with "NO ", "Street / Block Name", "Post Code" and
+         "INSTALL/ CANCELLATION DATE" columns
+     :return: the corrected dataframe with postcode-less rows and one duplicate removed
+     """
+     corrections = (
+         ("Street / Block Name", "Mount Pleasant, Crewe", "Mount Pleasant"),
+         ("Street / Block Name", "Dutton Close", "Dutton Way"),
+         ("Post Code", "Ls63nl", "LS6 3NL"),
+     )
+
+     postcode_missing = pd.isnull(eco3_list["Post Code"])
+     eco3_list = eco3_list[~postcode_missing]
+
+     for column, wrong, right in corrections:
+         eco3_list[column] = eco3_list[column].str.replace(wrong, right)
+
+     # Handle a duplicate
+     cancelled_duplicate = (
+         (eco3_list["Street / Block Name"] == "Mount Pleasant") &
+         (eco3_list["Post Code"] == "CW1 3JF") &
+         (eco3_list["NO "] == 5) &
+         (eco3_list["INSTALL/ CANCELLATION DATE"] == "CANCELLED 20.5.2022")
+     )
+
+     return eco3_list[~cancelled_duplicate]
+
+ @staticmethod
+ def correct_ha51_eco3_list(eco3_list):
+     """
+     Apply the known corrections to the HA51 ECO3 list.
+
+     :param eco3_list: eco3 dataframe with "NO ", "Street / Block Name" and "Post Code" columns
+     :return: the corrected dataframe with one unmatched row removed
+     """
+     street_column = "Street / Block Name"
+     postcode_column = "Post Code"
+     number_column = "NO "
+
+     street_corrections = (
+         ("HASELEMERE AVENUE", "HASLEMERE AVENUE"),
+         ("THORVILLE GROVE", "THORNVILLE GROVE"),
+         ("MONTBRETA CLOSE", "MONTBRETIA CLOSE"),
+     )
+     for wrong, right in street_corrections:
+         eco3_list[street_column] = eco3_list[street_column].str.replace(wrong, right)
+
+     # Correct the postcode recorded for Sydenham Road
+     wrong_sydenham_postcode = (
+         (eco3_list[street_column] == "SYDENHAM ROAD") &
+         (eco3_list[postcode_column] == "CR0 2DW")
+     )
+     eco3_list[postcode_column] = np.where(wrong_sydenham_postcode, "CR0 2ED", eco3_list[postcode_column])
+
+     # Not in asset list
+     not_in_assets = (
+         (eco3_list[street_column] == "WOODLEY LANE") &
+         (eco3_list[postcode_column] == "SM5 2RJ") &
+         (eco3_list[number_column] == "FLAT 3, 11")
+     )
+     eco3_list = eco3_list[~not_in_assets]
+
+     # Normalise the house number "47 B" to "47B"
+     has_spaced_number = (eco3_list[number_column] == "47 B")
+     eco3_list[number_column] = np.where(has_spaced_number, "47B", eco3_list[number_column])
+
+     return eco3_list
+
+ def merge_eco3_to_assets(self, asset_list, eco3_list, ha_name):
+ """
+ Match each row of an ECO3 list to a single asset list row and attach the asset row id.
+
+ The eco3 list is first passed through the HA-specific ``correct_<ha>_eco3_list`` method. Rows are then
+ matched on postcode, house number and street name, with a final flat / non-flat tie-break. Rows with no
+ candidate are counted as missed rather than raising.
+
+ Side effect visible here: a ``matching_postcode_nospace`` column is added to ``asset_list`` and is not
+ removed before returning.
+
+ :param asset_list: asset dataframe; the code reads matching_postcode, matching_address, HouseNo and
+ asset_list_row_id
+ :param eco3_list: eco3 dataframe; the code reads "Post Code", "NO ", "Street / Block Name" and
+ eco3_list_row_id
+ :param ha_name: housing association key, e.g. "HA25"; selects the correction method and the expected
+ miss count in self.UNMATCHED_ECO3
+ :return: eco3_list left-merged with the lookup, i.e. with an asset_list_row_id column added
+ :raises ValueError: if a row still has several candidates, if the number of missed rows differs from
+ self.UNMATCHED_ECO3[ha_name], or if one asset row is matched more than once
+ """
+
+ eco3_list_correction_function = getattr(self, f"correct_{ha_name.lower()}_eco3_list")
+ eco3_list = eco3_list_correction_function(eco3_list)
+
+ # Postcodes are compared lower-cased and without spaces on both sides
+ asset_list["matching_postcode_nospace"] = asset_list["matching_postcode"].str.replace(" ", "").str.lower()
+ eco3_list["postcode_no_space"] = eco3_list["Post Code"].str.lower().str.replace(" ", "")
+
+ if ha_name in ["HA25", "HA56", "HA51"]:
+ # For these HAs, eco3 rows whose postcode does not appear in the asset list are dropped up front
+ # HA25: 317 -> 259
+ missed_postcodes = {
+ postcode for postcode in eco3_list["postcode_no_space"] if
+ postcode not in asset_list["matching_postcode_nospace"].values
+ }
+
+ eco3_list = eco3_list[~eco3_list["postcode_no_space"].isin(missed_postcodes)]
+
+ # For the asset list, we create a matching address without any punctuation
+ # TODO: We should generally just remove punctuation from addresses when matching
+ asset_list['matching_address_no_punctuation'] = asset_list['matching_address'].str.replace(
+ r'[^\w\s]', '', regex=True
+ )
+ # Remove double spaces
+ # NOTE(review): as shown here the pattern and the replacement are both a single space, which would make
+ # this a no-op - confirm the pattern is really two spaces in the source file
+ asset_list["matching_address_no_punctuation"] = asset_list["matching_address_no_punctuation"].str.replace(
+ " ", " "
+ )
+
+ matching_lookup = []
+ # Row ids of eco3 rows for which no asset row was found
+ missed = []
+ for _, row in tqdm(eco3_list.iterrows(), total=len(eco3_list)):
+ postcode = row["postcode_no_space"]
+
+ # NOTE(review): the shared-postcode pre-filter above only runs for HA25/HA56/HA51, so for other HAs
+ # df may be empty here; that case falls through to the df.empty check below
+ df = asset_list[
+ asset_list["matching_postcode_nospace"].str.contains(postcode)
+ ]
+
+ house_number = row["NO "]
+ if isinstance(house_number, str):
+ house_number = house_number.lower().strip()
+
+ # Only when no candidate contains the raw house number do we try to reduce "flat x, y" style values
+ # NOTE(review): this check uses HouseNo via .str directly, while the filter below casts with
+ # astype(str) and lower-cases first - confirm HouseNo is always a string column here
+ if not any(df["HouseNo"].str.contains(str(house_number))):
+ if "flat" in str(house_number):
+ house_number = house_number.split("flat")[1].strip()
+
+ # We check if we had an instance of flat x, y
+ if "," in str(house_number):
+ house_number = house_number.split(",")[0].strip()
+
+ # We may also have a space for an instance of flat x y
+ if " " in str(house_number):
+ house_number = house_number.split(" ")[0].strip()
+
+ # We must do the house number filter
+ df = df[df["HouseNo"].astype(str).str.lower() == str(house_number)]
+
+ # Perform a search on streetname
+ # We do this to prevent duplicate matches to properties with the same postcode and house number,
+ # but different streets
+ # Only the part of the street before the first "/" or "," is used, with punctuation removed
+ street_name_section1 = row["Street / Block Name"].lower().split("/")[0].split(",")[0]
+ street_name_section1 = re.sub(r'[^\w\s]', '', street_name_section1)
+ df = df[df["matching_address_no_punctuation"].str.contains(street_name_section1)]
+
+ if df.empty:
+ missed.append(row["eco3_list_row_id"])
+ continue
+
+ if df.shape[0] > 1:
+ # Tie-break: keep flats only when the eco3 house number mentions a flat, otherwise exclude them
+ if "flat" in str(row["NO "]).lower():
+ df = df[df["matching_address"].str.contains("flat")]
+ else:
+ df = df[~df["matching_address"].str.contains("flat")]
+
+ if df.shape[0] != 1:
+ print(row["Street / Block Name"])
+ print(house_number)
+ print(row["Post Code"])
+ raise ValueError("Investigate")
+
+ matching_lookup.append(
+ {
+ "eco3_list_row_id": row["eco3_list_row_id"],
+ "asset_list_row_id": df["asset_list_row_id"].values[0],
+ }
+ )
+
+ # We verify the missed
+ # HA25 contains 119 missed entries. These are actually 24 unique postcodes, and the majority belong to 2
+ # where many surveys were conducted on house numbers, not in the asset list
+ # 154 missed, 2827 matched for HA 25
+ # For HA56, the number of missed is high at 320, however a big portion of these are due to the block being
+ # listed in the asset list, and individual units being in the survey list
+ # The miss count must equal the expected figure exactly, so any change to the matching above can trip this
+ if len(missed) != self.UNMATCHED_ECO3[ha_name]:
+ raise ValueError(
+ f"Unmatched addresses for {ha_name} is not as expected, got {len(missed)} unmatched"
+ )
+
+ matching_lookup = pd.DataFrame(matching_lookup)
+ # Check dupes as this will cause problems later on
+ # NOTE(review): if nothing matched, matching_lookup has no columns and this lookup would raise a
+ # KeyError - confirm that case cannot occur
+ if matching_lookup["asset_list_row_id"].duplicated().sum():
+ raise ValueError("Duplicated asset list row ids")
+
+ # Merge onto eco3 list
+ eco3_list = eco3_list.merge(matching_lookup, how="left", on="eco3_list_row_id")
+
+ # Only the temporary address column is removed; matching_postcode_nospace stays on asset_list
+ asset_list.drop(columns=["matching_address_no_punctuation"], inplace=True)
+
+ return eco3_list
+
+ @staticmethod
+ def extract_streetname(address, house_number=None, postcode=None):
+     """
+     Reduce a full address to its lower-cased street section.
+
+     The address is lower-cased, the house number and postcode are removed where given, and only the
+     text before the first comma is kept, with runs of whitespace collapsed to a single space.
+
+     :param address: The full address as a string.
+     :param house_number: The house number to remove, as a string or integer.
+     :param postcode: The postcode to remove, as a string.
+     :return: The cleaned street section of the address.
+     """
+     # Convert everything to lower case
+     address = address.lower()
+
+     if house_number is not None:
+         # Escape the house number so that values holding regex metacharacters, e.g. "(5", are matched
+         # literally instead of breaking or altering the pattern
+         number_pattern = r'\b{}\b'.format(re.escape(str(house_number)))
+         address = re.sub(number_pattern, '', address, flags=re.IGNORECASE).strip()
+
+     if postcode is not None:
+         # Remove the postcode
+         postcode_pattern = r'\b{}\b'.format(re.escape(postcode))
+         address = re.sub(postcode_pattern, '', address, flags=re.IGNORECASE).strip()
+
+     # Get first section before a comma; no comma can remain after this, so no comma clean-up is needed
+     address = address.split(",")[0]
+     # Replace multiple spaces with a single space
+     address = re.sub(r'\s+', ' ', address)
+
+     return address
+
def merge_ciga_to_assets(self, asset_list, ciga_list, ha_name):
matching_lookup = []
+ unmatched_addresses = []
+
for _, row in tqdm(ciga_list.iterrows(), total=len(ciga_list)):
house_number = row["HouseNo"]
@@ -413,23 +3205,43 @@ class DataLoader:
asset_list["matching_address"].str.contains(row["Matched Postcode"].lower().strip())
].copy()
- df = df[df["HouseNo"] == str(house_number)]
- # TODO: Might need to consider street name at some point
+ df = df[df["HouseNo"].astype(str) == str(house_number)]
+ # For ciga, we skip
+ if df.empty:
+ unmatched_addresses.append(
+ {
+ "ciga_list_row_id": row["ciga_list_row_id"],
+ "HouseNo": house_number,
+ "Matched Postcode": row["Matched Postcode"]
+ }
+ )
+ continue
+
if df.shape[0] != 1:
- if df.shape[0] != 1:
- df = df[df["matching_postcode"].str.lower().str.contains(row["Post Code"].lower())]
- if df.shape[0] != 1:
- postcode_lower = row["Post Code"].lower()
- if postcode_lower in missed_postcodes:
- matching_lookup.append(
- {
- "survey_list_row_id": row["survey_list_row_id"],
- "asset_list_row_id": None,
- }
- )
- continue
+ # We split house number and postcode out of the matched address for ciga
+ street_name = self.extract_streetname(
+ address=row["Matched Address"], house_number=house_number, postcode=row["Matched Postcode"]
+ )
+ # We check if any of the rows contains the street name and if they do, filter
+ if any(df["matching_address"].str.replace(",", "").str.contains(street_name)):
+ df = df[df["matching_address"].str.replace(",", "").str.contains(street_name)]
+ if df.shape[0] != 1:
+ # The final check we do here is to check for the presence of flat in the address
+ if "flat" in row["Matched Address"].lower():
+ df = df[df["matching_address"].str.contains("flat")]
+ else:
+ df = df[df["matching_address"].str.contains("flat") == False]
+
+ if df.shape[0] != 1:
+ full_key = str(row["HouseNo"]).lower().strip() + row["Matched Address"].lower().strip() + row[
+ "Matched Postcode"].lower().strip()
+ # Remove any spaces from the full key
+ full_key = full_key.replace(" ", "")
+ df = self.levenstein_match(full_key, df)
+
+ if df.shape[0] != 1:
print(row["Street / Block Name"])
print(house_number)
print(row["Post Code"].lower())
@@ -437,13 +3249,27 @@ class DataLoader:
matching_lookup.append(
{
- "survey_list_row_id": row["survey_list_row_id"],
+ "ciga_list_row_id": row["ciga_list_row_id"],
"asset_list_row_id": df["asset_list_row_id"].values[0],
}
)
+ # We have an acceptable number of ciga failures for each HA
+ if len(unmatched_addresses) != self.UNMATCHED_CIGA[ha_name]:
+ raise ValueError(
+ f"Unmatched addresses for {ha_name} is not as expected, got {len(unmatched_addresses)} unmatched")
+
matching_lookup = pd.DataFrame(matching_lookup)
+ # Check dupes as this will cause problems later on
+ if matching_lookup["asset_list_row_id"].duplicated().any():
+ raise ValueError("Duplicated asset list row ids")
+
+ # Merge onto the ciga list
+ ciga_list = ciga_list.merge(matching_lookup, how='left', on="ciga_list_row_id")
+
+ return ciga_list
+
@staticmethod
def identify_built_form_ha6(property_string):
"""
@@ -474,40 +3300,37 @@ class DataLoader:
def load(self):
- if self.use_cache:
- self.data = read_pickle_from_s3(
+ # Get the december figures, which is just a csv
+ self.december_figures = pd.read_csv(self.december_figures_filepath)
+ # Remove the spaces in HA Name
+ self.december_figures["HA Name"] = self.december_figures["HA Name"].str.replace(" ", "")
+ for col in ["ECO4", "GBIS", "ECO4 remaining", "GBIS remaining"]:
+ self.december_figures[col] = self.december_figures[col].astype("Int64")
+
+ if self.use_cache and not self.rebuild:
+ data = read_pickle_from_s3(
bucket_name="retrofit-datalake-dev",
s3_file_name="ha-analysis/batch3-inputs.pickle",
)
- return
+ else:
+ data = {}
- data = {}
for filepath in self.directories:
ha_name = filepath.split("/")[2]
+ if ha_name in data:
+ continue
# Load asset list
- logger.info("Loading asset list for {}".format(ha_name))
- asset_list, survey_list, ciga_list = self.load_asset_list(
+ logger.info("Loading data for {}".format(ha_name))
+ asset_list, survey_list, ciga_list, eco3_list = self.load_asset_list(
filepath=filepath,
ha_name=ha_name,
)
- if file_config.get("survey_list"):
- # TODO: Delete this
- logger.info("Loading survey list for {}".format(ha_name))
- survey_list, matched_lookup = self.load_survey_list(
- asset_list=asset_list,
- file_path=file_config["survey_list"]["filepath"],
- ha_name=ha_name,
- sheet_name=file_config["survey_list"]["sheetname"]
- )
- else:
- survey_list = None
- matched_lookup = None
-
data[ha_name] = {
"asset_list": asset_list,
"survey_list": survey_list,
- "matched_lookup": matched_lookup
+ "ciga_list": ciga_list,
+ "eco3_list": eco3_list
}
self.data = data
@@ -520,6 +3343,504 @@ class DataLoader:
s3_file_name="ha-analysis/batch3-inputs.pickle",
)
+    def ha_facts_and_figures(self):
+        """
+        Build the per-HA facts and figures table and store it on self.facts_and_figures.
+
+        For every HA in self.data the "ECO Eligibility" labels of the asset list are normalised and then
+        corrected using the CIGA list, the ECO3 list and the survey list. The updated asset list and
+        survey list are written back to self.data, and the per-HA counts are inner-joined onto
+        self.december_figures, so both self.data and self.december_figures must already be populated.
+
+        :return: None
+        :raises ValueError: if merging the CIGA or ECO3 list changes the number of asset list rows
+        """
+
+        # Raw scheme labels (matched exactly) mapped onto the scheme they are reported under
+        scheme_map = {
+            "ECO4": "ECO4",
+            "AFFORDABLE WARMTH": "ECO4",
+            "ECO4 A/W": "ECO4",
+            "ECO4 GBIS (ECO+)": "GBIS",
+            "ECO4 GBIS (ECO+) JJC UNDER 73m²": "GBIS",
+            "ECO4 AFFORDABLE WARMTH": "ECO4",
+            "Affordable Warmth": "ECO4",
+            "ECO4 GBIS (ECO+) JJC UNDER 73m² ": "GBIS",
+            "ECO4 PPS": "ECO4",
+            "AFFORDABLE WARMTH / REMEDIAL": "ECO4",
+            "AFF0RDALE WARMTH": "ECO4",
+            "ECO 4 RdSAP CL": "ECO4",
+            "Affordable Warmth (R) ": "ECO4",
+            "Affordable Warmth ": "ECO4",
+            "ECO 4 AFFORDABLE WARMTH": "ECO4",
+        }
+
+        # Since it seems like "subject to archetype check" has some failure conditions, for simplicity, we
+        # treat these as similar to subject to CIGA, and therefore unconfirmed work that could fail. There
+        # are only a small volume of properties for which we see this
+        eco_eligibility_map = {
+            "not eligble": "not eligible",
+            "eco 4(subject to ciga)": "eco4 (subject to ciga)",
+            "eco4 (subject to ciga/archetype check": "eco4 (subject to ciga) (subject to archetype)",
+            "eco4 (subject to archetype check)": "eco4 (subject to archetype)",
+            "eco4 (subject to ciga/archetype)": "eco4 (subject to ciga) (subject to archetype)",
+            "eco4 (subject to ciga)": "eco4 (subject to ciga)",
+            "eco4(subject to ciga)": "eco4 (subject to ciga)",
+            "eco4 subject to ciga": "eco4 (subject to ciga)",
+            "eco4 (subject to archetype/ciga)": "eco4 (subject to ciga) (subject to archetype)",
+            "eco4( subject to ciga/archetype)": "eco4 (subject to ciga) (subject to archetype)",
+            "eco4 (subject to ciga/ archetype)": "eco4 (subject to ciga) (subject to archetype)",
+        }
+
+        ha_facts_and_figures = []
+        for ha_name, data_assets in self.data.items():
+            asset_list = data_assets["asset_list"].copy()
+            survey_list = data_assets["survey_list"].copy()
+            ciga_list = data_assets["ciga_list"].copy()
+            eco3_list = data_assets.get("eco3_list", pd.DataFrame())
+
+            asset_list_starting_size = asset_list.shape[0]
+
+            # Change the column name if it's ECO eligibility
+            asset_list = asset_list.rename(
+                columns={"ECO eligibility": "ECO Eligibility", "ECO Eligibilty": "ECO Eligibility"},
+            )
+            # Remove surplus whitespace, push to lower case and remap onto the canonical labels
+            asset_list["ECO Eligibility"] = (
+                asset_list["ECO Eligibility"].str.strip().str.lower().replace(eco_eligibility_map)
+            )
+
+            if not ciga_list.empty:
+                # We merge on ciga and update the status to reflect if it has failed ciga or not
+                # If Guarantee is Yes, this means that there is a guarantee in place, and the property failed the CIGA
+                # check
+                # CIGA rows without an asset_list_row_id are left out of the merge
+                ciga_list_to_merge = ciga_list[["asset_list_row_id", "Guarantee"]].copy()
+                ciga_list_to_merge = ciga_list_to_merge[~pd.isnull(ciga_list_to_merge["asset_list_row_id"])]
+
+                asset_list = asset_list.merge(ciga_list_to_merge, how='left', on="asset_list_row_id")
+
+                asset_list["ECO Eligibility"] = np.where(
+                    (
+                        asset_list["ECO Eligibility"].str.contains("(subject to ciga)", regex=False, na=False) &
+                        (asset_list["Guarantee"] == "Yes")
+                    ),
+                    "failed ciga",
+                    asset_list["ECO Eligibility"]
+                )
+
+                # We replace any remaining "Subject to CIGA" with pass Ciga
+                asset_list["ECO Eligibility"] = np.where(
+                    (
+                        asset_list["ECO Eligibility"].str.contains("(subject to ciga)", regex=False, na=False) &
+                        (asset_list["Guarantee"] == "No")
+                    ),
+                    "eco4 - passed ciga",
+                    asset_list["ECO Eligibility"]
+                )
+
+                asset_list = asset_list.drop(columns=["Guarantee"])
+
+                # The left merge must not have changed the number of asset list rows
+                if asset_list.shape[0] != asset_list_starting_size:
+                    raise ValueError("The asset list has changed in size")
+
+            # If we have eco3 surveys, we set a property to not eligible
+            if not eco3_list.empty:
+                eco3_list_to_merge = eco3_list[["asset_list_row_id"]].copy()
+                eco3_list_to_merge["has_eco3"] = True
+                asset_list = asset_list.merge(
+                    eco3_list_to_merge, how="left", on="asset_list_row_id"
+                )
+
+                if asset_list.shape[0] != asset_list_starting_size:
+                    raise ValueError("The asset list has changed in size, when merging on eco3")
+
+                # Any rows that have an eco3 survey are set to not eligible
+                asset_list["ECO Eligibility"] = np.where(
+                    asset_list["has_eco3"] == True,
+                    "not eligible",
+                    asset_list["ECO Eligibility"]
+                )
+                # NOTE: has_eco3 is not dropped, so it stays on the asset list written back to self.data
+
+            # Report on sales
+            sales_report = {}
+            if not survey_list.empty:
+                # The scheme name is taken from the first column of the survey list
+                scheme_column = survey_list.columns[0]
+                # Remap the values in the scheme column
+                survey_list[scheme_column] = survey_list[scheme_column].replace(scheme_map)
+                # We clean up the survey list installation or cancelled
+                if "INSTALLED OR CANCELLED" in survey_list.columns:
+                    survey_list["installed_or_cancelled_clean"] = (
+                        survey_list["INSTALLED OR CANCELLED"].str.lower()
+                        .str.replace(r'[^\w\s]', '', regex=True)  # Remove all punctuation
+                        .str.replace(r'\s+', ' ', regex=True)  # Collapse runs of whitespace
+                        .str.strip()  # Remove leading and trailing spaces
+                    )
+
+                    survey_list["installation_status"] = None
+                    survey_list["installation_status"] = np.where(
+                        survey_list["installed_or_cancelled_clean"].isin(["installed", "installed see notes"]),
+                        "installed",
+                        survey_list["installation_status"]
+                    )
+                    survey_list["installation_status"] = np.where(
+                        survey_list["installed_or_cancelled_clean"].isin(["cancelled"]),
+                        "cancelled",
+                        survey_list["installation_status"]
+                    )
+                    # Find partial installations (blank cells fall through to the "in progress" default below)
+                    survey_list["installation_status"] = np.where(
+                        survey_list["installed_or_cancelled_clean"].str.contains("still to be installed", na=False),
+                        "in progress",
+                        survey_list["installation_status"]
+                    )
+                    # Find partial cancellations
+                    # TODO: We might have more indications of partial cancellations
+                    survey_list["installation_status"] = np.where(
+                        survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]),
+                        "cancelled",
+                        survey_list["installation_status"]
+                    )
+                else:
+                    # We have some examples, e.g. HA28, where we do not have the installed or cancelled column
+                    # NOTE(review): .str.contains() yields NaN for blank/non-string cells and np.where treats
+                    # NaN as truthy, so such rows are currently labelled "cancelled" - confirm this is intended
+                    if 'INSTALL/ CANCELLATION DATE' in survey_list.columns:
+                        survey_list["installation_status"] = np.where(
+                            survey_list['INSTALL/ CANCELLATION DATE'].str.lower().str.contains("cancelled"),
+                            "cancelled",
+                            "installed",
+                        )
+                    else:
+                        survey_list["installation_status"] = np.where(
+                            survey_list['INSTALL / CANCELLATION DATE'].str.lower().str.contains("cancelled"),
+                            "cancelled",
+                            "installed",
+                        )
+
+                # Finally, for other cases, we set the status to "in progress"
+                survey_list["installation_status"] = survey_list["installation_status"].fillna("in progress")
+
+                # We concatenate the scheme name with the installation status
+                survey_list["installation_status"] = (
+                    survey_list[scheme_column] + " - " + survey_list["installation_status"]
+                )
+
+                # We get the sales
+                sales_report = {
+                    "ECO4 - surveys sold": survey_list.shape[0],
+                    **survey_list["installation_status"].value_counts().to_dict()
+                }
+
+                # We find some cases where properties have sold but are missing CIGA checks
+                survey_list_to_merge = survey_list[["asset_list_row_id", "installation_status"]].copy()
+                survey_list_to_merge["has_a_survey_record"] = True
+                survey_list_to_merge = survey_list_to_merge[~pd.isnull(survey_list_to_merge["asset_list_row_id"])]
+
+                asset_list = asset_list.merge(survey_list_to_merge, how='left', on="asset_list_row_id")
+                # NOTE(review): unlike the CIGA/ECO3 merges, the row count is not re-checked after this merge
+                # Update the cases where properties have sold, but are missing a CIGA check
+                # If we don't have a CIGA list, we set the value to ECO4
+                set_to = "eco4 - passed ciga" if not ciga_list.empty else "eco4"
+                asset_list["ECO Eligibility"] = np.where(
+                    (asset_list["ECO Eligibility"].str.contains("subject to ciga", na=False)) & (
+                        asset_list["has_a_survey_record"] == True
+                    ),
+                    set_to,
+                    asset_list["ECO Eligibility"]
+                )
+                # Update the cases where a property has been marked as eligible for GBIS, but sold for ECO4
+                asset_list["ECO Eligibility"] = np.where(
+                    (asset_list["ECO Eligibility"] == "gbis") & (
+                        asset_list["installation_status"].isin(
+                            ["ECO4 - installed", "ECO4 - cancelled", "ECO4 - in progress"]
+                        )
+                    ),
+                    "eco4",
+                    asset_list["ECO Eligibility"]
+                )
+                # Update the cases where a property was marked as eligible for ECO4, but sold for GBIS
+                asset_list["ECO Eligibility"] = np.where(
+                    (asset_list["ECO Eligibility"].isin(
+                        [
+                            "eco4",
+                            "eco4 (subject to ciga)",
+                            "eco4 - passed ciga",
+                            "failed ciga",
+                            "eco4 (subject to archetype)",
+                            "eco4 (subject to ciga) (subject to archetype)"
+                        ]
+                    )) & (
+                        asset_list["installation_status"].isin(
+                            ["GBIS - installed", "GBIS - cancelled", "GBIS - in progress"]
+                        )
+                    ),
+                    "gbis",
+                    asset_list["ECO Eligibility"]
+                )
+                # Update the cases where a property is marked as not eligible, but sold for GBIS
+                asset_list["ECO Eligibility"] = np.where(
+                    (asset_list["ECO Eligibility"] == "not eligible") & (
+                        asset_list["installation_status"].isin(
+                            ["GBIS - in progress", "GBIS - installed", "GBIS - cancelled"]
+                        )),
+                    "gbis",
+                    asset_list["ECO Eligibility"]
+                )
+
+                # Update the cases where a property is marked as not eligible, but sold for ECO4
+                asset_list["ECO Eligibility"] = np.where(
+                    (asset_list["ECO Eligibility"] == "not eligible") & (
+                        asset_list["installation_status"].isin(
+                            ["ECO4 - in progress", "ECO4 - installed", "ECO4 - cancelled"]
+                        )
+                    ),
+                    "eco4",
+                    asset_list["ECO Eligibility"]
+                )
+
+                asset_list = asset_list.drop(columns=["has_a_survey_record", "installation_status"])
+
+            # Update the survey list with installation status
+            self.data[ha_name]["survey_list"] = survey_list
+
+            # Insert updated asset list
+            self.data[ha_name]["asset_list"] = asset_list
+
+            ha_facts_and_figures.append(
+                {
+                    "HA Name": ha_name,
+                    **asset_list["ECO Eligibility"].value_counts().to_dict(),
+                    **sales_report
+                }
+            )
+
+        ha_facts_and_figures = pd.DataFrame(ha_facts_and_figures)
+        # The "not eligible" count is not reported; errors="ignore" tolerates no HA having produced it
+        ha_facts_and_figures = ha_facts_and_figures.drop(
+            columns=["not eligible"], errors="ignore"
+        )
+
+        ha_facts_and_figures = ha_facts_and_figures.fillna(0)
+        # Make all columns apart from HA Name integers
+        for col in ha_facts_and_figures.columns[1:]:
+            ha_facts_and_figures[col] = ha_facts_and_figures[col].astype(int)
+
+        ha_facts_and_figures = self.december_figures.merge(ha_facts_and_figures, how="inner", on="HA Name")
+        ha_facts_and_figures = ha_facts_and_figures.fillna(0)
+
+        self.facts_and_figures = ha_facts_and_figures
+
+
+# Rule tables for get_property_type_and_built_form, keyed by HA name.
+#
+# HA -> column whose value is whitespace-stripped and then looked up in that HA's flat
+# PROPERTY_TYPE_LOOKUP table (labels missing from the table resolve to None). No built form
+# is derived for these HAs.
+_STRIPPED_LOOKUP_COLUMNS = {
+    "HA2": "Dwelling Type",
+    "HA5": "Asset Type",
+    "HA8": "Property Type",
+    "HA11": "Property Type",
+    "HA12": "Asset_Type1",
+    "HA13": "Type Cd",
+    "HA15": "Property Type",
+    "HA18": "Asset Type",
+    "HA20": "Asset Type",
+    "HA24": "Property Type",
+    "HA32": "Dwelling type",
+    "HA34": "Property Type",
+    "HA35": "Property Type Grouping",
+    "HA37": "PROPERTY TYPE",
+    "HA41": "Archetype",
+    "HA42": "Dwelling use/type",
+    "HA45": "Property type",
+    "HA48": "Property Type",
+    "HA50": "Property Type",
+    "HA51": "Asset Type",
+    "HA52": "Property Type",
+    "HA56": "Dwelling Type Description",
+    "HA63": "PropertyType",
+    "HA117": "Property Type",
+    "HAXXX": "Unit Description",
+}
+
+# HA -> column whose value is looked up as-is in that HA's flat PROPERTY_TYPE_LOOKUP table;
+# a label that is missing from the table raises KeyError.
+_STRICT_LOOKUP_COLUMNS = {
+    "HA25": "T1_AssetType",
+    "HA28": "Property Type - Academy",
+    "HA30": "A_AssetType",
+}
+
+# HA -> column whose value already is the property type and is returned untouched.
+_PASSTHROUGH_COLUMNS = {
+    "HA17": "property_type",
+    "HA19": "Dwelling Type",
+    "HA27": "Property Type",
+    "HA54": "Property Type",
+}
+
+# HA -> free-text column that is scanned for a property type keyword.
+_KEYWORD_COLUMNS = {
+    "HA9": "Asset Type",
+    "HA21": "Property Type",
+    "HA31": "A_AssetType",
+}
+
+# (keyword, property type) pairs, tested in this order; the first hit wins.
+_PROPERTY_TYPE_KEYWORDS = (
+    ("house", "House"),
+    ("flat", "Flat"),
+    ("bungalow", "Bungalow"),
+    ("maisonette", "Maisonette"),
+)
+
+
+def _property_type_from_keywords(description):
+    """
+    Return the property type named in a free-text description.
+
+    :param description: raw description string taken from the asset list
+    :return: "House", "Flat", "Bungalow" or "Maisonette" for the first keyword found in the stripped,
+        lower-cased description, or None when no keyword is present
+    """
+    text = description.strip().lower()
+    for keyword, property_type in _PROPERTY_TYPE_KEYWORDS:
+        if keyword in text:
+            return property_type
+    return None
+
+
+def get_property_type_and_built_form(property_meta, ha_name):
+    """
+    Work out the property type and built form of one asset list row.
+
+    Every housing association (HA) records its stock in its own columns with its own vocabulary, so the
+    rule that is applied is selected by ha_name:
+
+    * HAs in _STRIPPED_LOOKUP_COLUMNS: the column value is stripped and looked up in the HA's
+      PROPERTY_TYPE_LOOKUP table; labels missing from the table give a property type of None.
+    * HAs in _STRICT_LOOKUP_COLUMNS: the column value is looked up as-is and must exist in the table.
+    * HAs in _PASSTHROUGH_COLUMNS: the column value is returned unchanged as the property type.
+    * HAs in _KEYWORD_COLUMNS: the column text is scanned for "house", "flat", "bungalow" or
+      "maisonette", in that order.
+    * HA1, HA6, HA7, HA14, HA16, HA39, HA49, HA107 and HAXX have bespoke rules (see the inline
+      comments), and HA44 always yields (None, None).
+
+    Only HA1, HA6, HA7, HA16, HA39 and HA107 can return a built form; it is None for every other HA.
+
+    :param property_meta: asset list row, indexable by column name (HA107 also requires .get())
+    :param ha_name: HA identifier, e.g. "HA1"
+    :return: tuple of (property_type, built_form); either element may be None
+    :raises NotImplementedError: if no rule exists for ha_name
+    :raises KeyError: if a required column is missing, or a label is absent from a directly indexed lookup
+    """
+    # Nothing is derived for HA44
+    if ha_name in ["HA44"]:
+        return None, None
+
+    # Table-driven HAs: only a property type is derived, the built form is always None
+    if ha_name in _STRIPPED_LOOKUP_COLUMNS:
+        column = _STRIPPED_LOOKUP_COLUMNS[ha_name]
+        # HA52 has rows without a property type. NaN is treated like None, so that blank cells
+        # are skipped as well instead of failing on .strip()
+        if ha_name == "HA52" and pd.isnull(property_meta[column]):
+            return None, None
+        return PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta[column].strip()), None
+
+    if ha_name in _STRICT_LOOKUP_COLUMNS:
+        # Plain indexing on purpose: a label that is missing from the table raises KeyError
+        column = _STRICT_LOOKUP_COLUMNS[ha_name]
+        return PROPERTY_TYPE_LOOKUP[ha_name][property_meta[column]], None
+
+    if ha_name in _PASSTHROUGH_COLUMNS:
+        # The column value is used as the property type without any cleaning
+        return property_meta[_PASSTHROUGH_COLUMNS[ha_name]], None
+
+    if ha_name in _KEYWORD_COLUMNS:
+        # Free text: the property type is None when no keyword is found
+        return _property_type_from_keywords(property_meta[_KEYWORD_COLUMNS[ha_name]]), None
+
+    # HAs with bespoke rules
+    if ha_name == "HA1":
+        property_type = property_meta["Asset Type"]
+        # We correct a small error
+        if property_type == "a":
+            property_type = "House"
+
+        # Remap bedsits to flats
+        if property_type in ["Bedsit", "Room"]:
+            property_type = "Flat"
+
+        # The built form comes from the "Property Type" column; unmapped labels give None
+        built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(property_meta["Property Type"], None)
+        return property_type, built_form
+
+    if ha_name == "HA6":
+        # The built form is read straight from the "built_form" column of the row
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][property_meta["Dwelling type"]]
+        built_form = property_meta["built_form"]
+        return property_type, built_form
+
+    if ha_name == "HA7":
+        # Two tables: "Archetype" gives the property type and "Property Type" the built form
+        property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"].get(property_meta["Archetype"])
+        built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(property_meta["Property Type"])
+        return property_type, built_form
+
+    if ha_name == "HA14":
+        asset_type = property_meta["Asset Type Description"]
+        if asset_type != "Block - Repair":
+            # Every other asset type must be present in the lookup (KeyError otherwise)
+            return PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][asset_type], None
+
+        # "Block - Repair" rows are classified from the first address line: "room" means a house,
+        # anything else a flat
+        if "room" in property_meta["Address 1"].lower():
+            return "House", None
+        return "Flat", None
+
+    if ha_name == "HA16":
+        # One config dict per type label, keyed with hyphens ("property-type" / "built-form")
+        config = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["Type"]]
+        return config.get("property-type"), config.get("built-form")
+
+    if ha_name == "HA39":
+        # One config dict per construction style, keyed with underscores. Unknown styles fall
+        # back to an empty config, so both values start out as None
+        config = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["ConstructionStyle"], {})
+        property_type = config.get("property_type", None)
+        built_form = config.get("built_form", None)
+
+        if property_type is None:
+            # We check for the presence of flat in the matching address
+            if "flat" in property_meta["matching_address"]:
+                property_type = "Flat"
+            else:
+                property_type = "House"
+        return property_type, built_form
+
+    if ha_name == "HA49":
+        # The property class is used as the property type, it only needs stripping
+        return property_meta["Property Class"].strip(), None
+
+    if ha_name == "HA107":
+        # Both values are read from optional columns on the row itself
+        property_type = property_meta.get("property_type", None)
+        built_form = property_meta.get("built_form", None)
+        return property_type, built_form
+
+    if ha_name == "HAXX":
+        # The property type is the text in front of the first colon
+        return property_meta["Property Type"].split(":")[0].strip(), None
+
+    # No rule exists for this HA yet: add one above, or an entry in one of the column tables
+    raise NotImplementedError("Implement me")
+
def get_epc_data(
loader, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds, pull_data=True
@@ -527,84 +3848,6 @@ def get_epc_data(
if not loader.data:
raise ValueError("Data not found - please run loader.load() first")
- property_type_lookup = {
- "ha_1": {
- "built_form": {
- 'Mid Terrace': 'Mid-Terrace',
- 'Semi-Detached': 'Semi-Detached',
- 'End Terrace': 'End-Terrace',
- 'Detached': 'Detached',
- 'Enclosed Mid': 'Mid-Terrace',
- 'Detached Local Connect': 'Detached',
- }
- },
- "ha_6": {
- "property_type": {
- 'HOUSE': "House",
- 'GROUND FLOOR FLAT': "Flat",
- 'UPPER FLOOR FLAT': "Flat",
- 'MAISONETTE': "Maisonette",
- 'BUNGALOW': "Bungalow",
- 'WARDEN BUNGALOW': "Bungalow",
- 'WARDEN FLAT': "Flat",
- 'EXTRACARE SCHEME': "Flat",
- }
- },
- "ha_14": {
- "property_type": {
- "House": "House",
- "Flat": "Flat",
- "Bungalow": "Bungalow",
- "Maisonette": "Maisonette",
- }
- },
- "ha_39": {
- "Semi house": {"property_type": "House", "built_form": "Semi-Detached"},
- "1st floor flat": {"property_type": "Flat", "built_form": None},
- "Mid terrace house": {"property_type": "House", "built_form": "Mid-Terrace"},
- "Ground floor flat": {"property_type": "Flat", "built_form": None},
- "End terrace house": {"property_type": "House", "built_form": "End-Terrace"},
- "Semi bungalow": {"property_type": "Bungalow", "built_form": "Semi-Detached"},
- "End terrace bungalow": {"property_type": "Bungalow", "built_form": "End-Terrace"},
- "2nd floor flat": {"property_type": "Flat", "built_form": None},
- "Mid terrace bungalow": {"property_type": "Bungalow", "built_form": "Mid-Terrace"},
- "3rd floor flat": {"property_type": "Flat", "built_form": None},
- "Detached bungalow": {"property_type": "Bungalow", "built_form": "Detached"},
- "Maisonette": {"property_type": "Maisonette", "built_form": None},
- "Detached house": {"property_type": "House", "built_form": "Detached"},
- "Lower ground floor flat": {"property_type": "Flat", "built_form": None},
- "Dormer bungalow": {"property_type": "Bungalow", "built_form": None},
- "Basement flat": {"property_type": "Flat", "built_form": None},
- "Cluster House": {"property_type": "House", "built_form": "Detached"},
- "2nd/3rd floor duplex flat": {"property_type": "Flat", "built_form": None},
- "Ground floor flat with study": {"property_type": "Flat", "built_form": None},
- "4th floor flat": {"property_type": "Flat", "built_form": None},
- "1st floor flat with study room": {"property_type": "Flat", "built_form": None},
- "2nd floor flat with study": {"property_type": "Flat", "built_form": None},
- },
- "ha_107": {
- "property_type": {
- "HOUSE": "House",
- "BUNGALOW": "Bungalow",
- "GRD FLOOR FLAT": "Flat",
- "FIRST FLOOR FLAT": "Flat",
- "SHELTERED BUNGALOW": "Bungalow",
- "MAISONETTE": "Maisonette",
- "SECOND FLOOR FLAT": "Flat",
- "SHELTERED FIRST FLR": "Flat",
- "SHELTERED GROUND FLR": "Flat",
- "GRD FLOOR BED SIT": "House"
- },
- "built_form": {
- "Semi Detached": "Semi-Detached",
- "Mid Terrace": "Mid-Terrace",
- "End Terrace": "End-Terrace",
- "Detached": "Detached",
- "Detatched": "Detached",
- }
- }
- }
-
outputs = {}
for ha_name, data_assets in loader.data.items():
@@ -633,82 +3876,21 @@ def get_epc_data(
results = []
scoring_data = []
nodata = []
+ failed_model_rows = []
for index, property_meta in tqdm(asset_list.iterrows(), total=len(asset_list)):
if property_meta["matching_postcode"] is None:
continue
- if ha_name == "ha_1":
- property_type = property_meta["Asset Type"]
- # We correct a small error
- if property_type == "a":
- property_type = "House"
-
- # Remap bedsits to flats
- if property_type in ["Bedsit", "Room"]:
- property_type = "Flat"
-
- built_form = property_type_lookup[ha_name]["built_form"].get(property_meta["Property Type"], None)
- elif ha_name == "ha_6":
- property_type = property_type_lookup[ha_name]["property_type"][property_meta["Dwelling type"]]
- built_form = property_meta["built_form"]
- elif ha_name == "ha_14":
- if property_meta["Asset Type Description"] == "Block - Repair":
- # We try and deduce if it's a flat or house, depending on if it has "room" or "flats" in the address
- if "room" in property_meta["Address 1"].lower():
- property_type = "House"
- else:
- property_type = "Flat"
-
- else:
- property_type = property_type_lookup[ha_name]["property_type"][
- property_meta["Asset Type Description"]
- ]
-
- built_form = None
- elif ha_name == "ha_39":
-
- property_type_config = property_type_lookup[ha_name].get(property_meta["ConstructionStyle"], {})
- property_type = property_type_config.get("property_type", None)
- built_form = property_type_config.get("built_form", None)
-
- if property_type is None:
- # We check for the presence of room or flat
- if "flat" in property_meta["matching_address"]:
- property_type = "Flat"
- else:
- property_type = "House"
- elif ha_name == "ha_107":
-
- dwelling_style = property_meta["Dwelling Style"]
- if isinstance(dwelling_style, str):
- dwelling_style = dwelling_style.strip()
-
- property_type = property_type_lookup[ha_name]["property_type"].get(property_meta["DwellingType"])
- built_form = property_type_lookup[ha_name]["built_form"].get(dwelling_style, None)
-
- if property_type is None:
- if built_form in ["Semi-Detached", "Mid-Terrace", "End-Terrace", "Detached"]:
- property_type = "House"
-
- if "flat" in property_meta["Wall Construction"].lower():
- property_type = "Flat"
-
- if (property_meta["DwellingType"] == "UNKNOWN") & (property_meta["Dwelling Style"] == 0):
- # Hand a few specific cases
- property_type = "Bungalow"
-
- if property_meta["Street"] == "School View":
- property_type = "Bungalow"
-
- else:
- raise NotImplementedError("Implement me")
+ property_type, built_form = get_property_type_and_built_form(
+ property_meta=property_meta, ha_name=ha_name
+ )
searcher = SearchEpc(
address1=str(property_meta["HouseNo"]),
postcode=property_meta["matching_postcode"],
auth_token=EPC_AUTH_TOKEN,
- os_api_key=None,
+ os_api_key="",
full_address=property_meta["matching_address"]
)
searcher.ordnance_survey_client.property_type = property_type
@@ -739,9 +3921,48 @@ def get_epc_data(
eligibility.check_gbis_warmfront()
eligibility.check_eco4_warmfront()
- if (not eligibility.eco4_warmfront["eligible"]) and (
- not eligibility.gbis_warmfront
- ) and consider_penultimate_epc:
+ # We check the conditions for checking the penultimate epc
+ identified_for_gbis = property_meta["ECO Eligibility"] in ["gbis"]
+ identified_for_eco4 = property_meta["ECO Eligibility"] in ["eco4"]
+ subject_to_ciga = property_meta["ECO Eligibility"] in [
+ "eco4 (subject to ciga)", "eco4 - passed ciga", "failed ciga"
+ ]
+
+ # condition 1 - identified for gbis and not eligible
+ condition_1 = (identified_for_gbis and not eligibility.gbis_warmfront
+ and not eligibility.eco4_warmfront["eligible"]
+ ) & consider_penultimate_epc
+
+ # condition 2 - identified for eco4 and not eligible
+ condition_2 = (identified_for_eco4 and not eligibility.eco4_warmfront[
+ "eligible"]) & consider_penultimate_epc
+
+ # successfully identigied gbis
+ condition_3 = (
+ identified_for_gbis and (eligibility.gbis_warmfront or eligibility.eco4_warmfront["eligible"])
+ )
+
+ # Nothing identified
+ condition_4 = (
+ not identified_for_gbis
+ and not identified_for_eco4
+ and not eligibility.gbis_warmfront
+ and not subject_to_ciga
+ and not eligibility.eco4_warmfront["eligible"]
+ )
+
+ # Not identified but seemingly eligible for eco4 or gbis
+ condition_5 = (
+ not identified_for_gbis and not identified_for_eco4 and (
+ eligibility.eco4_warmfront["eligible"] or eligibility.gbis_warmfront
+ )
+ )
+
+ condition_6 = (
+ subject_to_ciga and not eligibility.eco4_warmfront["eligible"]
+ )
+
+ if condition_1 or condition_2:
# We check the penultimate epc
eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
eligibility.check_gbis_warmfront()
@@ -750,53 +3971,55 @@ def get_epc_data(
# We don't update just to make data cleaning easier
if penultimate_epc.get("estimated") is None:
older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
+ elif condition_3 or condition_4 or condition_5 or condition_6:
+ pass
+ else:
+ NotImplementedError("Implement me")
# If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
# Loft MUST be suitable
cavity_age = None
if (
- eligibility.walls["is_cavity_wall"] and
- eligibility.walls["is_filled_cavity"] and
- eligibility.loft["suitability"] and
- eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
+ identified_for_eco4 and not eligibility.eco4_warmfront["eligible"]
):
# We check the age of the cavity and if it's particularly old, we flag it
cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)
- # Full checks
- eligibility.check_gbis()
- eligibility.check_eco4()
-
if eligibility.eco4_warmfront["eligible"]:
if eligibility.epc["uprn"] == "":
eligibility.epc["uprn"] = int(property_meta["asset_list_row_id"].split(ha_name)[1])
-
- scoring_dictionary = prepare_model_data_row(
- property_id=property_meta["asset_list_row_id"],
- modelling_epc=eligibility.epc,
- cleaned=cleaned,
- cleaning_data=cleaning_data,
- created_at=created_at,
- old_data=older_epcs,
- full_sap_epc=full_sap_epc,
- photo_supply_lookup=photo_supply_lookup,
- floor_area_decile_thresholds=floor_area_decile_thresholds
- )
- scoring_data.extend(scoring_dictionary)
+ try:
+ scoring_dictionary = prepare_model_data_row(
+ property_id=property_meta["asset_list_row_id"],
+ modelling_epc=eligibility.epc,
+ cleaned=cleaned,
+ cleaning_data=cleaning_data,
+ created_at=created_at,
+ old_data=older_epcs,
+ full_sap_epc=full_sap_epc,
+ photo_supply_lookup=photo_supply_lookup,
+ floor_area_decile_thresholds=floor_area_decile_thresholds
+ )
+ scoring_data.extend(scoring_dictionary)
+ except Exception as e:
+ # If we fail, we just keep a record of it
+ failed_model_rows.append(
+ property_meta["asset_list_row_id"]
+ )
results.append(
{
"row_id": property_meta["asset_list_row_id"],
"uprn": eligibility.epc["uprn"],
+ "is_estimated": searcher.newest_epc.get("estimated") is not None,
"property_type": eligibility.epc["property-type"],
- "gbis_eligible": eligibility.gbis_warmfront,
"eco4_eligible": eligibility.eco4_warmfront["eligible"],
"eco4_message": eligibility.eco4_warmfront["message"],
+ "eco4_strict": eligibility.eco4_warmfront["strict"],
+ "gbis_eligible": eligibility.gbis_warmfront["eligible"],
+ "gbis_message": eligibility.gbis_warmfront["message"],
+ "gbis_strict": eligibility.gbis_warmfront["strict"],
"sap": float(eligibility.epc["current-energy-efficiency"]),
- "gbis_eligible_future": eligibility.gbis["eligible"],
- "gbis_eligible_future_message": eligibility.gbis["message"],
- "eco4_eligible_future": eligibility.eco4["eligible"],
- "eco4_eligible_future_message": eligibility.eco4["message"],
# Property components
"roof": eligibility.roof["clean_description"],
"walls": eligibility.walls["clean_description"],
@@ -806,92 +4029,99 @@ def get_epc_data(
"date_epc": eligibility.epc["lodgement-date"],
"loft_thickness": eligibility.roof["insulation_thickness"],
"cavity_age": cavity_age,
- **eligibility.walls,
- **eligibility.roof,
- "is_estimated": searcher.newest_epc.get("estimated") is not None,
"eligibility_cavity_type": eligibility.eco4_warmfront["cavity_type"],
"eligibility_loft_type": eligibility.eco4_warmfront["loft_type"]
}
)
- scoring_df = pd.DataFrame(scoring_data)
- scoring_df = scoring_df.drop(
- columns=[
- "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
- "carbon_ending"
- ]
- )
-
- model_api = ModelApi(portfolio_id="-".join([ha_name, "eligibility"]), timestamp=created_at)
-
- # scoring_df["is_community"].value_counts()
- # scoring_df[scoring_df["is_community"] == "Unknown"]
- # property_meta = asset_list[asset_list["asset_list_row_id"] == "ha_67238"].squeeze()
-
- all_predictions = model_api.predict_all(
- df=scoring_df,
- bucket="retrofit-data-dev",
- prediction_buckets={
- "sap_change_predictions": "retrofit-sap-predictions-dev",
- "heat_demand_predictions": "retrofit-heat-predictions-dev",
- "carbon_change_predictions": "retrofit-carbon-predictions-dev"
- }
- )
-
results_df = pd.DataFrame(results)
+ scoring_df = pd.DataFrame(scoring_data)
+ results_df["post_install_sap"] = None
+ results_df["eligibility_classification"] = None
- predictions = all_predictions["sap_change_predictions"].copy()
-
- predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
- results_df[["row_id", "sap"]], how="left", on="row_id"
- )
- predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
- predictions = predictions.groupby("row_id")["sap_uplift"].sum().reset_index()
-
- results_df = results_df.merge(
- predictions[["sap_uplift", "row_id"]],
- how="left",
- on="row_id"
- )
- results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
-
- eligibility_assessment = []
- for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
- # The upgrade requirements are dependent on the current SAP
-
- # If the property is an F or G, it only needs to upgrade to an %
- if row["sap"] <= 38:
- if row["post_install_sap"] >= 57:
- eligibility_classification = "highest confidence"
- elif row["post_install_sap"] >= 55:
- eligibility_classification = "high confidence"
- elif row["post_install_sap"] >= 53:
- eligibility_classification = "medium confidence"
- else:
- eligibility_classification = "unlikely"
- else:
-
- if row["post_install_sap"] >= 71:
- eligibility_classification = "highest confidence"
- elif row["post_install_sap"] >= 69:
- eligibility_classification = "high confidence"
- elif row["post_install_sap"] >= 67:
- eligibility_classification = "medium confidence"
- else:
- eligibility_classification = "unlikely"
-
- eligibility_assessment.append(
- {
- "row_id": row["row_id"],
- "eligibility_classification": eligibility_classification
- }
+ if not scoring_df.empty:
+ scoring_df = scoring_df.drop(
+ columns=[
+ "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+ "carbon_ending"
+ ]
)
- eligibility_assessment = pd.DataFrame(eligibility_assessment)
+ model_api = ModelApi(portfolio_id="-".join([ha_name, "eligibility"]), timestamp=created_at)
+ model_api.MODEL_PREFIXES = ["sap_change_predictions"]
- results_df = results_df.merge(
- eligibility_assessment, how="left", on="row_id"
- )
+ scoring_df["id"] = scoring_df["id"] + "phase=0"
+ # We split up the scoring_df and score
+ predictions = []
+ to_loop_over = range(0, scoring_df.shape[0], 400)
+ for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
+ predictions_dict = model_api.predict_all(
+ df=scoring_df.iloc[chunk:chunk + 400],
+ bucket="retrofit-data-dev",
+ prediction_buckets={
+ "sap_change_predictions": "retrofit-sap-predictions-dev",
+ }
+ )
+
+ predictions.append(predictions_dict["sap_change_predictions"])
+
+ predictions = pd.concat(predictions)
+ predictions_size = predictions.shape[0]
+
+ predictions = predictions.rename(columns={"property_id": "row_id"}).merge(
+ results_df[["row_id", "sap"]], how="left", on="row_id"
+ )
+ if predictions.shape[0] != predictions_size:
+ raise ValueError("Predictions size has changed")
+ predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"]
+
+ results_df = results_df.merge(
+ predictions[["sap_uplift", "row_id"]],
+ how="left",
+ on="row_id"
+ )
+ results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"]
+
+ eligibility_assessment = []
+ for _, row in results_df[results_df["eco4_eligible"] == True].iterrows():
+ # The upgrade requirements are dependent on the current SAP
+
+ # If the property is an F or G (SAP <= 38), it only needs to reach roughly SAP 55 (presumably band D); otherwise the target is roughly SAP 69 (presumably band C)
+ if row["sap"] <= 38:
+ if row["post_install_sap"] >= 57:
+ eligibility_classification = "highest confidence"
+ elif row["post_install_sap"] >= 55:
+ eligibility_classification = "high confidence"
+ elif row["post_install_sap"] >= 53:
+ eligibility_classification = "medium confidence"
+ else:
+ eligibility_classification = "unlikely"
+ else:
+
+ if row["post_install_sap"] >= 71:
+ eligibility_classification = "highest confidence"
+ elif row["post_install_sap"] >= 69:
+ eligibility_classification = "high confidence"
+ elif row["post_install_sap"] >= 67:
+ eligibility_classification = "medium confidence"
+ else:
+ eligibility_classification = "unlikely"
+
+ eligibility_assessment.append(
+ {
+ "row_id": row["row_id"],
+ "eligibility_classification": eligibility_classification
+ }
+ )
+
+ eligibility_assessment = pd.DataFrame(eligibility_assessment)
+
+ # Make sure the results haven't changed in size
+ results_df = results_df.merge(
+ eligibility_assessment, how="left", on="row_id"
+ )
+ if results_df.shape[0] != len(results):
+ raise ValueError("results has changed size")
# We store the results in S3 as a pickle
save_pickle_to_s3(
@@ -914,430 +4144,769 @@ def get_epc_data(
def get_col_widths(dataframe):
- # First we find the maximum length of the index column
- idx_max = max([len(str(s)) for s in dataframe.index.values] + [len(str(dataframe.index.name))])
- # Then, we concatenate this to the max of the lengths of column name and its max value for each column, row-wise
- return [idx_max] + [max(dataframe[col].astype(str).map(len).max(), len(col)) for col in dataframe.columns]
+ # Define a maximum width for any column to prevent excessively wide columns
+ max_allowed_width = 25
+
+ # Calculate widths for columns
+ widths = []
+
+ if isinstance(dataframe.columns, pd.MultiIndex):
+ # For MultiIndex, calculate max width considering the header and data
+ header_widths = [max(len(str(item)) for item in col) + 2 for col in dataframe.columns.values] # +2 for padding
+ for i, column in enumerate(dataframe.columns):
+ max_data_width = max(dataframe[column].astype(str).apply(len).max(), header_widths[i])
+ widths.append(min(max_data_width, max_allowed_width))
+ else:
+ # For non-MultiIndex, calculate width normally
+ for col in dataframe.columns:
+ # Calculate the max length of data or column name and limit it
+ max_length = max(dataframe[col].astype(str).apply(len).max(), len(str(col)) + 2) # +2 for padding
+ widths.append(min(max_length, max_allowed_width))
+
+ return widths
-def analyse_ha_data(outputs, loader):
- """
- The approach we take within this function is the following:
- For properties that have been identified by warmfront as eligible properties, characterise them by scheme. The
- characterisation can be broken down as the following:
- 1) The property has been identified by Warmfront and is eligible for ECO4/GBIS work, under the strictest criteria
- 2) The property has been identified by Warmfront, however it has a full cavity, and therefore would be subject to
- a CIGA check
- 3) The property has been identified by Warmfront, but the EPC shows that the property has more than 100mm loft
- insulation
- 4) The property has been identified by Warmfront, but doesn't look like a property that would likely qualify under
- any cirsumstances, given the available data
+# def analyse_ha_data(outputs, loader):
+# """
+# The approach we take within this function is the following:
+# For properties that have been identified by warmfront as eligible properties, characterise them by scheme. The
+# characterisation can be broken down as the following:
+# 1) The property has been identified by Warmfront and is eligible for ECO4/GBIS work, under the strictest criteria
+# 2) The property has been identified by Warmfront, however it has a full cavity, and therefore would be subject to
+# a CIGA check
+# 3) The property has been identified by Warmfront, but the EPC shows that the property has more than 100mm loft
+# insulation
+# 4) The property has been identified by Warmfront, but doesn't look like a property that would likely qualify under
+# any circumstances, given the available data
+#
+# Then, for any property that has NOT been identified by Warmfront, we identify properties that look like they would
+# qualify under the strictest criteria, and mark these as potential additional opportunities.
+#
+# :return:
+# """
+#
+# eco4_rate = 1710
+# gbis_rate = 600
+# # old_eco4_rate = 1456
+# old_gbis_rate = 432
+#
+# epc_c_threshold = 80
+# scheme_map = {
+# "ECO4": "ECO4",
+# "AFFORDABLE WARMTH": "ECO4",
+# "ECO4 A/W": "ECO4",
+# "ECO4 GBIS (ECO+)": "GBIS"
+# }
+#
+# ha_analysis_results = []
+# total_revenue_results = []
+# for ha_name, datasets in outputs.items():
+# inputs = [x for k, x in loader.data.items() if k == ha_name][0]
+#
+# results_df = datasets["results_df"].copy()
+#
+# analysis_data = inputs["asset_list"][['asset_list_row_id', "ECO Eligibility"]].rename(
+# columns={"row_meaning": "asset_identification_status"}
+# ).merge(
+# results_df,
+# how="left",
+# right_on="row_id",
+# left_on="asset_list_row_id"
+# )
+#
+# analysis_data["is_remaining"] = True
+#
+# n_sold_eco4 = 0
+# n_sold_gbis = 0
+# if not inputs["survey_list"].empty:
+# # Merge on the survey list and signal everything that is remaining or not (i.e. anything that hasn't had
+# # a survey)
+# survey_list = inputs["survey_list"].copy()
+#
+# # TODO: TEMP
+# scheme_column = survey_list.columns[0]
+# # We clean up the survey list installation or cancelled
+# survey_list["installed_or_cancelled_clean"] = survey_list["INSTALLED OR CANCELLED"].str.lower()
+# # Remove all punctuation
+# survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace(
+# r'[^\w\s]', '', regex=True
+# )
+# # Remove double spaces
+# survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace(
+# r'\s+', ' ', regex=True
+# )
+# # Remove trailing spaces
+# survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.strip()
+#
+# # Remap the values in the scheme column
+# survey_list[scheme_column] = survey_list[scheme_column].replace(scheme_map)
+#
+# survey_list["installation_status"] = None
+# survey_list["installation_status"] = np.where(
+# survey_list["installed_or_cancelled_clean"].isin(["installed", "installed see notes"]),
+# "installed",
+# survey_list["installation_status"]
+# )
+# survey_list["installation_status"] = np.where(
+# survey_list["installed_or_cancelled_clean"].isin(["cancelled"]),
+# "cancelled",
+# survey_list["installation_status"]
+# )
+# # Find partial installations
+# survey_list["installation_status"] = np.where(
+# survey_list["installed_or_cancelled_clean"].str.contains("still to be installed"),
+# "partially installed",
+# survey_list["installation_status"]
+# )
+# # Find partial cancellations
+# # TODO: We might have more indications of partial cancellations
+# survey_list["installation_status"] = np.where(
+# survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]),
+# "partially cancelled",
+# survey_list["installation_status"]
+# )
+#
+# # Finally, for other cases, we set the status to "in progress"
+# survey_list["installation_status"] = survey_list["installation_status"].fillna("in progress")
+#
+# # We concatenate the scheme name with the installation status
+# survey_list["installation_status"] = (
+# survey_list[scheme_column] + " - " + survey_list["installation_status"]
+# )
+#
+# # TODO: END TEMP
+#
+# survey_list_to_merge = survey_list[["asset_list_row_id", scheme_column]].copy()
+# survey_list_to_merge["is_remaining"] = False
+# analysis_data = analysis_data.drop(columns="is_remaining").merge(
+# survey_list_to_merge,
+# how="left", on="asset_list_row_id"
+# )
+# analysis_data["is_remaining"] = analysis_data["is_remaining"].fillna(True)
+#
+# n_sold_eco4 = survey_list_to_merge[survey_list_to_merge[scheme_column] == "ECO4"].shape[0]
+# n_sold_gbis = survey_list_to_merge[survey_list_to_merge[scheme_column] == "GBIS"].shape[0]
+#
+# # Take just remaining
+# analysis_data = analysis_data[analysis_data["is_remaining"]]
+#
+# # Also, if the HA has started selling, we remove any that are still subject to ciga
+# n_eco4_missed_subject_to_ciga = 0
+# if not inputs["survey_list"].empty:
+# n_eco4_missed_subject_to_ciga = (analysis_data["ECO Eligibility"] == "eco4 (subject to ciga)").sum()
+# analysis_data = analysis_data[analysis_data["ECO Eligibility"] != "eco4 (subject to ciga)"]
+#
+# ################################################################################################
+# # We take the properties that strictly qualified under eco
+# ################################################################################################
+#
+# eco4_identified = analysis_data[analysis_data["ECO Eligibility"] == "eco4"].copy()
+# eco4_identified["identification_type"] = None
+# eco4_identified["identification_type"] = np.where(
+# (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == True),
+# "strict",
+# eco4_identified["identification_type"]
+# )
+#
+# # For expansive, the property can be no higher than an EPC C
+# eco4_identified["identification_type"] = np.where(
+# (eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == False) & (
+# eco4_identified["sap"] <= epc_c_threshold
+# ),
+# "expansive",
+# eco4_identified["identification_type"]
+# )
+# ################################################################################################
+# # We take the properties dependent on CIGA
+# ################################################################################################
+#
+# ciga_dependent_identified = analysis_data[
+# analysis_data["ECO Eligibility"].isin(
+# [
+# "eco4 (subject to ciga)",
+# "eco4 - passed ciga"
+# ]
+# )
+# ].copy()
+#
+# # These are properties that show filled cavity
+# ciga_dependent_identified["identification_type"] = None
+# ciga_dependent_identified["identification_type"] = np.where(
+# ciga_dependent_identified["eco4_message"].isin(
+# [
+# "Perfect suitability",
+# "Meets cavity and sap",
+# "Fails cavity, meets loft, fails SAP",
+# "Meets fabric, fails SAP check",
+# "Meets cavity, loft borderline, meets sap",
+# ]
+# ) & (ciga_dependent_identified["sap"] <= epc_c_threshold),
+# "strict",
+# ciga_dependent_identified["identification_type"]
+# )
+#
+# ciga_dependent_identified["identification_type"] = np.where(
+# ((ciga_dependent_identified["eco4_message"].isin(["Meets just cavity"])) | (
+# ciga_dependent_identified["walls"].isin(["Cavity wall, filled cavity"])
+# )) & (
+# (ciga_dependent_identified["sap"] <= epc_c_threshold) &
+# pd.isnull(ciga_dependent_identified["identification_type"])
+# ),
+# "expansive",
+# ciga_dependent_identified["identification_type"]
+# )
+#
+# ################################################################################################
+# # We take the properties that qualified for gbis
+# ################################################################################################
+# gbis_identified = analysis_data[analysis_data["ECO Eligibility"] == "gbis"].copy()
+# gbis_identified["identification_type"] = None
+# gbis_identified["identification_type"] = np.where(
+# (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] < 69),
+# "strict",
+# gbis_identified["identification_type"]
+# )
+#
+# gbis_identified["identification_type"] = np.where(
+# (gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] <= epc_c_threshold) & (
+# pd.isnull(gbis_identified["identification_type"])
+# ),
+# "expansive",
+# gbis_identified["identification_type"]
+# )
+#
+# # Finally, we look at the properties that have not been identified by Warmfront
+# not_identified = analysis_data[
+# analysis_data["ECO Eligibility"].isin(
+# [
+# "not eligible"
+# ]
+# )
+# ].copy()
+#
+# surplus_eco4 = not_identified[
+# (not_identified["eco4_eligible"] == True) & (not_identified["eco4_message"].isin(
+# ["Perfect suitability", "Meets cavity, loft borderline, meets sap", "Near perfect suitability"]
+# ))
+# ]
+#
+# surplus_gbis = not_identified[
+# (not_identified["gbis_eligible"] == True) & (
+# ~not_identified["asset_list_row_id"].isin(surplus_eco4["asset_list_row_id"].values)
+# ) & (not_identified["sap"] < 69) & (
+# (not_identified["cavity_type"].isin(["empty", "partial insulation"])) | (
+# not_identified["walls"].str.contains("partial", case=False, na=False)
+# )
+# )
+# ]
+# surplus_gbis = surplus_gbis[surplus_gbis["is_estimated"] == False]
+#
+# # Output variables - the data was sent to us in December, but the remaining figures are
+# # what was in November
+# november_remaining = loader.december_figures[loader.december_figures["HA Name"] == ha_name]
+#
+# # ECO4
+# n_properties_remaining_in_asset_list = inputs["asset_list"].shape[0]
+# november_eco4_remaining = max(november_remaining["ECO4 remaining"].values[0], 0)
+# november_eco4_sold = november_remaining["No. of Tech surveys complete - Eco 4"].values[0]
+# eco4_sales_since_november = n_sold_eco4 - november_eco4_sold
+#
+# n_warmfront_identified_eco4 = eco4_identified.shape[0] + ciga_dependent_identified.shape[0]
+# eco4_of_which_identified_strict = (
+# eco4_identified[eco4_identified["identification_type"] == "strict"].shape[0] +
+# ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "strict"].shape[0]
+# )
+# eco4_of_which_identified_expansive = (
+# eco4_identified[eco4_identified["identification_type"] == "expansive"].shape[0] +
+# ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "expansive"].shape[0]
+# )
+# # GBIS
+# n_warmfront_identified_gbis = gbis_identified.shape[0]
+# november_gbis_remaining = max(november_remaining["GBIS remaining"].values[0], 0)
+# november_gbis_sold = november_remaining["No. of Tech surveys complete - GBIS"].values[0]
+# gbis_sales_since_november = n_sold_gbis - november_gbis_sold
+# gbis_of_which_identified_strict = gbis_identified[gbis_identified["identification_type"] == "strict"].shape[0]
+# gbis_of_which_identified_expansive = \
+# gbis_identified[gbis_identified["identification_type"] == "expansive"].shape[0]
+#
+# to_append = {
+# ("", "HA Name"): ha_name,
+# ("", "# properties in asset list"): n_properties_remaining_in_asset_list,
+# ############
+# # ECO4
+# ############
+# ("ECO4", "# remaining November file"): november_eco4_remaining,
+# ("ECO4", "# sold in November file"): november_eco4_sold,
+# ("ECO4", "# sold (survey list)"): n_sold_eco4,
+# ("ECO4", "# that missed CIGA check"): n_eco4_missed_subject_to_ciga,
+# ("ECO4", "# Remaining properties (asset list)"): n_warmfront_identified_eco4,
+# ("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict,
+# ("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive,
+# ("ECO4", "Of which identified by model - total"): (
+# eco4_of_which_identified_strict + eco4_of_which_identified_expansive
+# ),
+# ("ECO4", "Additional properties"): surplus_eco4.shape[0],
+# ############
+# # GBIS
+# ############
+# ("GBIS", "# remaining November file"): november_gbis_remaining,
+# ("GBIS", "# sold in November file"): november_gbis_sold,
+# ("GBIS", "# sold (survey list)"): n_sold_gbis,
+# ("GBIS", "# Remaining properties (asset list)"): n_warmfront_identified_gbis,
+# ("GBIS", "Of which identified by model - strict"): gbis_of_which_identified_strict,
+# ("GBIS", "Of which identified by model - expansive"): gbis_of_which_identified_expansive,
+# ("GBIS", "Of which identified by model - total"): (
+# gbis_of_which_identified_strict + gbis_of_which_identified_expansive
+# ),
+# ("GBIS", "Additional properties"): surplus_gbis.shape[0]
+# }
+#
+# ha_analysis_results.append(to_append)
+#
+# # Calculate the revenue results
+# to_append_revenue = {
+# ("", "HA Name"): ha_name,
+# # Eco4 revenue
+# ("ECO4", "£ remaining November file"): november_eco4_remaining * eco4_rate,
+# ("ECO4", "£ sold November file"): november_eco4_sold * old_eco4_rate,
+# ("ECO4", "£ sold since November"): eco4_sales_since_november * eco4_rate,
+# ("ECO4", "£ stuck at ciga check"): n_eco4_missed_subject_to_ciga * eco4_rate,
+# ("ECO4", "£ remaining (asset list)"): n_warmfront_identified_eco4 * eco4_rate,
+# ("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict * eco4_rate,
+# ("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive * eco4_rate,
+# ("ECO4", "Of which identified by model - total"): eco4_rate * (
+# eco4_of_which_identified_strict + eco4_of_which_identified_expansive
+# ),
+# ("ECO4", "Additional properties"): eco4_rate * surplus_eco4.shape[0],
+# }
+# total_revenue_results.append(to_append_revenue)
+#
+# ha_analysis_results = pd.DataFrame(ha_analysis_results)
+# ha_analysis_results.columns = pd.MultiIndex.from_tuples(ha_analysis_results.columns)
+#
+# facts_and_figures = loader.facts_and_figures.copy()
+# facts_and_figures["ha_number"] = facts_and_figures["HA Name"].str.extract(r'(\d+)').astype(int)
+# facts_and_figures = facts_and_figures.sort_values("ha_number")
+# facts_and_figures = facts_and_figures.drop(columns=["ha_number"])
+#
+# # Rename some of the cols
+# facts_and_figures = facts_and_figures.rename(
+# columns={
+# # ECO4 cols
+# "ECO4": "ECO4 - November",
+# "GBIS": "GBIS - November",
+# "eco4 (subject to ciga)": "ECO4 - subject to ciga",
+# "eco4": "ECO4 - doesn't need CIGA",
+# "eco4 - passed ciga": "ECO4 - passed CIGA",
+# "failed ciga": "ECO4 - failed CIGA",
+# "ECO4 - partially cancelled": "ECO4 - Install downgrade to GBIS",
+# "ECO4 - in progress": "ECO4 - Install in progress",
+# "ECO4 - cancelled": "ECO4 - Install cancelled",
+# # GBIS cols
+# "gbis": "GBIS total (asset list)"
+# }
+# )
+# # We calculate the eco4 total from the asset list
+# # 1) If ciga checks have been completed (i.e. ECO4 - passed ciga > 0) this sum is
+# # ECO4 - doesn't need CIGA + ECO4 - passed CIGA
+# # 2) if ciga checks haven't been completed (i.e. ECO4 - passed ciga is missing), this sum is
+# # ECO4 - doesn't need CIGA + ECO4 - subject to ciga
+# facts_and_figures["ECO4 total (asset list - pre ciga)"] = (
+# facts_and_figures["ECO4 - doesn't need CIGA"] +
+# facts_and_figures["ECO4 - subject to ciga"] +
+# facts_and_figures["ECO4 - passed CIGA"]
+# )
+#
+# facts_and_figures["ECO4 total (asset list - post ciga)"] = None
+# facts_and_figures["ECO4 total (asset list - post ciga)"] = np.where(
+# facts_and_figures["ECO4 - passed CIGA"] > 0,
+# facts_and_figures["ECO4 - doesn't need CIGA"] + facts_and_figures["ECO4 - passed CIGA"],
+# facts_and_figures["ECO4 total (asset list - post ciga)"]
+# )
+#
+# # Re-arrange the columns
+# facts_and_figures = facts_and_figures[
+# [
+# 'HA Name',
+# 'ECO4 - November',
+# 'GBIS - November',
+# 'ECO4 total (asset list - pre ciga)',
+# 'ECO4 total (asset list - post ciga)',
+# 'GBIS total (asset list)',
+# 'ECO4 - subject to ciga',
+# "ECO4 - doesn't need CIGA",
+# 'ECO4 - passed CIGA',
+# 'ECO4 - failed CIGA',
+# 'ECO4 - installed',
+# 'ECO4 - Install in progress',
+# 'ECO4 - Install cancelled',
+# 'ECO4 - partially installed',
+# 'ECO4 - Install downgrade to GBIS',
+# ]
+# ]
+# # Add a note to flag any rows where both
+# # "ECO4 - subject to ciga" is greater than 0 and "ECO4 - passed CIGA" is greater than 0,
+# # so that the properties still needing a CIGA check are reported as a potential opportunity
+# facts_and_figures["Missed CIGA checks opportunity"] = None
+# facts_and_figures["Missed CIGA checks opportunity"] = np.where(
+# (facts_and_figures["ECO4 - subject to ciga"] > 0) & (facts_and_figures["ECO4 - passed CIGA"] > 0),
+# "potential opportunity of " + facts_and_figures["ECO4 - subject to ciga"].astype(
+# str) + " ECO4 properties needing a CIGA check",
+# facts_and_figures["Missed CIGA checks opportunity"]
+# )
+#
+# facts_and_figures.to_csv("Facts and figures sample.csv")
+#
+# # Re-arrange the columns
+#
+# # Also sort ha_analysis_results by ha number
+# ha_analysis_results["ha_number"] = ha_analysis_results[("", "HA Name")].str.extract(r'(\d+)').astype(int)
+# ha_analysis_results = ha_analysis_results.sort_values("ha_number")
+# ha_analysis_results = ha_analysis_results.drop(columns=["ha_number"])
+#
+# # We save 2 sheets
+# # Automate creation of the excel
+# # Create a Pandas Excel writer using XlsxWriter as the engine
+# with pd.ExcelWriter('HA Analysis Results.xlsx', engine='xlsxwriter') as writer:
+# # Write each dataframe to a different worksheet without the index
+# for df, sheet in [(facts_and_figures, 'HA Facts and Figures'),
+# (ha_analysis_results, 'Asset Identification')]:
+#
+# df.to_excel(writer, sheet_name=sheet)
+#
+# # Auto-adjust columns' width
+# for i, width in enumerate(get_col_widths(df)):
+# writer.sheets[sheet].set_column(i, i, width)
+#
+# # Inspection: - Looking into the proportion of homes with "cavity, as built, insulated (assumed)" as their
+# # description, and what proportion of time they get identified via non-invasive surveys
+#
+# # true_eco4_assets = []
+# # ciga_dependent_assets = []
+# # not_eligible = []
+# # as_built_insulated = []
+# # date_cols = {
+# # "HA39": "date_built",
+# # "HA14": "Built In Year",
+# # "HA6": "Construction Year",
+# # "HA1": "Build Date",
+# # "HA107": "YEAR BUILT"
+# # }
+# # for ha_name, data_objects in outputs.items():
+# # inputs = [x for k, x in loader.data.items() if k == ha_name][0]
+# #
+# # date_col = date_cols[ha_name]
+# # results_df = data_objects["results_df"].copy()
+# # df = inputs["asset_list"][['asset_list_row_id', "ECO Eligibility", date_col]].rename(
+# # columns={"row_meaning": "asset_identification_status", date_col: "date_built"}
+# # ).merge(
+# # results_df,
+# # how="left",
+# # right_on="row_id",
+# # left_on="asset_list_row_id"
+# # )
+# #
+# # # take the true ECO4
+# # true_eco4 = df[df["ECO Eligibility"] == "eco4"].copy()
+# # ciga_dependent = df[
+# # df["ECO Eligibility"].isin(
+# # [
+# # "eco4 (subject to ciga)",
+# # "failed ciga",
+# # "eco4 - passed ciga"
+# # ]
+# # )
+# # ]
+# # insulated_assumed = df[df["walls"] == "Cavity wall, as built, insulated"].copy()
+# # # We convert date built to datetime
+# # try:
+# # insulated_assumed = insulated_assumed[~pd.isnull(insulated_assumed["date_built"])]
+# # insulated_assumed["year_built"] = pd.to_datetime(insulated_assumed["date_built"].astype(str)).dt.year
+# # as_built_insulated.append(insulated_assumed)
+# # except Exception as e:
+# # print("oh well")
+# #
+# # true_eco4_assets.append(true_eco4)
+# # ciga_dependent_assets.append(ciga_dependent)
+# #
+# # true_eco4_assets = pd.concat(true_eco4_assets)
+# # ciga_dependent_assets = pd.concat(ciga_dependent_assets)
+# # as_built_insulated = pd.concat(as_built_insulated)
+# #
+# # true_eco4_assets["walls"].value_counts(normalize=True)
+# # ciga_dependent_assets["walls"].value_counts(normalize=True)
+# #
+# # from recommendations.recommendation_utils import extract_insulation_thickness
+# #
+# # true_eco4_assets["roof_insulation_thickness"] = true_eco4_assets["roof"].apply(
+# # lambda x: extract_insulation_thickness(x)
+# # )
+# #
+# # true_eco4_assets["e"] = true_eco4_assets.merge(
+# # pd.DataFrame(cleaned["roof-description"])[["original_description", "insulation_thickness"]],
+# # how="left",
+# # left_on="roof",
+# # right_on="original_description"
+# # )
+# #
+# # true_eco4_assets["sap"].mean()
+# #
+# # true_eco4_assets["insulation_thickness"].isin(
+# # ["250", "150", "200", "100", "75", "50"]
+# # ).sum() / true_eco4_assets.shape[0]
+# #
+# # true_eco4_assets["insulation_thickness"].isin(
+# # ["100"]
+# # ).sum() / true_eco4_assets.shape[0]
+# #
+# # as_built_insulated.groupby("property_type")["ECO Eligibility"].value_counts(normalize=True)
- Then, for any property that has NOT been identifid by Warmfront, we identify properties that look like they would
- qualify under the strictest criteria, and mark these as potential additional opportunities.
- :return:
- """
+def get_propensity_model_data(
+ loader, cleaned, cleaning_data, created_at, photo_supply_lookup,
+ floor_area_decile_thresholds, pull_data=True
+):
+ # TODO: Set a seed!
+ model_data = []
+ for ha_name, data_assets in loader.data.items():
- eco4_rate = 1710
- gbis_rate = 600
+ logger.info("Processing HA: %s", ha_name)
+ if data_assets["survey_list"].empty:
+ continue
- ha_analysis_results = []
- ha_revenue_results = []
- for ha_name, datasets in outputs.items():
+ number_sold = data_assets["survey_list"].shape[0]
+
+ # For each HA, we pull in the data required and store it in S3
+ asset_list = data_assets["asset_list"].copy()
+ # We determine the number of properties that we should select that are eligible
+ asset_list_size = asset_list.shape[0]
+ # Number eligible
+ n_eligibile = asset_list[asset_list["ECO Eligibility"] != "not eligible"].shape[0]
+ success_rate = n_eligibile / asset_list_size
+ needed_sample_size = np.ceil(number_sold / success_rate)
+ number_negative_samples = int(needed_sample_size - number_sold)
+
+ sold_asset_list_ids = data_assets["survey_list"]["asset_list_row_id"].tolist()
+ negative_sample_asset_list_ids = asset_list["asset_list_row_id"].sample(number_negative_samples).tolist()
+ sample_ids = sold_asset_list_ids + negative_sample_asset_list_ids
+
+ sample_asset_list = asset_list[asset_list["asset_list_row_id"].isin(sample_ids)]
+
+ # In order to have the most confidence, we should take just properties that have 1 EPC. We might need to
+ # cut down the number of properties that we include because of this
+ # Note: This is an imbalanced problem, so we will need to build a model that accommodates that
+
+ data = []
+ errors = []
+ for index, property_meta in tqdm(sample_asset_list.iterrows(), total=len(sample_asset_list)):
+
+ if property_meta["matching_postcode"] is None:
+ continue
+
+ property_type, built_form = get_property_type_and_built_form(
+ property_meta=property_meta, ha_name=ha_name
+ )
+
+ searcher = SearchEpc(
+ address1=str(property_meta["HouseNo"]),
+ postcode=property_meta["matching_postcode"],
+ auth_token=EPC_AUTH_TOKEN,
+ os_api_key="",
+ full_address=property_meta["matching_address"]
+ )
+ searcher.ordnance_survey_client.property_type = property_type
+ searcher.ordnance_survey_client.built_form = built_form
+ searcher.find_property(skip_os=True)
+
+ if searcher.newest_epc is None:
+ continue
+
+ if searcher.newest_epc.get("estimated"):
+ # We insert the row ID as our proxy for UPRN
+ searcher.newest_epc["uprn"] = int(property_meta["asset_list_row_id"].split(ha_name)[1])
+
+ newest_epc = searcher.newest_epc
+ older_epcs = searcher.older_epcs
+ full_sap_epc = searcher.full_sap_epc
+
+ # If we have more than 1 EPC for the moment we just continue
+ if older_epcs or full_sap_epc:
+ continue
+ try:
+
+ # We clean up the data
+ epc_records = {
+ 'original_epc': newest_epc.copy(),
+ 'full_sap_epc': full_sap_epc.copy(),
+ 'old_data': older_epcs.copy(),
+ }
+
+ epc_record = EPCRecord(
+ epc_records=epc_records,
+ run_mode="newdata",
+ cleaning_data=cleaning_data
+ )
+
+ # If we have some data, continue
+ data.append(
+ {
+ "ECO Eligibility": property_meta["ECO Eligibility"],
+ "asset_list_row_id": property_meta["asset_list_row_id"],
+ **epc_record.get("prepared_epc")
+ }
+ )
+ except Exception as e:
+ errors.append(
+ {
+ "error": str(e),
+ "asset_list_row_id": property_meta["asset_list_row_id"],
+ "matching_postcode": property_meta["matching_postcode"],
+ "matching_address": property_meta["matching_address"]
+ }
+ )
+
+ data = pd.DataFrame(data)
+ # We store the results in S3 as a pickle
+ save_pickle_to_s3(
+ data=data,
+ bucket_name="retrofit-datalake-dev",
+ s3_file_name=f"propensity_model_data/{ha_name}/train.pickle"
+ )
+
+ # Store the errors
+ if errors:
+ save_pickle_to_s3(
+ data=errors,
+ bucket_name="retrofit-datalake-dev",
+ s3_file_name=f"propensity_model_data/{ha_name}/errors.pickle"
+ )
+
+ model_data.append(data)
+
+ return model_data
+
+
+def conversion_model(loader):
+    """
+    Proof-of-concept conversion (propensity) model.
+
+    For every housing association (HA) in ``loader.data`` the stored training sample is read from S3, the
+    installation status from that HA's survey list is merged on, and a binary ``response`` is derived
+    (1 when the installation status is "ECO4 - in progress" or "ECO4 - installed"). The sample is then
+    re-balanced per HA before the model features are prepared and an XGBoost classifier is fitted.
+
+    :param loader: object whose ``data`` attribute maps each HA name to a dict holding at least the
+        "survey_list" and "asset_list" dataframes
+    :raises ValueError: if no training data could be loaded for any HA
+    :return: None - the ranked feature importances are printed
+    """
+    # Read in the model data
+
+    model_data = []
+    for ha_name in loader.data.keys():
+        try:
+            pickled = read_pickle_from_s3(
+                bucket_name="retrofit-datalake-dev",
+                s3_file_name=f"propensity_model_data/{ha_name}/train.pickle"
+            )
+            data = pd.DataFrame(pickled)
+
+            # We merge on the sales data
+            sales_data = loader.data[ha_name]["survey_list"].copy()
+            data = data.merge(
+                sales_data[["asset_list_row_id", "installation_status"]],
+                how="left",
+                on="asset_list_row_id"
+            )
+            data["ha_name"] = ha_name
+
+        except Exception as e:
+            # Best effort: an HA without usable training data is skipped, but we record why it failed so
+            # the problem can be diagnosed from the logs
+            logger.error("Error reading in the data for %s: %s", ha_name, e)
+            continue
+
+        model_data.append(data)
+
+    if not model_data:
+        # pd.concat would raise an opaque "No objects to concatenate" - fail with a clear message instead
+        raise ValueError("No propensity model data could be loaded for any housing association")
+
+    model_data = pd.concat(model_data)
+
+    model_data["response"] = model_data["installation_status"].isin(
+        [
+            "ECO4 - in progress",
+            "ECO4 - installed"
+        ]
+    ).astype(int)
+
+    # Because of how we pulled the data, we need to re-balance the sample
+    ha_names = model_data["ha_name"].unique()
+
+    balanced_sample = []
+    for ha_name in ha_names:
+        df = model_data[model_data["ha_name"] == ha_name]
+        positive_samples = df[df["response"] == 1]
+        negative_samples = df[df["response"] != 1]
         inputs = [x for k, x in loader.data.items() if k == ha_name][0]
- # TODO: This is placeholder because we don't have the schemes that the properties have been qualified for
- # yet
- #
- import random
- randomly_allocated_schemes = random.choices(["ECO4", "GBIS"], k=inputs["asset_list"].shape[0])
- inputs["asset_list"]["randomly_allocated_schemes"] = randomly_allocated_schemes
- inputs["asset_list"]["funding_scheme"] = None
- inputs["asset_list"]["funding_scheme"] = np.where(
- inputs["asset_list"]["row_meaning"] == "identified potential eco works (CWI)",
- inputs["asset_list"]["randomly_allocated_schemes"],
- inputs["asset_list"]["funding_scheme"]
- )
+        asset_list = inputs["asset_list"].copy()
+        asset_list_size = asset_list.shape[0]
+        n_eligible = asset_list[asset_list["ECO Eligibility"] != "not eligible"].shape[0]
+        # Share of the asset list that is not marked "not eligible"; the balanced sample is sized so that
+        # the positives make up this share of it
+        success_rate = n_eligible / asset_list_size
+        needed_sample_size = np.ceil(positive_samples.shape[0] / success_rate)
+        number_negative_samples = int(needed_sample_size - positive_samples.shape[0])
+        # DataFrame.sample raises if asked for more rows than exist (sampling without replacement), so we
+        # take every available negative when there are fewer than the balance calls for
+        number_negative_samples = min(number_negative_samples, negative_samples.shape[0])
+        negative_samples_subset = negative_samples.sample(number_negative_samples)
- # TODO: Also temp, just for HA 6
- if ha_name == "ha_6":
- inputs["survey_list"]["funding_scheme"] = None
- inputs["survey_list"]["funding_scheme"] = np.where(
- inputs["survey_list"][
- 'AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION '] == "AFFORDABLE WARMTH",
- "ECO4",
- "GBIS"
- )
+        output = pd.concat([positive_samples, negative_samples_subset])
- # End placholder
+        balanced_sample.append(output)
- results_df = datasets["results_df"].copy()
+    balanced_sample = pd.concat(balanced_sample)
- analysis_data = inputs["asset_list"][['asset_list_row_id', "row_meaning", "funding_scheme"]].rename(
- columns={"row_meaning": "asset_identification_status"}
- ).merge(
- results_df,
- how="left",
- right_on="row_id",
- left_on="asset_list_row_id"
- )
-
- # We now merge the survey list onto the analysis data and remove anything that is sold, to give us just what is
- # remaining
-
- if inputs["matched_lookup"] is not None:
- analysis_data = analysis_data.merge(
- inputs["matched_lookup"], how="left", on="asset_list_row_id"
- )
- # Drop any rows that have a survey_list_row_id
- analysis_data = analysis_data[pd.isnull(analysis_data["survey_list_row_id"])]
-
- # If we have a survey list, we merge this onto the results
- n_properties_in_asset_list = analysis_data["asset_list_row_id"].nunique()
-
- properties_sold = (
- inputs["survey_list"].groupby("funding_scheme")["survey_list_row_id"].nunique().reset_index() if
- inputs["survey_list"] is not None else pd.DataFrame(columns=["funding_scheme"])
- )
- properties_sold_eco4 = (
- properties_sold[properties_sold["funding_scheme"] == "ECO4"]["survey_list_row_id"].values[0] if
- (not properties_sold.empty) and ("ECO4" in properties_sold["funding_scheme"].values) else 0
- )
- properties_sold_gbis = (
- properties_sold[properties_sold["funding_scheme"] == "GBIS"]["survey_list_row_id"].values[0] if
- (not properties_sold.empty) and ("GBIS" in properties_sold["funding_scheme"].values) else 0
- )
-
- # We now calculate the number of remaining properties, by scheme
- remaining_properties = analysis_data[
- analysis_data["asset_identification_status"] == "identified potential eco works (CWI)"
- ].copy()
- remaining_properties["prospect_type"] = None
-
- remaining_properties_by_scheme = (
- remaining_properties.groupby("funding_scheme")["asset_list_row_id"].nunique().reset_index()
- )
-
- n_remaining_properties_eco4 = remaining_properties_by_scheme[
- remaining_properties_by_scheme["funding_scheme"] == "ECO4"
- ]["asset_list_row_id"].values[0]
-
- n_remaining_properties_gbis = remaining_properties_by_scheme[
- remaining_properties_by_scheme["funding_scheme"] == "GBIS"
- ]["asset_list_row_id"].values[0]
-
- # For the remaining properties, we use the results of the eligibility process to classify the property into
- # one of multiple categories
- #
- # For properties that have been identified as ECO4
- # 1) Strict ECO4 candidate - Has required fabric and EPC is D or below. We consider D or below here, because
- # Warmfront regularly re-surveys properties which then fall within the SAP requirement
- # - This is not the very strictest definition of ECO4 eligible, but we aim to characterise the properties
- # here and re-surveying is a common practicce by Warmfront. Additionally, many of the social homes have
- # very old EPCs which may score lower when re-done
- # 2) Meets Fabric requirements, not SAP
- # Warmfront has identified the property as eligible, but the EPC is not D or below. We consider this but
- # label is separately as not a strict
- # 3) Subject to CIGA check - Meets loft conditions but shows a filled cavity.
- # - we don't have a SAP constraint here because the EPC is (currently) showing what the property might
- # actually look like after retrofit and so the EPC currently being a C or above means little, because
- # the updated EPC, showing an empty cavity, could bring the property within
- # 4) Loft insulation too thick - Meets empty cavity but shows a loft with between 101 and 270mm insulation.
- # - No SAP constraint, for the same reason as in category 2)
- # 5) Looks like GBIS instead
- # 6) Does not look like ECO4 candidate
- #
- # For properties that have been identified as GBIS
- # 1) Strict GBIS candidates
- # 2) Properties that actually look like strict GBIS candidates
- # 3) Subject to CIGA check - Filled cavity
- # 4) Does not look like a GBIS candidate
-
- remaining_eco4_df = remaining_properties[
- remaining_properties["funding_scheme"] == "ECO4"
- ].copy()
-
- ####################################
- # ECO4
- ####################################
-
- # 1) We identify this if:
- # - remaining_properties["eco4_eligible"] == True
-
- remaining_eco4_df["prospect_type"] = np.where(
- (remaining_eco4_df["eco4_eligible"] == True),
- "strict ECO4",
- remaining_eco4_df["prospect_type"]
- )
-
- # 2) Meets fabric requirements
- remaining_eco4_df["prospect_type"] = np.where(
- (
- (remaining_eco4_df["eco4_message"] == "sap too high") &
- remaining_eco4_df["eligibility_cavity_type"].isin(["partial", "empty"]) &
- remaining_eco4_df["eligibility_loft_type"].isin(["0-100mm"]) &
- pd.isnull(remaining_eco4_df["prospect_type"])
- ),
- "ECO4 if SAP downgrade",
- remaining_eco4_df["prospect_type"]
- )
-
- # 3) We identify this if it has a filled cavity but meets the loft conditions
- # TODO: Consider if we should also allow 100-270mm or if we should add some slight tolerance (e.g. 150mm)
- # to account for measurement error
- remaining_eco4_df["prospect_type"] = np.where(
- (
- remaining_eco4_df["eligibility_cavity_type"].isin(["full"]) &
- remaining_eco4_df["eligibility_loft_type"].isin(["0-100mm"])
- ),
- "ECO4 - Filled cavity - subject to CIGA check",
- remaining_eco4_df["prospect_type"]
- )
-
- # 4) We identify this by ensuring the cavity if empty or partial, and the loft has between 101 and 270mm
- remaining_eco4_df["prospect_type"] = np.where(
- (
- remaining_eco4_df["eligibility_cavity_type"].isin(["empty", "partial"]) &
- remaining_eco4_df["eligibility_loft_type"].isin(["100-270mm"])
- ),
- "ECO4 prospect - empty cavity, loft insulation below regulation",
- remaining_eco4_df["prospect_type"]
- )
-
- # 5) Looks like GBIS instead
- remaining_eco4_df["prospect_type"] = np.where(
- (remaining_eco4_df["gbis_eligible"] == True) & pd.isnull(remaining_eco4_df["prospect_type"]),
- "Looks like GBIS",
- remaining_eco4_df["prospect_type"]
- )
-
- # 6) This is everything else (i.e. both the cavity is full and the loft insulation is above 100mm)
- remaining_eco4_df["prospect_type"] = remaining_eco4_df["prospect_type"].fillna(
- "Does not look like ECO4 candidate"
- )
-
- ####################################
- # GBIS
- ####################################
-
- remaining_gbis = remaining_properties[
- remaining_properties["funding_scheme"] == "GBIS"
- ].copy()
-
- # 1) Strict GBIS candidates
- remaining_gbis["prospect_type"] = np.where(
- (
- (remaining_gbis["gbis_eligible"] == True) & (remaining_gbis["eco4_eligible"] == False)
- ),
- "strict GBIS",
- remaining_gbis["prospect_type"]
- )
-
- # 2) GBIS candidates that look like strict ECO4 candidates
- remaining_gbis["prospect_type"] = np.where(
- (remaining_gbis["eco4_eligible"] == True),
- "GBIS - Upgradable to ECO4",
- remaining_gbis["prospect_type"]
- )
-
- # 3) Subject to CIGA check - Filled cavity
- remaining_gbis["prospect_type"] = np.where(
- (
- remaining_gbis["eligibility_cavity_type"].isin(["full"]) &
- pd.isnull(remaining_gbis["prospect_type"])
- ),
- "GBIS - Filled cavity - subject to CIGA check",
- remaining_gbis["prospect_type"]
- )
-
- # 4) Everything else
- remaining_gbis["prospect_type"] = remaining_gbis["prospect_type"].fillna(
- "Does not look like GBIS candidate"
- )
-
- ####################################
- # Surplus properties
- ####################################
-
- # Take properties that were not identified by Warmfront and identify those that look like they would qualify
- # under the strictest criteria
- surplus_df = analysis_data[
- analysis_data["asset_identification_status"] != "identified potential eco works (CWI)"
- ].copy()
-
- eco4_surplus = surplus_df[
- (
- (surplus_df["eco4_eligible"] == True) & (surplus_df["eco4_message"] == "subject to post retrofit sap") &
- (
- surplus_df["eligibility_classification"].isin(
- ["high confidence", "highest confidence", "medium confidence"]
- )
- )
- )
- ].copy()
-
- gbis_surplus = surplus_df[
- (
- (surplus_df["gbis_eligible"] == True) & (surplus_df["eco4_eligible"] == False) & (
- surplus_df["eligibility_cavity_type"].isin(["empty", "partial"])
- )
- )
- ].copy()
-
- # Perform some checks to make sure we have all of the values
- remaining_eco4_dict = remaining_eco4_df["prospect_type"].value_counts().to_dict()
- if n_remaining_properties_eco4 != sum([v for k, v in remaining_eco4_dict.items()]):
- raise ValueError(
- "Number of remaining properties does not match the number of properties in remaining ECO4 dict"
- )
-
- remaining_gbis_dict = remaining_gbis["prospect_type"].value_counts().to_dict()
- if n_remaining_properties_gbis != sum([v for k, v in remaining_gbis_dict.items()]):
- raise ValueError(
- "Number of remaining properties does not match the number of properties in remaining GBIS dict"
- )
-
- to_append = {
- "ha_name": ha_name,
- "n_properties_in_asset_list": n_properties_in_asset_list,
- ############
- # ECO4
- ############
- "properties_sold_eco4": properties_sold_eco4,
- "n_remaining_properties_eco4": n_remaining_properties_eco4,
- **remaining_eco4_dict,
- ############
- # GBIS
- ############
- "properties_sold_gbis": properties_sold_gbis,
- "n_remaining_properties_gbis": n_remaining_properties_gbis,
- **remaining_gbis_dict,
- ############
- # GBIS
- ############
- "n_eco4_surplus": eco4_surplus.shape[0],
- "n_gbis_surplus": gbis_surplus.shape[0],
- }
-
- ha_analysis_results.append(to_append)
-
- revenue_to_append = {
- "ha_name": ha_name,
- "£ Remaining from asset list": (
- n_remaining_properties_eco4 * eco4_rate + n_remaining_properties_gbis * gbis_rate
- ),
- "Of which: Strict": (
- to_append.get('strict ECO4', 0) * eco4_rate + to_append.get('strict GBIS', 0) * gbis_rate +
- to_append.get('GBIS - Upgradable to ECO4', 0) * gbis_rate
- ),
- "Of which: Subject to CIGA": (
- to_append.get("ECO4 - Filled cavity - subject to CIGA check", 0) * eco4_rate +
- to_append.get("GBIS - Filled cavity - subject to CIGA check", 0) * gbis_rate
- ),
- "Of which: Prospect, not perfect strict prospect": (
- to_append.get("ECO4 prospect - empty cavity, loft insulation below regulation", 0) * eco4_rate +
- to_append.get("ECO4 if SAP downgrade", 0) * eco4_rate
- ),
- "Of which: Potential downgrade to GBIS": to_append["Looks like GBIS"] * eco4_rate,
- "Of which: Does not look like prospect": (
- to_append.get("Does not look like ECO4 candidate", 0) * eco4_rate +
- to_append.get("Does not look like GBIS candidate", 0) * gbis_rate
- ),
- "Surplus: Unidentified properties": eco4_surplus.shape[0] * eco4_rate + gbis_surplus.shape[0] * gbis_rate,
- "Surplus: GBIS Updates to ECO4": to_append.get("GBIS - Upgradable to ECO4", 0) * (eco4_rate - gbis_rate)
- }
-
- # Perform a quick check:
- if revenue_to_append["£ Remaining from asset list"] - (
- revenue_to_append["Of which: Strict"] + revenue_to_append["Of which: Subject to CIGA"] +
- revenue_to_append["Of which: Prospect, not perfect strict prospect"] +
- revenue_to_append["Of which: Potential downgrade to GBIS"] +
- revenue_to_append["Of which: Does not look like prospect"]
- ) > 1:
- raise ValueError("Error between top level revenue figures and breakdown - investigate me")
-
- ha_revenue_results.append(revenue_to_append)
-
- ha_analysis_results = pd.DataFrame(ha_analysis_results)
- ha_revenue_results = pd.DataFrame(ha_revenue_results)
-
- # Automate creation of the excel
- # Create a Pandas Excel writer using XlsxWriter as the engine
- with pd.ExcelWriter('HA Analysis - batch3.xlsx', engine='xlsxwriter') as writer:
- # Write each dataframe to a different worksheet without the index
- for df, sheet in [(ha_revenue_results, 'Total Revenue'),
- (ha_analysis_results, 'By ECO4 and GBIS')]:
-
- df.to_excel(writer, sheet_name=sheet, index=False)
-
- # Auto-adjust columns' width
- for i, width in enumerate(get_col_widths(df)):
- writer.sheets[sheet].set_column(i, i, width)
-
-
-def app():
- """
- This app contains the housin association analysis for HAs 1, 6, 14, 39 and 107.
- Only HA 6 has surveys
- :return:
- """
-
- use_cache = False
-
- # List all of the data in the folder
- directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()]
-
- files = {
- "ha_1": {
- "asset_list": {
- "filepath": "local_data/ha_data/HA1/ACCENT GROUP.xlsx",
- "sheetname": "Energy data"
- }
- },
- "ha_6": {
- "asset_list": {
- "filepath": "etl/eligibility/ha_15_32/HA 6 - ASSET LIST.xlsx",
- "sheetname": "HA 6"
- },
- "survey_list": {
- "filepath": "etl/eligibility/ha_15_32/HA 6 - SURVEY LIST.xlsx",
- "sheetname": "HA 6"
- }
- },
- "ha_14": {
- "asset_list": {
- "filepath": "etl/eligibility/ha_15_32/HA 14 - ASSET LIST.xlsx",
- "sheetname": "HA 14"
- }
- },
- "ha_39": {
- "asset_list": {
- "filepath": "etl/eligibility/ha_15_32/HA 39 - ASSET LIST.xlsx",
- "sheetname": "Sheet1"
- }
- },
- "ha_107": {
- "asset_list": {
- "filepath": "etl/eligibility/ha_15_32/HA 107 - ASSET LIST.xlsx",
- "sheetname": "HA 107"
- }
- }
- }
-
- loader = DataLoader(directories, use_cache)
- loader.load()
-
- # TODO: We probably need to make sure that we have all of the columns that we need
-
- # We load in the additional data required to perform the analysis
-
- cleaned = read_from_s3(
- s3_file_name="cleaned_epc_data/cleaned.bson",
- bucket_name="retrofit-data-dev"
+    # We work with a small sample
+    # Drop the identifier, address, date and raw outcome columns (including asset_list_row_id), so that
+    # only candidate model features and the response remain
+    balanced_sample = balanced_sample.drop(
+        columns=['ECO Eligibility', 'asset_list_row_id', 'address', 'uprn_source', 'address3', 'local_authority_label',
+                 'county', 'postcode', 'constituency', 'local_authority', 'inspection_date', 'address1',
+                 'constituency_label', 'building_reference_number', 'address2', 'posttown', 'lodgement_datetime',
+                 'uprn', 'lodgement_date', 'lmk_key', 'installation_status', 'ha_name']
     )
- cleaned = msgpack.unpackb(cleaned, raw=False)
+    # POC model
+    df = balanced_sample.copy()
+    # Fill missing numeric values with the column mean, if there are any
+    numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
+    df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].mean())
+
+    # Missing categorical values get their own "other" bucket
+    categorical_cols = df.select_dtypes(include=['object', 'category']).columns
+    df[categorical_cols] = df[categorical_cols].fillna("other")
+
+    # Keep only the most frequent categories per column and fold the rest into "other"
+    max_n_categories = 10
+    for col in categorical_cols:
+        top_categories = df[col].value_counts().nlargest(max_n_categories).index
+        df[col] = df[col].where(df[col].isin(top_categories), other="other")
+
+    # Use a model based approach to feature selection
+    import xgboost as xgb
+    from sklearn.model_selection import train_test_split
+
+    # The outcome column is 'response'; every other remaining column is a candidate feature
+    X = df.drop(columns=['response'])
+    y = df['response']
+
+    # Encoding categorical variables if not already done
+    X = pd.get_dummies(X, drop_first=True)
+
+    # Splitting the data into train and test sets
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+    # Initialize an XGBoost classifier
+    model = xgb.XGBClassifier()
+
+    # Fit the model
+    model.fit(X_train, y_train)
+
+    # Get feature importances
+    feature_importances = model.feature_importances_
+
+    # Map feature importances to their corresponding column names
+    feature_importance_dict = dict(zip(X.columns, feature_importances))
+
+    # Sort features by importance
+    sorted_features = sorted(feature_importance_dict.items(), key=lambda item: item[1], reverse=True)
+
+    # Display sorted features
+    for feature, importance in sorted_features:
+        print(f"{feature}: {importance}")
+
+
+def patch_cleaned(cleaned):
# Patch to handle the a missing description
cleaned["floor-description"].extend(
[
@@ -1354,7 +4923,6 @@ def app():
]
)
- # We treat unknown loft insulation as no insulation
cleaned["roof-description"].extend(
[
{'original_description': 'Pitched, Unknown loft insulation', 'clean_description': 'Pitched, no insulation',
@@ -1365,6 +4933,76 @@ def app():
]
)
+ cleaned["roof-description"].extend(
+ [
+ {'original_description': 'Pitched, Unknown loft insulation', 'clean_description': 'Pitched, no insulation',
+ 'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_pitched': True,
+ 'is_roof_room': False,
+ 'is_loft': False, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': True,
+ 'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': 'none'}
+ ]
+ )
+
+ cleaned["roof-description"].extend(
+ [
+ {'original_description': 'Pitched, 300+mm loft insulation',
+ 'clean_description': 'Pitched, 300+ mm loft insulation', 'thermal_transmittance': None,
+ 'thermal_transmittance_unit': None, 'is_pitched': True, 'is_roof_room': False, 'is_loft': True,
+ 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': False,
+ 'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': '300+'
+ }
+ ]
+ )
+
+ thermal_transmittance_values = list(np.arange(0, 2, 0.01))
+ for ttv in thermal_transmittance_values:
+ ttv_roundeded = round(ttv, 2)
+ # We look for an instance of that thermal transmittance value
+ rec = [
+ x for x in cleaned["roof-description"] if
+ (x["thermal_transmittance"] == ttv_roundeded) and "Average thermal transmittance" in x["clean_description"]
+ ]
+
+ if rec:
+ continue
+ else:
+ # We patch the record
+ cleaned["roof-description"].extend(
+ [{'original_description': f'Average thermal transmittance {ttv_roundeded} W/m-¦K',
+ 'clean_description': f'Average thermal transmittance {ttv_roundeded} w/m-¦k',
+ 'thermal_transmittance': ttv_roundeded,
+ 'thermal_transmittance_unit': 'w/m-¦k', 'is_pitched': False, 'is_roof_room': False, 'is_loft': False,
+ 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': False,
+ 'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': None}]
+ )
+
+ # We also patch a funny unit value we found
+ for ttv in thermal_transmittance_values:
+ ttv_rounded = round(ttv, 2)
+ # We look for an instance of that thermal transmittance value
+ rec = [
+ x for x in cleaned["roof-description"] if
+ (x["thermal_transmittance"] == ttv_rounded) and "Average thermal transmittance" in x["clean_description"]
+ and x["thermal_transmittance_unit"] == "w/m?K"
+ ]
+
+ if rec:
+ continue
+ else:
+ # We patch the record
+ ttv_string = str(ttv_rounded)
+ if len(ttv_string) == 3:
+ ttv_string = f"{ttv_string}0"
+
+ cleaned["roof-description"].extend(
+ [{'original_description': f'Average thermal transmittance {ttv_string} W/m?K',
+ 'clean_description': f'Average thermal transmittance {ttv_string} w/m-¦k',
+ 'thermal_transmittance': ttv_rounded,
+ 'thermal_transmittance_unit': 'w/m-¦k', 'is_pitched': False, 'is_roof_room': False, 'is_loft': False,
+ 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': False,
+ 'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': None}]
+ )
+
# Patch mainheatcont-description
cleaned["mainheatcont-description"].extend(
[
@@ -1381,54 +5019,2268 @@ def app():
x["another_property_below"] = True
x["thermal_transmittance"] = 0
- cleaning_data = read_dataframe_from_s3_parquet(
- bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+ return cleaned
+
+
+def calculate_eco4_post_ciga(
+        eligiblity_counts, input_data, ha_ciga_conversion_rate, ha_ciga_pass_to_sale_rate, ha_eco4_to_sale_rate,
+        eco4_rate, archetype_conversion_rate
+):
+    """
+    Forecast the number (and value) of ECO4 jobs once CIGA and archetype checks are accounted for.
+
+    :param eligiblity_counts: dataframe with an "ECO Eligibility" label column and a "count" column
+    :param input_data: dict for the housing association; only ``input_data["ciga_list"]`` is read, and an
+        empty dataframe there means no CIGA check results are available
+    :param ha_ciga_conversion_rate: share of properties still needing a CIGA check that is expected to pass
+    :param ha_ciga_pass_to_sale_rate: share of CIGA-passed properties expected to convert to a sale
+    :param ha_eco4_to_sale_rate: share of ECO4 properties that need no CIGA check expected to convert to a sale
+    :param eco4_rate: value applied per ECO4 property to produce the "£" figures
+    :param archetype_conversion_rate: share of "subject to archetype" properties expected to pass that check
+    :return: dict of integer counts and their "£" equivalents: post-CIGA total, confirmed, forecast,
+        archetype check failures, CIGA failures and expected cancellations
+    """
+    remaining_needing_ciga_check = eligiblity_counts[
+        eligiblity_counts["ECO Eligibility"].str.contains("subject to ciga") &
+        ~eligiblity_counts["ECO Eligibility"].str.contains("subject to archetype")
+    ]["count"].sum()
+
+    remaining_needing_ciga_and_archetype_check = eligiblity_counts[
+        eligiblity_counts["ECO Eligibility"].str.contains("subject to ciga") &
+        eligiblity_counts["ECO Eligibility"].str.contains("subject to archetype")
+    ]["count"].sum()
+    # We scale this down by the archetype_conversion_rate, and add this on to the remaining_needing_ciga_check
+    remaining_needing_ciga_and_archetype_check_passed = np.round(
+        remaining_needing_ciga_and_archetype_check * archetype_conversion_rate
     )
- created_at = datetime.now().isoformat()
+    remaining_needing_ciga_check += remaining_needing_ciga_and_archetype_check_passed
- photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+    eco4_no_ciga_needed = eligiblity_counts[
+        eligiblity_counts["ECO Eligibility"] == "eco4"
+    ]["count"].sum()
- outputs = get_epc_data(
- loader, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds, pull_data=False
+    eco4_no_ciga_archetype_needed = eligiblity_counts[
+        eligiblity_counts["ECO Eligibility"] == "eco4 (subject to archetype)"
+    ]["count"].sum()
+    eco4_no_ciga_archetype_needed_passed = np.round(
+        eco4_no_ciga_archetype_needed * archetype_conversion_rate
     )
- # for ha_name, datasets in outputs.items():
- # datasets["results_df"] = datasets["results_df"].drop(
- # columns=["eligibility_cavity_type", "eligibility_loft_type"]
- # )
+    eco4_no_ciga_needed += eco4_no_ciga_archetype_needed_passed
+
+    # Everything that was subject to an archetype check and was not counted as passed above
+    failed_archetype_check = int(
+        remaining_needing_ciga_and_archetype_check +
+        eco4_no_ciga_archetype_needed -
+        remaining_needing_ciga_and_archetype_check_passed -
+        eco4_no_ciga_archetype_needed_passed
+    )
+
+    has_ciga_check = not input_data["ciga_list"].empty
+    if has_ciga_check:
+
+        eco4_ciga_passed = eligiblity_counts[
+            eligiblity_counts["ECO Eligibility"] == "eco4 - passed ciga"
+        ]["count"].sum()
+
+        eco4_confirmed_ciga_failures = eligiblity_counts[
+            eligiblity_counts["ECO Eligibility"] == "failed ciga"
+        ]["count"].sum()
+
+        eco4_no_ciga_needed_or_ciga_passed = eco4_no_ciga_needed + eco4_ciga_passed
+
+        eco4_confirmed = np.round(
+            (eco4_no_ciga_needed * ha_eco4_to_sale_rate) +
+            (eco4_ciga_passed * ha_ciga_pass_to_sale_rate)
+        )
+
+        eco4_no_ciga_needed_cancellations = int(eco4_no_ciga_needed_or_ciga_passed - eco4_confirmed)
+
+        if remaining_needing_ciga_check > 0:
+            # We update the eco4 post ciga with the converted remaining
+            eco4_ciga_expected_remaining_to_pass = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
+
+            eco4_remaining_forecast = np.round(
+                eco4_ciga_expected_remaining_to_pass * ha_ciga_pass_to_sale_rate
+            )
+            eco4_ciga_needed_cancellations = eco4_ciga_expected_remaining_to_pass - eco4_remaining_forecast
+            eco4_estimated_ciga_failures = remaining_needing_ciga_check - eco4_ciga_expected_remaining_to_pass
+            eco4_post_ciga = eco4_confirmed + eco4_remaining_forecast
+        else:
+            eco4_remaining_forecast = 0
+            eco4_estimated_ciga_failures = 0
+            eco4_ciga_needed_cancellations = 0
+            eco4_post_ciga = eco4_confirmed
+
+        eco4_expected_cancellations = eco4_no_ciga_needed_cancellations + eco4_ciga_needed_cancellations
+    else:
+        eco4_confirmed_ciga_failures = 0
+        # Multiply by sale conversion
+        eco4_confirmed = np.round(eco4_no_ciga_needed * ha_eco4_to_sale_rate)
+        eco4_no_ciga_cancellations = int(eco4_no_ciga_needed - eco4_confirmed)
+        eco4_ciga_expected_remaining_to_pass = np.round(remaining_needing_ciga_check * ha_ciga_conversion_rate)
+        eco4_estimated_ciga_failures = remaining_needing_ciga_check - eco4_ciga_expected_remaining_to_pass
+
+        eco4_remaining_forecast = np.round(
+            eco4_ciga_expected_remaining_to_pass * ha_ciga_pass_to_sale_rate
+        )
+        eco4_ciga_cancellations = int(eco4_ciga_expected_remaining_to_pass - eco4_remaining_forecast)
+        eco4_post_ciga = eco4_confirmed + eco4_remaining_forecast
+
+        eco4_expected_cancellations = eco4_no_ciga_cancellations + eco4_ciga_cancellations
+
+    # Every count is a whole number at this point (sums of counts and np.round results), so we convert them
+    # all to plain ints to make both branches report the same types
+    eco4_post_ciga = int(eco4_post_ciga)
+    eco4_remaining_forecast = int(eco4_remaining_forecast)
+    eco4_confirmed = int(eco4_confirmed)
+    eco4_confirmed_ciga_failures = int(eco4_confirmed_ciga_failures)
+    eco4_expected_cancellations = int(eco4_expected_cancellations)
+
+    results = {
+        # Counts
+        "ECO4 - post CIGA - #": eco4_post_ciga,
+        "Of which confirmed - #": eco4_confirmed,
+        "Of which forecast - #": eco4_remaining_forecast,
+        # Revenue
+        "ECO4 - post CIGA - £": eco4_post_ciga * eco4_rate,
+        "Of which confirmed - £": eco4_confirmed * eco4_rate,
+        "Of which forecast - £": eco4_remaining_forecast * eco4_rate,
+        # Archetype check failures
+        "Estimated total - failed archetype check - #": failed_archetype_check,
+        "Estimated total - failed archetype check - £": failed_archetype_check * eco4_rate,
+        # Ciga failures
+        "Estimated total - failed CIGA": int(eco4_confirmed_ciga_failures + eco4_estimated_ciga_failures),
+        "Confirmed CIGA failures": eco4_confirmed_ciga_failures,
+        "Estimated CIGA failures": int(eco4_estimated_ciga_failures),
+        # Ciga failures cost
+        "Estimated total - failed CIGA - £": int(
+            (eco4_confirmed_ciga_failures + eco4_estimated_ciga_failures) * eco4_rate
+        ),
+        "Confirmed CIGA failures - £": int(eco4_confirmed_ciga_failures * eco4_rate),
+        "Estimated CIGA failures - £": int(eco4_estimated_ciga_failures * eco4_rate),
+        # Expected cancellations
+        "Expected cancellations - #": eco4_expected_cancellations,
+        "Expected cancellations - £": eco4_expected_cancellations * eco4_rate
+    }
+
+    return results
+
+
+def forecast_remaining_sales(loader):
+ # Assumptions:
+ # We cap the ciga conversion rate at 75% because I expect future HAs to have a lower CIGA conversion rate
+ # and I don't want the numbers to change too much, dependent on the CIGA conversion rate
+ maximum_ciga_conversion = 0.75
+
+ # This is a hard limit to the allowed conversion rates to final sale. These are typically very
+ # high but there are some anomalies, amongst surveys that are early on
+ sales_conversion_lower_bound = 0.8
+
+ gbis_rate = 600
+ eco4_rate = 1710
+
+ # Based on ONS https://www.ons.gov.uk/peoplepopulationandcommunity/housing/bulletins/housingenglandandwales
+ # /census2021
+ # there are 5.7 million terraced properties in the UK, of the 19.3 million houses or bungalows. We therefore apply
+ # a 30% discount to homes that are dependent on an archetype check, since around 30% of them will be mid terraced
+ # This 30% is slightly harsh but we choose to be conservative
+ # Therefore, the archetype check conversion rate is 70%
+ archetype_conversion_rate = 0.7
+
+ # 1) Calculate the conversion rate from passed CIGA to actual sale
+ converted_ciga_jobs = []
+ for ha_name, input_data in loader.data.items():
+ asset_list = input_data["asset_list"].copy()
+ survey_list = input_data["survey_list"].copy()
+
+ if survey_list.empty:
+ continue
+
+ ciga_dependent_assets = asset_list[
+ asset_list["ECO Eligibility"] == "eco4 - passed ciga"
+ ]
+
+ # These are now the ciga dependent assets at installation
+ ciga_dependent_assets_at_installation = ciga_dependent_assets.merge(
+ survey_list[["asset_list_row_id", "installation_status"]],
+ how="inner",
+ on="asset_list_row_id"
+ )
+
+ # We then calculate how many get cancelled
+ ciga_dependent_assets_sold = ciga_dependent_assets_at_installation[
+ ciga_dependent_assets_at_installation["installation_status"].isin(
+ [
+ "ECO4 - installed", "ECO4 - in progress"
+ ]
+ )
+ ]
+
+ ciga_dependent_assets_failed = ciga_dependent_assets_at_installation[
+ ~ciga_dependent_assets_at_installation["installation_status"].isin(
+ [
+ "ECO4 - installed", "ECO4 - in progress"
+ ]
+ )
+ ]
+
+ converted_ciga_jobs.append(
+ {
+ "HA Name": ha_name,
+ "# Ciga dependent at installation": ciga_dependent_assets_at_installation.shape[0],
+ "# Ciga dependent successfully installed": ciga_dependent_assets_sold.shape[0],
+ "# Ciga dependent failed install": ciga_dependent_assets_failed.shape[0]
+ }
+ )
+
+ converted_ciga_jobs = pd.DataFrame(converted_ciga_jobs)
+
+ # We calculate a ciga pass to install conversion rate
+ median_ciga_pass_to_install = (
+ converted_ciga_jobs["# Ciga dependent successfully installed"].sum() /
+ converted_ciga_jobs["# Ciga dependent at installation"].sum()
+ )
+
+ # 2) Calculate the conversion rate from CIGA dependent to ciga passed
+ ciga_passrates = []
+ for ha_name, input_data in loader.data.items():
+
+ # If we don't have a ciga list, we can't do anything
+ if input_data["ciga_list"].empty:
+ continue
+
+ # 1) Calculate the conversion rate for CIGA to actual sale
+ asset_list = input_data["asset_list"].copy()
+
+ ciga_completed_assets = asset_list[
+ asset_list["ECO Eligibility"].isin(
+ [
+ "eco4 - passed ciga",
+ "failed ciga"
+ ]
+ )
+ ]
+
+ ciga_passed = ciga_completed_assets[
+ ciga_completed_assets["ECO Eligibility"].isin(
+ [
+ "eco4 - passed ciga"
+ ]
+ )
+ ]
+
+ ciga_passrates.append(
+ {
+ "Ha Name": ha_name,
+ "# CIGA dependent": ciga_completed_assets.shape[0],
+ "# CIGA passed": ciga_passed.shape[0],
+ }
+ )
+
+ ciga_passrates = pd.DataFrame(ciga_passrates)
+
+ median_ciga_success_rate = ciga_passrates["# CIGA passed"].sum() / ciga_passrates["# CIGA dependent"].sum()
+
+ # 3) Calculate the conversion rate to install for ECO4 and GBIS jobs that don't need a CIGA check
+ eco4_ciga_independent_to_install = []
+ gbis_to_install = []
+ for ha_name, input_data in loader.data.items():
+ asset_list = input_data["asset_list"].copy()
+ survey_list = input_data["survey_list"].copy()
+
+ if survey_list.empty:
+ continue
+
+ # For properties that were identified as a typical ECO4 job, we calculate the number of properties that
+ # installed
+ # vs cancelled
+
+ typical_eco4 = asset_list[asset_list["ECO Eligibility"] == "eco4"]
+ typical_gbis = asset_list[asset_list["ECO Eligibility"] == "gbis"]
+
+ # Merge on the surveys
+ typical_eco4_installed = typical_eco4.merge(
+ survey_list[["asset_list_row_id", "installation_status"]], how="inner", on="asset_list_row_id"
+ )
+
+ if not typical_eco4_installed.empty:
+ typical_eco4_sold = typical_eco4_installed[
+ typical_eco4_installed["installation_status"].isin(
+ [
+ "ECO4 - installed", "ECO4 - in progress"
+ ]
+ )
+ ]
+
+ eco4_ciga_independent_to_install.append(
+ {
+ "Ha Name": ha_name,
+ "# ECO4 at install stage": typical_eco4_installed.shape[0],
+ "# ECO4 successfully installed": typical_eco4_sold.shape[0]
+ }
+ )
+
+ typical_gbis_installed = typical_gbis.merge(
+ survey_list[["asset_list_row_id", "installation_status"]], how="inner", on="asset_list_row_id"
+ )
+ if not typical_gbis_installed.empty:
+ typical_gbis_sold = typical_gbis_installed[
+ typical_gbis_installed["installation_status"].isin(
+ [
+ "GBIS - in progress", "GBIS - installed"
+ ]
+ )
+ ]
+
+ gbis_to_install.append(
+ {
+ "Ha Name": ha_name,
+ "# GBIS at install stage": typical_gbis_installed.shape[0],
+ "# GBIS successfully installed": typical_gbis_sold.shape[0]
+ }
+ )
+
+ eco4_ciga_independent_to_install = pd.DataFrame(eco4_ciga_independent_to_install)
+ gbis_to_install = pd.DataFrame(gbis_to_install)
+
+ eco4_ciga_independent_to_install["conversion"] = (
+ eco4_ciga_independent_to_install["# ECO4 successfully installed"] /
+ eco4_ciga_independent_to_install["# ECO4 at install stage"]
+ )
+ eco4_ciga_independent_to_install_clipped = eco4_ciga_independent_to_install[
+ eco4_ciga_independent_to_install["conversion"] >= sales_conversion_lower_bound
+ ]
+
+ gbis_to_install["conversion"] = (
+ gbis_to_install["# GBIS successfully installed"] /
+ gbis_to_install["# GBIS at install stage"]
+ )
+ gbis_to_install_clipped = gbis_to_install[
+ gbis_to_install["conversion"] >= sales_conversion_lower_bound
+ ]
+
+ median_eco4_to_install = (
+ eco4_ciga_independent_to_install_clipped["# ECO4 successfully installed"].sum() /
+ eco4_ciga_independent_to_install_clipped["# ECO4 at install stage"].sum()
+ )
+
+ median_gbis_to_install = (
+ gbis_to_install_clipped["# GBIS successfully installed"].sum() /
+ gbis_to_install_clipped["# GBIS at install stage"].sum()
+ )
+
+ # Produce the final output
+ december_figures = loader.december_figures.copy()
+ december_figures = december_figures.fillna(0)
+ # A negative remaining figure means that more jobs were actually sold than initially estimated, so we
+ # clamp remaining to 0
+ december_figures["ECO4 remaining"] = np.where(
+ december_figures["ECO4 remaining"] < 0, 0, december_figures["ECO4 remaining"]
+ )
+ december_figures["GBIS remaining"] = np.where(
+ december_figures["GBIS remaining"] < 0, 0, december_figures["GBIS remaining"]
+ )
+
+ results = []
+ for ha_name, input_data in loader.data.items():
+
+ # Original warmfront figures - ECO4
+ original_warmfront_estimates = december_figures[december_figures["HA Name"] == ha_name]
+ if original_warmfront_estimates.empty:
+ # Append an empty row
+ original_warmfront_estimates = december_figures.head(1).copy()
+ for k in original_warmfront_estimates.columns:
+ original_warmfront_estimates[k] = 0
+ original_warmfront_estimates["HA Name"] = ha_name
+
+ original_warmfront_eco4 = original_warmfront_estimates["ECO4"].values[0]
+ original_warmfront_remaining_eco4 = original_warmfront_estimates["ECO4 remaining"].values[0]
+ original_warmfront_sold_eco4 = (
+ original_warmfront_estimates["No. of Tech surveys complete - Eco 4"].values[0] * eco4_rate
+ )
+
+ original_warmfront_eco4_revenue = original_warmfront_eco4 * eco4_rate
+ original_warmfront_remaining_eco4_revenue = original_warmfront_remaining_eco4 * eco4_rate
+ original_warmfront_sold_gbis = (
+ original_warmfront_estimates["No. of Tech surveys complete - GBIS"].values[0] * gbis_rate
+ )
+
+ # Original warmfront figures - GBIS
+
+ original_warmfront_gbis = original_warmfront_estimates["GBIS"].values[0]
+ original_warmfront_remaining_gbis = original_warmfront_estimates["GBIS remaining"].values[0]
+
+ original_warmfront_gbis_revenue = (
+ original_warmfront_gbis * gbis_rate
+ )
+ original_warmfront_remaining_gbis_revenue = original_warmfront_remaining_gbis * gbis_rate
+
+ # Asset list - ECO4
+ asset_list = input_data["asset_list"].copy()
+ survey_list = input_data["survey_list"].copy()
+
+ if survey_list.empty:
+ asset_list_remaining = asset_list.copy()
+ else:
+ # For HA6, there are a small number of postcodes that do not match to any item in the asset list
+ survey_list = survey_list[~pd.isnull(survey_list["asset_list_row_id"])]
+ asset_list_remaining = asset_list.merge(
+ survey_list[["asset_list_row_id", "installation_status"]],
+ how="left",
+ on="asset_list_row_id"
+ )
+ # Anything that has an installation has gone to installation, and therefore is not remaining
+ asset_list_remaining = asset_list_remaining[pd.isnull(asset_list_remaining["installation_status"])]
+ asset_list_remaining = asset_list_remaining.drop(columns=["installation_status"])
+
+ eligiblity_counts = pd.DataFrame(asset_list["ECO Eligibility"].value_counts()).reset_index()
+ eligiblity_counts_remaining = pd.DataFrame(asset_list_remaining["ECO Eligibility"].value_counts()).reset_index()
+
+ eco4_pre_ciga = eligiblity_counts[
+ eligiblity_counts["ECO Eligibility"].isin(
+ [
+ "eco4",
+ "eco4 (subject to ciga)",
+ "eco4 - passed ciga",
+ "failed ciga",
+ "eco4 (subject to ciga) (subject to archetype)",
+ "eco4 (subject to archetype)"
+ ]
+ )
+ ]["count"].sum()
+
+ eco4_pre_ciga_remaining = eligiblity_counts_remaining[
+ eligiblity_counts_remaining["ECO Eligibility"].isin(
+ [
+ "eco4",
+ "eco4 (subject to ciga)",
+ "eco4 - passed ciga",
+ "failed ciga",
+ "eco4 (subject to ciga) (subject to archetype)",
+ "eco4 (subject to archetype)"
+ ]
+ )
+ ]["count"].sum()
+
+ eco4_pre_ciga_revenue = eco4_pre_ciga * eco4_rate
+ eco4_pre_ciga_remaining_revenue = eco4_pre_ciga_remaining * eco4_rate
+
+ # Total Eligible - this is what passed ciga checks + strict. If we don't have what passed CIGA, we estimate
+ # We check if the HA has done a CIGA check. Also, if we have assets dormant at CIGA, we estimate what will
+ # convert
+ # We estimate a conversion for anything left post CIGA
+ ha_ciga_conversion = ciga_passrates[ciga_passrates["Ha Name"] == ha_name]
+ if not ha_ciga_conversion.empty:
+ ha_ciga_conversion_rate = (
+ ha_ciga_conversion["# CIGA passed"].values[0] / ha_ciga_conversion["# CIGA dependent"].values[0]
+ )
+ else:
+ ha_ciga_conversion_rate = (
+ median_ciga_success_rate if median_ciga_success_rate <= maximum_ciga_conversion else
+ maximum_ciga_conversion
+ )
+
+ # We also need the ha ciga passed to install success rate
+ ha_ciga_pass_to_sale = converted_ciga_jobs[converted_ciga_jobs["HA Name"] == ha_name]
+ if not ha_ciga_pass_to_sale.empty and ha_ciga_pass_to_sale["# Ciga dependent at installation"].values[0] != 0:
+ ha_ciga_pass_to_sale_rate = (
+ ha_ciga_pass_to_sale["# Ciga dependent successfully installed"].values[0] /
+ ha_ciga_pass_to_sale["# Ciga dependent at installation"].values[0]
+ )
+ else:
+ ha_ciga_pass_to_sale_rate = median_ciga_pass_to_install
+
+ ha_eco4_to_sale = eco4_ciga_independent_to_install_clipped[
+ eco4_ciga_independent_to_install_clipped["Ha Name"] == ha_name
+ ]
+ if not ha_eco4_to_sale.empty:
+ ha_eco4_to_sale_rate = (
+ ha_eco4_to_sale['# ECO4 successfully installed'].values[0] /
+ ha_eco4_to_sale['# ECO4 at install stage'].values[0]
+ )
+ else:
+ ha_eco4_to_sale_rate = median_eco4_to_install
+
+ eco4_post_ciga_total_results = calculate_eco4_post_ciga(
+ eligiblity_counts=eligiblity_counts,
+ input_data=input_data,
+ ha_ciga_conversion_rate=ha_ciga_conversion_rate,
+ ha_ciga_pass_to_sale_rate=ha_ciga_pass_to_sale_rate,
+ ha_eco4_to_sale_rate=ha_eco4_to_sale_rate,
+ eco4_rate=eco4_rate,
+ archetype_conversion_rate=archetype_conversion_rate
+ )
+
+ eco4_post_ciga_remaining_results = calculate_eco4_post_ciga(
+ eligiblity_counts=eligiblity_counts_remaining,
+ input_data=input_data,
+ ha_ciga_conversion_rate=ha_ciga_conversion_rate,
+ ha_ciga_pass_to_sale_rate=ha_ciga_pass_to_sale_rate,
+ ha_eco4_to_sale_rate=ha_eco4_to_sale_rate,
+ eco4_rate=eco4_rate,
+ archetype_conversion_rate=archetype_conversion_rate
+ )
+
+ # Calculate the delta compared to Warmfront's original remaining
+ if original_warmfront_remaining_eco4 == 0:
+ eco4_delta_vs_original_estimate_remaining = "N/A"
+ else:
+ eco4_delta_vs_original_estimate_remaining = ((eco4_post_ciga_remaining_results["ECO4 - post CIGA - #"] -
+ original_warmfront_remaining_eco4) /
+ original_warmfront_remaining_eco4)
+
+ # GBIS Figures
+ # Estimate the GBIS conversion rate
+ ha_gbis_sale_conversion = gbis_to_install_clipped[
+ gbis_to_install_clipped["Ha Name"] == ha_name
+ ]
+
+ if not ha_gbis_sale_conversion.empty:
+ ha_gbis_sale_conversion = (
+ ha_gbis_sale_conversion["# GBIS successfully installed"].values[0] /
+ ha_gbis_sale_conversion["# GBIS at install stage"].values[0]
+ )
+ else:
+ ha_gbis_sale_conversion = median_gbis_to_install
+
+ gbis_total_pre_cancellations = eligiblity_counts[
+ eligiblity_counts["ECO Eligibility"] == "gbis"
+ ]["count"].sum()
+
+ gbis_total_pre_cancellations_revenue = gbis_total_pre_cancellations * gbis_rate
+ # gbis_total = int(np.round(gbis_total_pre_cancellations * ha_gbis_sale_conversion))
+ # gbis_total_revenue = int(gbis_total * gbis_rate)
+
+ gbis_remaining_pre_cancellations = eligiblity_counts_remaining[
+ eligiblity_counts_remaining["ECO Eligibility"] == "gbis"
+ ]["count"].sum()
+ gbis_remaining_pre_cancellations_revenue = (
+ gbis_remaining_pre_cancellations * gbis_rate
+ )
+ # This is the gbis jobs we expect to sell
+ gbis_remaining = int(np.round(gbis_remaining_pre_cancellations * ha_gbis_sale_conversion))
+ gbis_remaining_revenue = int(gbis_remaining * gbis_rate)
+ # This is the number we expect to cancel
+ gbis_remaining_expected_cancellations = int(gbis_remaining_pre_cancellations - gbis_remaining)
+ gbis_remaining_expected_cancellations_revenue = gbis_remaining_expected_cancellations * gbis_rate
+
+ # GBIS delta
+ if original_warmfront_remaining_gbis == 0:
+ gbis_delta_vs_original_estimate_remaining = "N/A"
+ else:
+ gbis_delta_vs_original_estimate_remaining = (
+ (gbis_remaining - original_warmfront_remaining_gbis) / original_warmfront_remaining_gbis
+ )
+
+ # Current sales figures
+ # For any sales surveys that are complete, that could still cancel, we apply a conversion rate
+ eco4_actually_sold = 0
+ eco4_confirmed_cancellations = 0
+ eco4_expected_cancellations = 0
+
+ gbis_actually_sold = 0
+ gbis_confirmed_cancellations = 0
+ gbis_expected_cancellations = 0
+ if not survey_list.empty:
+ surveys_with_eligibility = survey_list.merge(
+ asset_list[["asset_list_row_id", "ECO Eligibility"]],
+ how="left", on="asset_list_row_id"
+ )
+ completed_eco4_sales = surveys_with_eligibility[
+ surveys_with_eligibility["installation_status"] == "ECO4 - installed"
+ ].shape[0]
+ incomplete_eco4_sales = surveys_with_eligibility[
+ (surveys_with_eligibility["installation_status"] == "ECO4 - in progress") &
+ (~surveys_with_eligibility["ECO Eligibility"].isin(
+ ["eco4 - passed ciga"])
+ )
+ ].shape[0]
+ incomplete_eco4_sales_ciga = surveys_with_eligibility[
+ (surveys_with_eligibility["installation_status"] == "ECO4 - in progress") &
+ (surveys_with_eligibility["ECO Eligibility"].isin(
+ ["eco4 - passed ciga"])
+ )
+ ].shape[0]
+
+ eco4_confirmed_cancellations = surveys_with_eligibility[
+ surveys_with_eligibility["installation_status"] == "ECO4 - cancelled"
+ ].shape[0]
+
+ expected_eco4_sales_no_ciga = np.round(incomplete_eco4_sales * ha_eco4_to_sale_rate)
+ expected_eco4_sales_ciga = np.round(incomplete_eco4_sales_ciga * ha_ciga_pass_to_sale_rate)
+
+ eco4_expected_cancellations = (incomplete_eco4_sales + incomplete_eco4_sales_ciga) - (
+ expected_eco4_sales_no_ciga + expected_eco4_sales_ciga
+ )
+ eco4_expected_cancellations = int(np.round(eco4_expected_cancellations))
+
+ eco4_actually_sold = eco4_rate * (
+ completed_eco4_sales + expected_eco4_sales_no_ciga + expected_eco4_sales_ciga
+ )
+
+ completed_gbis_sales = surveys_with_eligibility[
+ surveys_with_eligibility["installation_status"] == "GBIS - installed"
+ ].shape[0]
+ incomplete_gbis_sales = surveys_with_eligibility[
+ (surveys_with_eligibility["installation_status"] == "GBIS - in progress")
+ ].shape[0]
+
+ # Get confirmed cancellations
+ gbis_confirmed_cancellations = surveys_with_eligibility[
+ surveys_with_eligibility["installation_status"] == "GBIS - cancelled"
+ ].shape[0]
+
+ expected_gbis_unconfirmed_sales = np.round(incomplete_gbis_sales * ha_gbis_sale_conversion)
+
+ gbis_expected_cancellations = int(incomplete_gbis_sales - expected_gbis_unconfirmed_sales)
+
+ gbis_actually_sold = completed_gbis_sales * gbis_rate + (
+ expected_gbis_unconfirmed_sales * gbis_rate
+ )
+
+ # Add in the variance:
+ # We should expect that the pre-ciga total is:
+ # 1) The number of post CIGA successes +
+ # 2) The number of archetype failures +
+ # 2) the number of CIGA failures +
+ # 3) The number of cancellations
+ variance_total = eco4_pre_ciga - (
+ eco4_post_ciga_total_results["ECO4 - post CIGA - #"] +
+ eco4_post_ciga_total_results["Estimated total - failed archetype check - #"] +
+ eco4_post_ciga_total_results['Estimated total - failed CIGA'] +
+ eco4_post_ciga_total_results["Expected cancellations - #"]
+ )
+ if variance_total != 0:
+ raise ValueError("Something went wrong in variance total")
+
+ variance_remaining = eco4_pre_ciga_remaining - (
+ eco4_post_ciga_remaining_results["ECO4 - post CIGA - #"] +
+ eco4_post_ciga_remaining_results["Estimated total - failed archetype check - #"] +
+ eco4_post_ciga_remaining_results['Estimated total - failed CIGA'] +
+ eco4_post_ciga_remaining_results["Expected cancellations - #"]
+ )
+
+ if variance_remaining != 0:
+ raise ValueError("Something went wrong in variance remaining")
+
+ # We also check variances to make sure that the pre-CIGA ECO4 total equals
+ # 1) Pre CIGA remaining +
+ # 2) ECO4 sold +
+ # 3) ECO4 confirmed cancellations +
+ # 4) ECO4 unconfirmed cancellations
+
+ pre_ciga_eco4_variance = (
+ eco4_pre_ciga_revenue -
+ eco4_pre_ciga_remaining_revenue -
+ eco4_actually_sold -
+ eco4_confirmed_cancellations * eco4_rate -
+ eco4_expected_cancellations * eco4_rate
+ )
+
+ if pre_ciga_eco4_variance != 0:
+ raise ValueError("Something went wrong in pre_ciga_eco4_variance")
+
+ # Check GBIS total variance
+ # The total before cancellations should equal:
+ # The number of sold +
+ # The number of confirmed cancelled +
+ # The number of expected cancelled +
+ # The number of remaining
+ gbis_variance = gbis_total_pre_cancellations - (
+ gbis_actually_sold / gbis_rate +
+ gbis_confirmed_cancellations +
+ gbis_expected_cancellations +
+ gbis_remaining_pre_cancellations
+ )
+
+ if gbis_variance != 0:
+ raise ValueError("Something went wrong in gbis_variance")
+
+ # We expect the remaining to equal expected sales + expected cancellations
+ gbis_variance_2 = gbis_remaining_pre_cancellations - (
+ gbis_remaining +
+ gbis_remaining_expected_cancellations
+ )
+
+ if gbis_variance_2 != 0:
+ raise ValueError("Something went wrong in gbis_variance2")
+
+ # Update the GBIS sold, since Warmfront often sold more GBIS that expected
+ original_warmfront_gbis_revenue = original_warmfront_sold_gbis + original_warmfront_remaining_gbis_revenue
+ original_warmfront_gbis = (
+ original_warmfront_sold_gbis / gbis_rate + original_warmfront_remaining_gbis_revenue / gbis_rate
+ )
+
+ to_append = {
+ ("", "", "", "HA Name"): ha_name,
+ # ECO4 - original warmfront figures
+ ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): original_warmfront_eco4,
+ ("ECO4 original", "", "Remaining - #", ""): original_warmfront_remaining_eco4,
+ ("ECO4 original", "", "Total - £", ""): original_warmfront_eco4_revenue,
+ ("ECO4 original", "", "Sold or cancelled - £", ""): original_warmfront_sold_eco4,
+ ("ECO4 original", "", "Remaining - £", ""): original_warmfront_remaining_eco4_revenue,
+ # GBIS - original warmfront figures
+ ("", "Original Warmfront estimate", "Total - #", "GBIS - November"): original_warmfront_gbis,
+ ("GBIS original", "", "Remaining - #", ""): original_warmfront_gbis,
+ ("GBIS original", "", "Total - £", ""): original_warmfront_gbis_revenue,
+ ("GBIS original", "", "Sold or cancelled - £", ""): original_warmfront_sold_gbis,
+ ("GBIS original", "", "Remaining - £", ""): original_warmfront_remaining_gbis_revenue,
+ # ECO4 - asset list, pre-ciga
+ ("", "Warmfront post code list", "Total #", "ECO4 total (pre-ciga)"): eco4_pre_ciga,
+ ("ECO4 pre-ciga", "", "Remaining - #", ""): eco4_pre_ciga_remaining,
+ ("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue,
+ ("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
+ ("ECO4 pre-ciga", "", "VARIANCE - PRE-CIGA ECO4 TOTAL", ""): pre_ciga_eco4_variance,
+ ("ECO4 pre-ciga", "", "VARIANCE - PRE-CIGA ECO4 TOTAL VS ELIGIBLE & INELIGIBLE", ""): variance_total,
+ ("ECO4 pre-ciga", "", "VARIANCE - PRE-CIGA ECO4 REMAINING VS ELIGIBLE & INELIGIBLE", ""):
+ variance_remaining,
+ ("ECO4 pre-ciga", "", "Sold - £", ""): eco4_actually_sold,
+ ("ECO4 pre-ciga", "", "Confirmed cancellations - £", ""): eco4_confirmed_cancellations * eco4_rate,
+ # This is for jobs that are in-progress and could still cancel
+ ("ECO4 pre-ciga", "", "Unconfirmed cancellations - £", ""): eco4_expected_cancellations * eco4_rate,
+ # ECO4 - asset list, post ciga, total
+ ("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total"):
+ eco4_post_ciga_total_results[
+ "ECO4 - post CIGA - #"],
+ ("ECO4 post-ciga", "", "Estimated total eligible - £", ""): eco4_post_ciga_total_results[
+ "ECO4 - post CIGA - £"],
+ # ECO4 - asset list, post ciga, remaining
+ ("ECO4 post-ciga", "", "Estimated remaining eligible - #", ""): eco4_post_ciga_remaining_results[
+ "ECO4 - post CIGA - #"],
+ ("ECO4 post-ciga", "", "Estimated remaining eligible - £", ""): eco4_post_ciga_remaining_results[
+ "ECO4 - post CIGA - £"],
+ ("ECO4 post-ciga", "", "Delta vs original estimate, remaining - %",
+ ""): eco4_delta_vs_original_estimate_remaining,
+ ("ECO4 post-ciga", "", "Of which - confirmed (post CIGA or no CIGA required) - #", ""):
+ eco4_post_ciga_remaining_results["Of which confirmed - #"],
+ ("ECO4 post-ciga", "", "Of which - confirmed (post CIGA or no CIGA required) - £", ""):
+ eco4_post_ciga_remaining_results["Of which confirmed - £"],
+ ("ECO4 post-ciga", "", "Of which forecast - #", ""):
+ eco4_post_ciga_remaining_results["Of which forecast - #"],
+ ("ECO4 post-ciga", "", "Of which forecast - £", ""):
+ eco4_post_ciga_remaining_results["Of which forecast - £"],
+ # Expected ECO4 cancellations
+ ("ECO4 Cancellations", "", "Of which expected cancellations - #", ""): eco4_post_ciga_remaining_results[
+ "Expected cancellations - #"
+ ],
+ ("ECO4 Cancellations", "", "Of which expected cancellations - £", ""): eco4_post_ciga_remaining_results[
+ "Expected cancellations - £"
+ ],
+ # Archetype check failures
+ ("ECO4 CIGA failures", "", "Estimated total - failed Archetype check - #", ""):
+ eco4_post_ciga_remaining_results['Estimated total - failed archetype check - #'],
+ ("ECO4 CIGA failures", "", "Estimated total - failed Archetype check - £", ""):
+ eco4_post_ciga_remaining_results['Estimated total - failed archetype check - £'],
+ # CIGA failures
+ ("ECO4 CIGA failures", "", "Estimated total - failed CIGA - #", ""): eco4_post_ciga_remaining_results[
+ 'Estimated total - failed CIGA'
+ ],
+ ("ECO4 CIGA failures", "", "Estimated total - failed CIGA - £", ""): eco4_post_ciga_remaining_results[
+ 'Estimated total - failed CIGA - £'
+ ],
+ ("ECO4 CIGA failures", "", "Confirmed failures - #", ""): eco4_post_ciga_remaining_results[
+ "Confirmed CIGA failures"
+ ],
+ ("ECO4 CIGA failures", "", "Confirmed failures - £", ""): eco4_post_ciga_remaining_results[
+ "Confirmed CIGA failures - £"
+ ],
+ ("ECO4 CIGA failures", "", "Estimated failures - #", ""): eco4_post_ciga_remaining_results[
+ "Estimated CIGA failures"
+ ],
+ ("ECO4 CIGA failures", "", "Estimated failures - £", ""): eco4_post_ciga_remaining_results[
+ "Estimated CIGA failures - £"
+ ],
+ # GBIS postcode list
+ ("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total_pre_cancellations,
+ ("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"):
+ gbis_total_pre_cancellations_revenue,
+ ("GBIS Postcode list", "Warmfront post code list", "GBIS VARIANCE", "GBIS total"): gbis_variance,
+ ("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total"): gbis_actually_sold,
+ ("GBIS Postcode list", "", "Confirmed cancellations - £", ""): gbis_confirmed_cancellations * gbis_rate,
+ # This is for jobs that are in-progress and could still cancel
+ ("GBIS Postcode list", "", "Unconfirmed cancellations - £", ""): gbis_expected_cancellations * gbis_rate,
+ ("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total"):
+ gbis_remaining_pre_cancellations,
+ ("GBIS Postcode list", "Warmfront post code list", "Remaining - £", "GBIS total"):
+ gbis_remaining_pre_cancellations_revenue,
+ ("GBIS Postcode list", "", "Delta vs original estimate, remaining - %", ""):
+ gbis_delta_vs_original_estimate_remaining,
+ # Expected cancellations
+ (
+ "GBIS Postcode list", "", "Of which expected sales - £ - £",
+ "GBIS total"): gbis_remaining_revenue,
+ ("GBIS Postcode list", "", "Of which expected cancellations -£", "GBIS total"):
+ gbis_remaining_expected_cancellations_revenue
+ }
+
+ # Make sure nothing is forgotten due to duplicate multi-index keys
+ if len(to_append) != 51:
+ raise ValueError("Something went wrong")
+
+ results.append(to_append)
+
+ results = pd.DataFrame(results)
+ results.to_csv("pipeline_remaining_raw.csv")
+
+ totals_row = {}
+ for col in results.columns:
+ if col == ('', '', '', 'HA Name'):
+ totals_row[col] = "Total"
+ elif col in [
+ ("ECO4 post-ciga", "", "Delta vs original estimate, remaining - %", ""),
+ ("GBIS Postcode list", "", "Delta vs original estimate, remaining - %", "")
+ ]:
+ totals_row[col] = None
+ else:
+ totals_row[col] = results[col].sum()
+
+ # For the delta columns, we calculate the delta on the totals
+ totals_row[("ECO4 post-ciga", "", "Delta vs original estimate, remaining - %", "")] = (
+ (
+ totals_row[("ECO4 post-ciga", "", "Estimated remaining eligible - #", "")] -
+ totals_row[("ECO4 original", "", "Remaining - #", "")]
+ ) / totals_row[("ECO4 original", "", "Remaining - #", "")]
+ )
+
+ totals_row[("GBIS Postcode list", "", "Delta vs original estimate, remaining - %", "")] = (
+ (
+ totals_row[("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total")] -
+ totals_row[("GBIS original", "", "Remaining - #", "")]
+ ) / totals_row[("GBIS original", "", "Remaining - #", "")]
+ )
+
+ blank_row = pd.DataFrame([{col: "" for col in results.columns}])
+
+ # Put together a Warmfront original remaining ECO4 vs asset list remaining ECO4 and same for GBIS, as well as totals
+
+ # ECO4 Headlines
+ headline_eco4_original_remaining = totals_row[("ECO4 original", "", "Remaining - #", "")]
+ headline_eco4_original_remaining_revenue = totals_row[("ECO4 original", "", "Remaining - £", "")]
+ headline_eco4_postcode_list_remaining = totals_row[("ECO4 post-ciga", "", "Estimated remaining eligible - #", "")]
+ headline_eco4_postcode_list_remaining_revenue = totals_row[
+ ("ECO4 post-ciga", "", "Estimated remaining eligible - £", "")
+ ]
+ headline_eco4_delta = 100 * (
+ (headline_eco4_postcode_list_remaining - headline_eco4_original_remaining) /
+ headline_eco4_original_remaining
+ )
+ headline_eco4_delta = round(headline_eco4_delta, 1)
+
+ # GBIS Headlines
+ headline_gbis_original_remaining = totals_row[("GBIS original", "", "Remaining - #", "")]
+ headline_gbis_original_remaining_revenue = totals_row[("GBIS original", "", "Remaining - £", "")]
+ headline_gbis_postcode_list_remaining = totals_row[
+ ("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total")
+ ]
+ headline_gbis_postcode_list_remaining_revenue = totals_row[
+ ("GBIS Postcode list", "Warmfront post code list", "Remaining - £", "GBIS total")
+ ]
+ headline_gbis_delta = 100 * (
+ (headline_gbis_postcode_list_remaining - headline_gbis_original_remaining) /
+ headline_gbis_original_remaining
+ )
+ headline_gbis_delta = round(headline_gbis_delta, 1)
+
+ headline_original_total_revenue_remaining = (
+ headline_eco4_original_remaining_revenue + headline_gbis_original_remaining_revenue
+ )
+
+ headline_postcode_list_total_revenue_remaining = (
+ headline_eco4_postcode_list_remaining_revenue + headline_gbis_postcode_list_remaining_revenue
+ )
+ headline_total_delta = 100 * (
+ (headline_postcode_list_total_revenue_remaining - headline_original_total_revenue_remaining) /
+ headline_original_total_revenue_remaining
+ )
+ headline_total_delta = round(headline_total_delta, 1)
+
+ headline_eco4_sold_since_november = (
+ totals_row[('ECO4 pre-ciga', '', 'Sold - £', '')] +
+ totals_row[('ECO4 pre-ciga', '', 'Confirmed cancellations - £', '')] + # confirmed canclleations
+ totals_row[('ECO4 pre-ciga', '', 'Unconfirmed cancellations - £', '')] - # expected cancellations
+ totals_row[('ECO4 original', '', 'Sold or cancelled - £', '')]
+ )
+
+ headline_gbis_sold_since_november = (
+ totals_row[("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total")] +
+ totals_row[("GBIS Postcode list", "", "Confirmed cancellations - £", "")] + # confirmed cancellations
+ totals_row[("GBIS Postcode list", "", "Unconfirmed cancellations - £", "")] - # expected cancellations
+ totals_row[('GBIS original', '', 'Sold or cancelled - £', '')]
+ )
+
+ headlines = [
+ {
+ ("", "", "", "HA Name"): "Headlines",
+ },
+ {
+ ("", "", "", "HA Name"): "ECO4 Remaining - November - #",
+ ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_eco4_original_remaining
+
+ },
+ {
+ ("", "", "", "HA Name"): "ECO4 Remaining - November - £",
+ (
+ "", "Original Warmfront estimate", "Total - #",
+ "ECO4 - November"): headline_eco4_original_remaining_revenue
+ },
+ {
+ ("", "", "", "HA Name"): "ECO4 Sold or cancelled since November - £",
+ (
+ "", "Original Warmfront estimate", "Total - #",
+ "ECO4 - November"): headline_eco4_sold_since_november
+ },
+ {
+ ("", "", "", "HA Name"): "ECO4 Remaining - postcode list (post CIGA) - #",
+ ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_eco4_postcode_list_remaining
+ },
+ {
+ ("", "", "", "HA Name"): "ECO4 Remaining - postcode list (post CIGA) - £",
+ ("", "Original Warmfront estimate", "Total - #",
+ "ECO4 - November"): headline_eco4_postcode_list_remaining_revenue
+ },
+ {
+ ("", "", "", "HA Name"): "ECO4 £ remaining delta - %",
+ ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(headline_eco4_delta) + "%"
+ },
+ {
+ ("", "", "", "HA Name"): "GBIS Remaining - November - #",
+ ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_gbis_original_remaining
+ },
+ {
+ ("", "", "", "HA Name"): "GBIS Remaining - November - £",
+ (
+ "", "Original Warmfront estimate", "Total - #",
+ "ECO4 - November"): headline_gbis_original_remaining_revenue
+ },
+ {
+ ("", "", "", "HA Name"): "GBIS Sold or cancelled since November - £",
+ (
+ "", "Original Warmfront estimate", "Total - #",
+ "ECO4 - November"): headline_gbis_sold_since_november
+ },
+ {
+ ("", "", "", "HA Name"): "GBIS Remaining - post code list - #",
+ ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): headline_gbis_postcode_list_remaining
+ },
+ {
+ ("", "", "", "HA Name"): "GBIS Remaining - post code list - £",
+ ("", "Original Warmfront estimate", "Total - #",
+ "ECO4 - November"): headline_gbis_postcode_list_remaining_revenue
+ },
+ {
+ ("", "", "", "HA Name"): "GBIS delta %",
+ ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(headline_gbis_delta) + "%"
+ },
+ # Total revenue
+ {
+ ("", "", "", "HA Name"): "Total Remaining - November - £",
+ ("", "Original Warmfront estimate", "Total - #",
+ "ECO4 - November"): headline_original_total_revenue_remaining
+ },
+ {
+ ("", "", "", "HA Name"): "Total Remaining - post code list (post CIGA) - £",
+ ("", "Original Warmfront estimate", "Total - #",
+ "ECO4 - November"): headline_postcode_list_total_revenue_remaining
+ },
+ {
+ ("", "", "", "HA Name"): "Total Remaining delta %",
+ ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(headline_total_delta) + "%"
+ },
+ ]
+
+ assumptions = [
+ {
+ ("", "", "", "HA Name"): "Assumptions",
+ },
+ {
+ ("", "", "", "HA Name"): "ECO4 rate",
+ ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(eco4_rate)
+ },
+ {
+ ("", "", "", "HA Name"): "GBIS rate",
+ ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): "£" + str(gbis_rate)
+ },
+ {
+ ("", "", "", "HA Name"): "Median CIGA pass rate",
+ ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
+ round(median_ciga_success_rate * 100, 1)) + "%",
+ },
+ {
+ ("", "", "", "HA Name"): "Maximum allowed CIGA pass rate",
+ ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
+ round(maximum_ciga_conversion * 100, 1)) + "%",
+ ("ECO4 original", "", "Remaining - #",
+ ""): "- Maximum allowed CIGA conversion for HAs without CIGA checks We do not allow above this to be "
+ "conservative"
+ },
+ {
+ ("", "", "", "HA Name"): "Median ECO4 (no CIGA) sales conversion rate",
+ ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
+ round(median_eco4_to_install * 100, 1)) + "%",
+ ("ECO4 original", "", "Remaining - #",
+ ""): " - Sales conversion rate for a ECO4 property that didn't need a CIGA check. Surveys that resulted "
+ "in cancelled install are excluded."
+ },
+ {
+ ("", "", "", "HA Name"): "Median ECO4 (subect to CIGA) sales conversion rate",
+ ("", "Original Warmfront estimate", "Total - #", "ECO4 - November"): str(
+ round(median_ciga_pass_to_install * 100, 1)) + "%",
+ ("ECO4 original", "", "Remaining - #",
+ ""): " - Sales conversion rate for a ECO4 property that passed a CIGA check. Surveys that resulted in "
+ "cancelled installs are excluded."
+ }
+ ]
+
+ results = pd.concat(
+ [
+ results,
+ pd.DataFrame([totals_row]),
+ blank_row,
+ pd.DataFrame(headlines),
+ blank_row,
+ blank_row,
+ pd.DataFrame(assumptions)
+ ]
+ )
+ with open("HA Remaining Analysis.csv", "w", newline="") as file:
+ # Write the DataFrame data without the index (adjust if you want the index).
+ results.to_csv(file, header=True, index=False)
+
+
+def fml_data_pull(loader, ha_names=None, epc_api_key=None):
+    """Fetch the newest EPC for every eligible asset of each HA and store it in S3.
+
+    For each HA name, the rows of ``loader.data[ha]["asset_list"]`` whose
+    "ECO Eligibility" is not "not eligible" are looked up through ``SearchEpc``.
+    The EPCs that are found are collected into one DataFrame per HA and written
+    with ``save_pickle_to_s3`` to ``ha-analysis/revised/<ha>/epc_data.pickle``
+    in the ``retrofit-datalake-dev`` bucket.
+
+    A failure while processing one HA is reported and does not stop the
+    remaining HAs from being processed.
+
+    Args:
+        loader: Object exposing ``data``, a mapping of HA name to a dict that
+            holds an "asset_list" DataFrame.
+        ha_names: Optional iterable of HA names to process. Defaults to the
+            built-in list below.
+        epc_api_key: Optional token handed to ``SearchEpc`` as ``auth_token``.
+            Defaults to the key embedded below.
+
+    Returns:
+        List of the HA names whose pull raised an exception (empty when every
+        HA succeeded).
+    """
+    # Imported lazily, as in the original implementation.
+    from backend.SearchEpc import SearchEpc
+
+    if ha_names is None:
+        # HAs based in Scotland are left out because they cannot be pulled from
+        # the EPC database: "HAXXX", "HAXX"
+        ha_names = [
+            "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
+            "HA50", "HA24", "HA15", "HA32", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
+            "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
+            'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', "HA17", "HA5", "HA20",
+        ]
+
+    if epc_api_key is None:
+        # NOTE(review): this credential is committed in source control. It should
+        # be rotated and supplied by the caller / configuration instead.
+        epc_api_key = "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA="
+
+    failed_has = []
+    for ha in ha_names:
+        print(f"Pulling data for {ha}")
+        try:
+            asset_list = loader.data[ha]["asset_list"].copy()
+            # Only properties that were found to be eligible
+            fml = asset_list[asset_list["ECO Eligibility"] != "not eligible"]
+
+            # For each property, search for the latest EPC
+            epc_data = []
+            for _, row in tqdm(fml.iterrows(), total=fml.shape[0]):
+
+                property_type, _ = get_property_type_and_built_form(property_meta=row, ha_name=ha)
+
+                if ha == "HAXXX":
+                    # This HA has no pre-built matching address, so assemble one
+                    # from the individual address columns.
+                    to_join = [
+                        str(x) for x in [
+                            row["Door Number"],
+                            row["Address Line 1"],
+                            row["Address Line 2"],
+                            row["Address Line 3"],
+                            row["Postcode"],
+                        ] if x is not None
+                    ]
+                    full_address = ", ".join(to_join)
+                else:
+                    full_address = row["matching_address"]
+
+                searcher = SearchEpc(
+                    address1=str(row["HouseNo"]),
+                    postcode=row["matching_postcode"],
+                    auth_token=epc_api_key,
+                    os_api_key="",
+                    property_type=property_type,
+                    full_address=full_address,
+                    fast=True
+                )
+                # Force the skipping of estimating the EPC
+                searcher.ordnance_survey_client.property_type = None
+                searcher.ordnance_survey_client.built_form = None
+
+                searcher.find_property(skip_os=True)
+                if searcher.newest_epc is None:
+                    # No EPC found for this property
+                    continue
+
+                epc_data.append(
+                    {
+                        "asset_list_row_id": row["asset_list_row_id"],
+                        **searcher.newest_epc.copy()
+                    }
+                )
+
+            # Save the data in S3 as a pickle
+            epc_data_df = pd.DataFrame(epc_data)
+            save_pickle_to_s3(
+                data=epc_data_df,
+                bucket_name="retrofit-datalake-dev",
+                s3_file_name=f"ha-analysis/revised/{ha}/epc_data.pickle"
+            )
+        except Exception as e:
+            # Best effort per HA: record and report the failure, then carry on
+            # with the next HA rather than aborting the whole pull.
+            failed_has.append(ha)
+            print(f"Failed to pull data for {ha}: {e!r}")
+
+    if failed_has:
+        print(f"Data pull failed for {len(failed_has)} HA(s): {failed_has}")
+
+    return failed_has
+
+
+def extract_lower_bound(age_band):
+    """
+    Return the first (lower) year of an EPC construction age band.
+
+    The band is expected to look like "<region>: <start>-<end>" or "<region>: <start> onwards"; the year
+    directly after the colon is returned. 1930 is returned whenever no such year can be read: missing values,
+    non-string values, text without a colon, or bands such as "<region>: before 1900" that have no start year.
+
+    :param age_band: the age band text, or a missing value
+    :return: the lower bound year as an int, defaulting to 1930
+    """
+    if pd.isna(age_band):
+        return 1930
+    try:
+        years = age_band.split(':')[1]
+        # Take the first token of the part before the dash so "2007 onwards" yields 2007
+        return int(years.split('-')[0].split()[0])
+    except (ValueError, IndexError, AttributeError):
+        return 1930
+
+
+def classify_loft(x):
+    """
+    Grade a property from its roof insulation thickness and the age of its EPC.
+
+    :param x: mapping / row with "roof_insulation_thickness" (convertible to float) and "epc_age" (days)
+    :return: "high" for a thickness up to 100, "medium" for a thickness up to 200 (or up to 270 when the EPC
+        is at least five years old), otherwise "unlikely"
+    """
+    thickness = float(x["roof_insulation_thickness"])
+
+    if thickness <= 100:
+        grade = "high"
+    elif thickness <= 200 or (thickness <= 270 and x["epc_age"] >= 5 * 365):
+        # The EPC age is only consulted for thicknesses between 200 and 270
+        grade = "medium"
+    else:
+        grade = "unlikely"
+
+    return grade
+
+
+def fml_analysis(loader):
+    """
+    Estimate, per housing association (HA), how many of the remaining properties are expected to be ECO4 and
+    GBIS eligible once the EPC records stored in S3 are taken into account, and write the summary to
+    "analysis - revised.csv".
+
+    For every HA the steps are:
+      1. join the eligible asset list rows to the stored EPC records (rows without an EPC are flagged as
+         ``estimated``) and drop the rows that already have an installation status;
+      2. parse the roof insulation thickness from the roof description and grade it with ``classify_loft``;
+      3. resolve the "(subject to archetype)" eligibility labels from the EPC property type / built form;
+      4. for the rows that have an EPC, count per eligibility label the rows passing the wall, roof and
+         energy efficiency filters;
+      5. scale those counts up to the rows without an EPC, using the conversion rates seen in step 4.
+
+    :param loader: object exposing ``december_figures`` (DataFrame with "HA Name", "ECO4 remaining" and
+        "GBIS remaining") and ``data[ha]`` holding the "asset_list", "survey_list" and "ciga_list" DataFrames
+    :return: None; the results are written to "analysis - revised.csv"
+    :raises Exception: if a merge changes the number of rows, or an archetype label is left unresolved
+    :raises ZeroDivisionError: if rows without an EPC exist for a label that has no rows with an EPC to take
+        a conversion rate from (see the NOTE(review) comments below)
+    """
+    assumed_ciga_pass_rate = 0.731
+    ha_names = [
+        "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", "HA13",
+        "HA50", "HA24", "HA15", "HA32", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12",
+        "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49",
+        'HA8', 'HA11', 'HA21', 'HA37', 'HA42', 'HA44', 'HA45', 'HA51', 'HA52', "HA17", "HA5", "HA20",
+    ]
+
+    # Wall descriptions accepted for the labels that are not going through a CIGA check
+    no_ciga_cavity_descriptions = [
+        "Cavity wall, as built, insulated (assumed)",
+        "Cavity wall, as built, no insulation (assumed)",
+        "Cavity wall, as built, partial insulation (assumed)",
+        "Cavity wall, no insulation (assumed)",
+        "Cavity wall, partial insulation (assumed)",
+        "Cavity wall,",
+        "Cavity wall, insulated (assumed)",
+    ]
+
+    # TODO: There will be some properties that are subject to CIGA that do not look like they need a CIGA check!
+    #  Pass them! Non-invasives will have checked the wall though
+
+    results = []
+    for ha_name in tqdm(ha_names):
+
+        original_figures = loader.december_figures[
+            loader.december_figures["HA Name"] == ha_name
+        ].copy()
+        original_remaining = original_figures["ECO4 remaining"].values[0]
+        original_gbis_remaining = original_figures["GBIS remaining"].values[0]
+
+        # Read in the epc data
+        asset_list = loader.data[ha_name]["asset_list"].copy()
+        # properties found as eligible
+        eligible_assets = asset_list[asset_list["ECO Eligibility"] != "not eligible"]
+        epc_data = read_pickle_from_s3(
+            bucket_name="retrofit-datalake-dev",
+            s3_file_name=f"ha-analysis/revised/{ha_name}/epc_data.pickle"
+        )
+        # We make sure we don't have duplicates. We do a super basic drop duplicates because it shouldn't be a
+        # huge issue at this point
+        epc_data = epc_data.drop_duplicates("uprn")
+
+        # time from the inspection to now
+        epc_data["epc_age"] = (datetime.now() - pd.to_datetime(epc_data["inspection-date"])).dt.days
+        if "estimated" not in epc_data.columns:
+            # For all after HA7, we don't use estimated surveys
+            epc_data["estimated"] = False
+
+        remaining = eligible_assets.merge(
+            epc_data, how="left", on="asset_list_row_id"
+        )
+        # Rows that found no EPC record carry no "estimated" value after the left merge
+        remaining["estimated"] = remaining["estimated"].fillna(True)
+        if remaining.shape[0] != eligible_assets.shape[0]:
+            raise Exception(f"{ha_name}: merging the EPC data changed the number of asset list rows")
+
+        # Take just remaining
+        survey_list = loader.data[ha_name]["survey_list"]
+        if not survey_list.empty:
+            survey_list = survey_list[~pd.isnull(survey_list["asset_list_row_id"])]
+            remaining = remaining.merge(
+                survey_list[["asset_list_row_id", "installation_status"]],
+                how="left",
+                on="asset_list_row_id"
+            )
+            # Anything that has an installation has gone to installation, and therefore is not remaining
+            remaining = remaining[pd.isnull(remaining["installation_status"])]
+            remaining = remaining.drop(columns=["installation_status"])
+
+        insulation_thicknesses = []
+        for _, x in remaining.iterrows():
+            roof_description = x["roof-description"]
+            if pd.isnull(roof_description) or roof_description == "SAP05:Roof":
+                continue
+
+            thickness = RoofAttributes(roof_description).process()["insulation_thickness"]
+            # If there is a + in the thickness, strip it out
+            thickness = str(thickness).replace("+", "")
+            insulation_thicknesses.append(
+                {'uprn': x["uprn"], "roof_insulation_thickness": thickness}
+            )
+        insulation_thicknesses = pd.DataFrame(insulation_thicknesses)
+
+        before_merge_shape = remaining.shape[0]
+        remaining = remaining.merge(insulation_thicknesses, how="left", on="uprn")
+
+        if remaining.shape[0] != before_merge_shape:
+            raise Exception(f"{ha_name}: merging the roof insulation thicknesses changed the number of rows")
+
+        if any(remaining["ECO Eligibility"].str.contains("subject to archetype")):
+            # We perform the archetype test. A house passes when it is detached, semi-detached or end-terrace;
+            # a bungalow passes when it is detached
+            remaining["passes_archetype"] = None
+            remaining["passes_archetype"] = np.where(
+                (remaining["property-type"] == "House") &
+                (remaining["built-form"].isin(["Semi-Detached", "End-Terrace", "Detached"])),
+                True,
+                remaining["passes_archetype"]
+            )
+
+            remaining["passes_archetype"] = np.where(
+                (remaining["property-type"] == "Bungalow") &
+                (remaining["built-form"].isin(["Detached"])),
+                True,
+                remaining["passes_archetype"]
+            )
+
+            remaining["ECO Eligibility"] = np.where(
+                (remaining["ECO Eligibility"] == "eco4 (subject to ciga) (subject to archetype)") &
+                (remaining["passes_archetype"] == True),
+                "eco4 (subject to ciga)",
+                remaining["ECO Eligibility"]
+            )
+
+            # If failed the archetype check and needs a CIGA, it's not eligible
+            remaining["ECO Eligibility"] = np.where(
+                (remaining["ECO Eligibility"] == "eco4 (subject to ciga) (subject to archetype)") &
+                (remaining["passes_archetype"] != True),
+                "not eligible",
+                remaining["ECO Eligibility"]
+            )
+
+            remaining["ECO Eligibility"] = np.where(
+                (remaining["ECO Eligibility"] == "eco4 (subject to archetype)") &
+                (remaining["passes_archetype"] == True),
+                "eco4",
+                remaining["ECO Eligibility"]
+            )
+
+            remaining["ECO Eligibility"] = np.where(
+                (remaining["ECO Eligibility"] == "eco4 (subject to archetype)") &
+                (remaining["passes_archetype"] != True),
+                "gbis",
+                remaining["ECO Eligibility"]
+            )
+
+        if any(remaining["ECO Eligibility"].str.contains("subject to archetype")):
+            raise Exception(f"{ha_name}: an archetype-dependent eligibility label was left unresolved")
+
+        # clean roof insulation: map the textual thicknesses onto numbers. "below average" has to be replaced
+        # before "average", otherwise it would be turned into "below 150"
+        remaining["roof_insulation_thickness"] = remaining["roof_insulation_thickness"].fillna("0")
+        remaining["roof_insulation_thickness"] = remaining[
+            "roof_insulation_thickness"
+        ].str.replace("below average", "50")
+        remaining["roof_insulation_thickness"] = remaining[
+            "roof_insulation_thickness"
+        ].str.replace("None", "0")
+        remaining["roof_insulation_thickness"] = remaining[
+            "roof_insulation_thickness"
+        ].str.replace("none", "0")
+        remaining["roof_insulation_thickness"] = remaining[
+            "roof_insulation_thickness"
+        ].str.replace("average", "150")
+        remaining["roof_insulation_thickness"] = remaining[
+            "roof_insulation_thickness"
+        ].str.replace("above 150", "150")
+
+        remaining["roof_classiciation"] = remaining.apply(lambda x: classify_loft(x), axis=1)
+
+        had_survey = remaining[remaining["estimated"] == False]
+
+        # Let's look just at the ECO4 business
+        # For things that had a survey, take the properties that didn't need a CIGA check
+        no_ciga_check_needed = had_survey[
+            had_survey["ECO Eligibility"] == "eco4"
+        ]
+
+        no_ciga_check_needed_eligible = no_ciga_check_needed[
+            (no_ciga_check_needed["walls-description"].isin(no_ciga_cavity_descriptions)) &
+            (no_ciga_check_needed["roof_classiciation"].isin(["high", "medium"])) &
+            (no_ciga_check_needed["current-energy-efficiency"].astype(float) <= 80)
+        ]
+
+        # For anything not needing a CIGA check, some of it will be GBIS
+        no_ciga_check_needed_eligible_gbis = no_ciga_check_needed[
+            (no_ciga_check_needed["walls-description"].isin(no_ciga_cavity_descriptions)) &
+            (no_ciga_check_needed["current-energy-efficiency"].astype(float) <= 80) &
+            (~no_ciga_check_needed["asset_list_row_id"].isin(no_ciga_check_needed_eligible["asset_list_row_id"].values))
+        ]
+
+        # Characterise the properties that already passed their CIGA check
+        ciga_check_passed = had_survey[had_survey["ECO Eligibility"] == "eco4 - passed ciga"]
+        ciga_check_passed_eligible = ciga_check_passed[
+            (ciga_check_passed["walls-description"].str.lower().str.contains("cavity") == True) &
+            (ciga_check_passed["roof_classiciation"].isin(["high", "medium"])) &
+            (ciga_check_passed["current-energy-efficiency"].astype(float) <= 80)
+        ]
+
+        ciga_list = loader.data[ha_name]["ciga_list"]
+        if not ciga_list.empty:
+            # The pass rate is the share of CIGA checks that came back with no guarantee; it is 0 when no
+            # check came back as "No"
+            proportions = ciga_list["Guarantee"].value_counts(normalize=True)
+            ha_ciga_pass_rate = proportions.get("No", 0.0)
+        else:
+            ha_ciga_pass_rate = assumed_ciga_pass_rate
+
+        # We take just the cavity walls
+        # UCL paper: https://discovery.ucl.ac.uk/id/eprint/10110371/
+        # This paper is based on London properties
+        # The proportion of EPCs with building characteristics errors are shown to
+        # differ between variables; floor and wall type errors occur in ~10-15% of EPCs,
+        # compared with ~5% for wall insulation and glazing performance
+
+        ciga_check_needed = had_survey[
+            had_survey["ECO Eligibility"].str.contains("subject to ciga")
+        ].copy()
+
+        ciga_check_needed_eligible = ciga_check_needed[
+            (ciga_check_needed["walls-description"].str.lower().str.contains("cavity") == True) &
+            (ciga_check_needed["roof_classiciation"].isin(["high", "medium"])) &
+            (ciga_check_needed["current-energy-efficiency"].astype(float) <= 80)
+        ]
+
+        # Finally, characterise gbis properties. Some of the business might look like ECO4 work, whereas we then
+        # qualify what actually looks like gbis
+        gbis_identified = had_survey[
+            had_survey["ECO Eligibility"] == "gbis"
+        ].copy()
+
+        gbis_looks_like_eco4 = gbis_identified[
+            (gbis_identified["walls-description"].isin(no_ciga_cavity_descriptions)) &
+            (gbis_identified["roof_classiciation"].isin(["high", "medium"])) &
+            (gbis_identified["current-energy-efficiency"].astype(float) <= 80) &
+            (
+                (
+                    (gbis_identified["property-type"] == "House") &
+                    (gbis_identified["built-form"] != "Mid-Terrace")
+                ) | (
+                    (gbis_identified["property-type"] == "Bungalow") &
+                    (gbis_identified["built-form"].isin(["Detached"]))
+                )
+            )
+        ]
+
+        gbis_qualified = gbis_identified[
+            (gbis_identified["walls-description"].isin(no_ciga_cavity_descriptions)) &
+            (gbis_identified["current-energy-efficiency"].astype(float) <= 80) &
+            (~gbis_identified["asset_list_row_id"].isin(gbis_looks_like_eco4["asset_list_row_id"].values))
+        ]
+
+        ciga_check_expectation = np.round(ciga_check_needed_eligible.shape[0] * ha_ciga_pass_rate)
+        without_ciga_expectation = no_ciga_check_needed_eligible.shape[0]
+        passed_ciga_expectation = ciga_check_passed_eligible.shape[0]
+        identified_as_gbis_looks_like_eco4 = gbis_looks_like_eco4.shape[0]
+
+        # Need to add on the non-ciga
+        total_eco4_expectation = (
+            ciga_check_expectation +
+            without_ciga_expectation +
+            passed_ciga_expectation +
+            identified_as_gbis_looks_like_eco4
+        )
+
+        no_ciga_check_needed_actually_gbis = no_ciga_check_needed_eligible_gbis.shape[0]
+        gbis_qualified = gbis_qualified.shape[0]
+
+        total_gbis_expectation = no_ciga_check_needed_actually_gbis + gbis_qualified
+
+        # Only when some rows have no EPC is there anything left to estimate
+        if had_survey.shape[0] < remaining.shape[0]:
+            # We estimate the rest
+            # NOTE(review): apart from the CIGA case, the rates below divide by the number of rows with an EPC
+            # for the same label; that raises ZeroDivisionError when the label only occurs on rows without an
+            # EPC. Which rate to assume in that situation needs a decision.
+            without_survey_needing_ciga = remaining[
+                (remaining["estimated"] == True) &
+                (remaining["ECO Eligibility"].str.contains("subject to ciga") == True)
+            ]
+
+            if without_survey_needing_ciga.empty:
+                without_survey_without_ciga_expected = 0
+            else:
+                # We apply the same conversion rate as the properties with a survey
+
+                if ciga_check_needed.shape[0] == 0 and ciga_check_expectation == 0:
+                    # No reference group to take a rate from: every such row is counted
+                    without_survey_without_ciga_expected = without_survey_needing_ciga.shape[0]
+                else:
+                    without_survey_without_ciga_expected = np.round(
+                        without_survey_needing_ciga.shape[0] * (ciga_check_expectation / ciga_check_needed.shape[0])
+                    )
+
+            without_survey_passed_ciga = remaining[
+                (remaining["estimated"] == True) &
+                (remaining["ECO Eligibility"] == "eco4 - passed ciga")
+            ]
+
+            if without_survey_passed_ciga.empty:
+                without_survey_passed_ciga_expected = 0
+            else:
+                # We apply the same conversion rate as the properties with a survey
+                without_survey_passed_ciga_expected = np.round(
+                    without_survey_passed_ciga.shape[0] * (passed_ciga_expectation / ciga_check_passed.shape[0])
+                )
+
+            # Finally, no ciga needed
+            without_survey_eco4 = remaining[
+                (remaining["estimated"] == True) &
+                (remaining["ECO Eligibility"] == "eco4")
+            ]
+
+            if without_survey_eco4.empty:
+                without_survey_eco4_expected = 0
+                without_survey_gbis_expected = 0
+            else:
+                # We apply the same conversion rate as the properties with a survey
+                without_survey_eco4_expected = np.round(
+                    without_survey_eco4.shape[0] * (without_ciga_expectation / no_ciga_check_needed.shape[0])
+                )
+
+                # NOTE(review): the numerator is the GBIS total over both the "eco4" and the "gbis" labelled
+                # rows, while the denominator only counts the "eco4" labelled rows - confirm whether
+                # no_ciga_check_needed_actually_gbis was intended here
+                without_survey_gbis_expected = np.round(
+                    without_survey_eco4.shape[0] * (total_gbis_expectation / no_ciga_check_needed.shape[0])
+                )
+
+            # And gbis
+            without_survey_gbis = remaining[
+                (remaining["estimated"] == True) &
+                (remaining["ECO Eligibility"] == "gbis")
+            ]
+
+            if without_survey_gbis.empty:
+                without_survey_identified_as_gbis_qualified = 0
+                without_survey_identified_as_gbis_eco4 = 0
+            else:
+                # We apply the same conversion rate as the properties with a survey
+                without_survey_identified_as_gbis_qualified = np.round(
+                    without_survey_gbis.shape[0] * (gbis_qualified / gbis_identified.shape[0])
+                )
+
+                # The rate comes from the "gbis" labelled rows, so it is applied to the "gbis" labelled rows
+                # without an EPC (not to the "eco4" labelled ones)
+                without_survey_identified_as_gbis_eco4 = np.round(
+                    without_survey_gbis.shape[0] * (identified_as_gbis_looks_like_eco4 / gbis_identified.shape[0])
+                )
+
+            total_eco4_expectation = (
+                total_eco4_expectation +
+                without_survey_without_ciga_expected +
+                without_survey_passed_ciga_expected +
+                without_survey_eco4_expected +
+                without_survey_identified_as_gbis_eco4
+            )
+
+            total_gbis_expectation = (
+                total_gbis_expectation +
+                without_survey_gbis_expected +
+                without_survey_identified_as_gbis_qualified
+            )
+
+        results.append(
+            {
+                "HA Name": ha_name,
+                "Original ECO4 Estimate - Remaining": original_remaining,
+                "Original GGBIS Estimate - Remaining": original_gbis_remaining,
+                # "Postcode List - Remaining": postcode_list_remaining,
+                # "Of which sold": sales_since_nov,
+                "EPC verified ECO4 Eligible - Remaining": int(total_eco4_expectation),
+                "EPC verified GBIS Eligibile - Remaining": int(total_gbis_expectation),
+            }
+        )
+
+    results_df = pd.DataFrame(results)
+    results_df.to_csv("analysis - revised.csv")
+
+    # results_df["Delta vs November"] = 100 * (
+    #         results_df["Of which ECO4 Eligible - Remaining"] - results_df["Original ECO4 Estimate - Remaining"]
+    # ) / results_df["Original ECO4 Estimate - Remaining"]
+
+    # TODO: Add in estimated GBIS (for eco jobs, of which look like gbis)
+    # TODO: Change the left hand side number for our post CIGA estimates
+
+
+def create_final_report():
+    """
+    Produce the final output for the HA analysis.
+
+    Reads "analysis - revised.csv" and "pipeline_remaining_raw.csv" from the working directory, builds one
+    ECO4 / GBIS / total "remaining" summary per source, merges the summaries on "HA Name", and writes the
+    volumes and the derived revenue figures to "HA Analysis Final - volumes.csv" and
+    "HA Analysis Final - revenue.csv".
+
+    :return: None
+    :raises ValueError: if one of the sanity checks on the merged volumes fails
+    """
+    epc_validated_results = pd.read_csv("analysis - revised.csv")
+    pipeline_results = pd.read_csv("pipeline_remaining_raw.csv")
+
+    # The pipeline CSV carries its multi-level headers as stringified tuples
+    pipeline_ha_column = "('', '', '', 'HA Name')"
+    pipeline_gbis_postcode_column = (
+        "('GBIS Postcode list', 'Warmfront post code list', 'Remaining - #', 'GBIS total')"
+    )
+
+    def summarise(frame, ha_column, eco4_column, gbis_column, label):
+        # Pick the three source columns, rename them for `label`, add their total and sort by HA name
+        eco4_name = f"# ECO4 remaining - {label}"
+        gbis_name = f"# GBIS remaining - {label}"
+        section = frame[[ha_column, eco4_column, gbis_column]].copy().rename(
+            columns={ha_column: "HA Name", eco4_column: eco4_name, gbis_column: gbis_name}
+        )
+        section[f"# Total remaining - {label}"] = section[eco4_name] + section[gbis_name]
+        return section.sort_values("HA Name")
+
+    ####################################
+    # Original Warmfront estimates
+    ####################################
+    all_ha_summary_remaining = summarise(
+        pipeline_results,
+        pipeline_ha_column,
+        "('ECO4 original', '', 'Remaining - #', '')",
+        "('GBIS original', '', 'Remaining - #', '')",
+        "All HA Summary",
+    )
+
+    ####################################
+    # Postcode list - pre-CIGA
+    ####################################
+    postcode_list_pre_ciga_remaining = summarise(
+        pipeline_results,
+        pipeline_ha_column,
+        "('ECO4 pre-ciga', '', 'Remaining - #', '')",
+        pipeline_gbis_postcode_column,
+        "Postcode list (pre CIGA)",
+    )
+
+    ####################################
+    # Postcode list - post-CIGA
+    ####################################
+    # The GBIS figure is taken from the same pipeline column as in the pre-CIGA summary
+    postcode_list_post_ciga_remaining = summarise(
+        pipeline_results,
+        pipeline_ha_column,
+        "('ECO4 post-ciga', '', 'Estimated remaining eligible - #', '')",
+        pipeline_gbis_postcode_column,
+        "Postcode list (post CIGA)",
+    )
+
+    ####################################
+    # From EPC Database
+    ####################################
+    from_epc_database = summarise(
+        epc_validated_results,
+        "HA Name",
+        "EPC verified ECO4 Eligible - Remaining",
+        "EPC verified GBIS Eligibile - Remaining",
+        "From EPC Database (post CIGA)",
+    )
+
+    # Combine the datasets; only HAs that also have EPC-validated figures are kept
+    volumes = all_ha_summary_remaining.merge(postcode_list_pre_ciga_remaining, how="left", on="HA Name")
+    volumes = volumes.merge(postcode_list_post_ciga_remaining, how="left", on="HA Name")
+    volumes = volumes.merge(from_epc_database, how="inner", on="HA Name")
+
+    # Convert the volumes to revenue: 1710 per ECO4 job, 600 per GBIS job, then re-calculate the totals
+    revenue = volumes.copy()
+    for label in [
+        "All HA Summary",
+        "Postcode list (pre CIGA)",
+        "Postcode list (post CIGA)",
+        "From EPC Database (post CIGA)",
+    ]:
+        eco4_name = f"# ECO4 remaining - {label}"
+        gbis_name = f"# GBIS remaining - {label}"
+        revenue[eco4_name] = revenue[eco4_name] * 1710
+        revenue[gbis_name] = revenue[gbis_name] * 600
+        revenue[f"# Total remaining - {label}"] = revenue[eco4_name] + revenue[gbis_name]
+
+    # Replace the # with £ in the columns
+    revenue.columns = [name.replace("#", "£") for name in revenue.columns]
+
+    # We check that each column gets smaller
+    eco4_pre_ciga = volumes["# ECO4 remaining - Postcode list (pre CIGA)"]
+    eco4_post_ciga = volumes["# ECO4 remaining - Postcode list (post CIGA)"]
+    if not all(eco4_pre_ciga >= eco4_post_ciga):
+        raise ValueError("decreasing_check1 failed")
+
+    # Just HA32 and HA17 should fail this, and it's due to GBIS jobs looking like ECO4
+    eco4_from_epc = volumes["# ECO4 remaining - From EPC Database (post CIGA)"]
+    grew_after_epc = volumes[eco4_from_epc > eco4_post_ciga]
+    if set(grew_after_epc["HA Name"].tolist()) != {"HA17", "HA32"}:
+        raise ValueError("decreasing_check2 failed")
+
+    # Check for GBIS
+    gbis_pre_ciga = volumes["# GBIS remaining - Postcode list (pre CIGA)"]
+    gbis_post_ciga = volumes["# GBIS remaining - Postcode list (post CIGA)"]
+    if not all(gbis_pre_ciga >= gbis_post_ciga):
+        raise ValueError("decreasing_check3 failed")
+
+    # The equivalent check of the EPC database GBIS figures against the postcode list is deliberately not
+    # performed - it fails for multiple HAs
+
+    # Store final outputs
+    volumes.to_csv("HA Analysis Final - volumes.csv")
+    revenue.to_csv("HA Analysis Final - revenue.csv")
+
+
+def identify_eco_works(loader):
+    """
+    Split the not-yet-installed assets of selected housing associations (HAs) by ECO4 eligibility label,
+    store each non-empty split as a CSV under "local_data/" and return the counts per HA.
+
+    :param loader: object whose ``data`` maps an HA code to a dict holding the "asset_list" and
+        "survey_list" DataFrames
+    :return: DataFrame with one row per processed HA and the columns "HA Name", "n_needing_ciga", "eco4"
+        and "eco4_passed_ciga"
+    """
+    # Earlier runs covered: HA16 (For Housing), HA39 (Rooftop), HA41 (Settle), HA23 (Lambeth), HA14 (EMH),
+    # HA7 (Believe), HA102 (Thrive)
+
+    # HAs to process, with the name used in the output file names
+    names = {
+        "HA50": "Unitas",
+        "HA15": "Fairhive",
+        "HA107": "ACIS",
+        "HA24": "LHP"
+    }
+
+    breakdowns = []
+    for ha, data_assets in loader.data.items():
+        if ha not in names:
+            continue
+
+        asset_list = data_assets["asset_list"].copy()
+        survey_list = data_assets["survey_list"].copy()
+        # Remove things that have sold
+        if not survey_list.empty:
+            asset_list = asset_list.merge(
+                survey_list[["asset_list_row_id", "installation_status"]],
+                how="left",
+                on="asset_list_row_id"
+            )
+            # Anything that has an installation has gone to installation, and therefore is not remaining
+            asset_list = asset_list[pd.isnull(asset_list["installation_status"])]
+            asset_list = asset_list.drop(columns=["installation_status"])
+
+        # Needing a CIGA check
+        needs_ciga = asset_list[
+            asset_list["ECO Eligibility"] == "eco4 (subject to ciga)"
+        ].copy()
+
+        eco4 = asset_list[
+            asset_list["ECO Eligibility"] == "eco4"
+        ].copy()
+
+        eco4_passed_ciga = asset_list[
+            asset_list["ECO Eligibility"] == "eco4 - passed ciga"
+        ].copy()
+
+        # Store the data; empty splits are not written
+        if not needs_ciga.empty:
+            needs_ciga.to_csv(f"local_data/{names[ha]} - needs ciga.csv")
+
+        if not eco4.empty:
+            eco4.to_csv(f"local_data/{names[ha]} - eco4.csv")
+
+        if not eco4_passed_ciga.empty:
+            eco4_passed_ciga.to_csv(f"local_data/{names[ha]} - eco4 passed ciga.csv")
+
+        breakdowns.append(
+            {
+                "HA Name": ha,
+                "n_needing_ciga": needs_ciga.shape[0],
+                "eco4": eco4.shape[0],
+                "eco4_passed_ciga": eco4_passed_ciga.shape[0]
+            }
+        )
+
+    # Hand the summary back to the caller instead of discarding it
+    return pd.DataFrame(breakdowns).fillna(0)
+
+
+def unitas_data_prep(loader):
+ #####
+ # Adhoc - for UNITAS, stripping out additional surveys that have been completed
+ unitas_data = loader.data["HA50"].copy()
+ unitas_asset_list = unitas_data["asset_list"].copy()
+ unitas_survey_sheet = unitas_data["survey_list"].copy()
+
+ # We remove the surveyed properties from the asset sheet
+ unitas_survey_sheet = unitas_survey_sheet[~pd.isnull(unitas_survey_sheet["asset_list_row_id"])]
+ unitas_asset_list = unitas_asset_list.merge(
+ unitas_survey_sheet[["asset_list_row_id", "installation_status"]],
+ how="left",
+ on="asset_list_row_id"
+ )
+ unitas_asset_list = unitas_asset_list[pd.isnull(unitas_asset_list["installation_status"])]
+ unitas_asset_list = unitas_asset_list.drop(columns=["installation_status"])
+
+ # We read in the data for the further completed surveys
+ unitas_phase_1_workbook = openpyxl.load_workbook(
+ "local_data/ha_data/UNITAS ( STOKE) MASTER ROLLING SHEET UPDATED 8.4.24 K - no password.xlsx"
+ )
+ phase_1_worksheet = unitas_phase_1_workbook["ECO 4 - PHASE 1"]
+ phase_2_worksheet = unitas_phase_1_workbook["ECO4 - PHASE 2"]
+ phase1_colnames = [cell.value for cell in phase_1_worksheet[1]]
+ phase_1_rows_data = []
+ for row in phase_1_worksheet.iter_rows(min_row=2, values_only=False):
+ row_data = [cell.value for cell in row] # This will get you the cell values
+ phase_1_rows_data.append(row_data)
+
+ phase_1_surveys = pd.DataFrame(phase_1_rows_data, columns=phase1_colnames)
+
+ # Correct phase 1 surveys in the same fashion as the previous approach
+ phase_1_surveys = DataLoader.correct_ha50_survey_list(phase_1_surveys.copy())
+
+ # We check all phase 1 surveys are contained in the data we had before
+ additional = []
+ for _, row in tqdm(phase_1_surveys.iterrows(), total=len(phase_1_surveys)):
+ # We look for the entry in the old survey sheet:
+ # matched_uprn = unitas_survey_sheet[unitas_survey_sheet["EPR UPRN NUMBER"] == row["UPRN"]]
+ # if matched_uprn.shape[0] == 1:
+ # continue
+
+ matched_1 = unitas_survey_sheet[
+ (unitas_survey_sheet["Post Code"] == row["Post Code"]) &
+ (unitas_survey_sheet["NO."] == row["NO."])
+ ]
+
+ if matched_1.shape[0] == 1:
+ continue
+
+ matched_2 = unitas_survey_sheet[
+ (unitas_survey_sheet["Street / Block Name"] == row["Street / Block Name"]) &
+ (unitas_survey_sheet["NO."] == row["NO."])
+ ]
+
+ if matched_2.shape[0] == 1:
+ continue
+
+ additional.append(row.to_dict())
+ additional = pd.DataFrame(additional)
+
+ phase_2_rows_data = []
+ for row in phase_2_worksheet.iter_rows(min_row=2, values_only=False):
+ row_data = [cell.value for cell in row] # This will get you the cell values
+ phase_2_rows_data.append(row_data)
+
+ phase2_colnames = [cell.value for cell in phase_2_worksheet[1]]
+ phase_2_surveys = pd.DataFrame(phase_2_rows_data, columns=phase2_colnames)
+ # Drop all of the occurances of "OFFICE USE ONLY" columns
+ phase_2_surveys = phase_2_surveys.drop(columns=[c for c in phase_2_surveys.columns if "OFFICE USE ONLY" in c])
+ common_columns = list({c for c in phase_2_surveys.columns if c in additional.columns})
+ additional_filtered = additional[common_columns]
+
+ further_unitas_completed_surveys = pd.concat(
+ [phase_2_surveys, additional_filtered],
+ axis=0,
+ ignore_index=True
+ )
+
+ # Add a phase 2 key
+ further_unitas_completed_surveys["survey_list_row_id"] = [
+ "unitas_phase_2" + str(i) for i in further_unitas_completed_surveys.index
+ ]
+
+ not_in_asset_list = [
+ "unitas_phase_20", "unitas_phase_234", "unitas_phase_2163", "unitas_phase_2173", "unitas_phase_2374"
+ ]
+
+ additional_postcodes = ["st28bg"]
+
+ full_asset_list = unitas_data["asset_list"].copy()
+ full_asset_list["matching_postcode"] = full_asset_list["matching_postcode"].str.lower().str.replace(" ", "")
+ further_unitas_completed_surveys["Post Code"] = further_unitas_completed_surveys["Post Code"].str.replace(
+ "ST 5DT", "ST3 5DT"
+ )
+
+ # We match these back to the asset list
+ matching_lookup = []
+ for _, row in tqdm(further_unitas_completed_surveys.iterrows(), total=len(further_unitas_completed_surveys)):
+
+ if row["survey_list_row_id"] in not_in_asset_list:
+ continue
+
+ postcode_lower = row["Post Code"].lower().strip().replace(" ", "")
+ if postcode_lower in additional_postcodes:
+ continue
+
+ # Confirmed not in asset lsit
+ # Filter asset list on postcode
+ df = full_asset_list[
+ full_asset_list["matching_postcode"].str.contains(postcode_lower)
+ ]
+
+ df = df[df["HouseNo"] == str(row["NO."])]
+
+ if df.shape[0] != 1:
+ raise Exception("NOT FOUND")
+
+ matching_lookup.append(
+ {
+ "survey_list_row_id": row["survey_list_row_id"],
+ "asset_list_row_id": df["asset_list_row_id"].values[0],
+ }
+ )
+
+ matching_lookup = pd.DataFrame(matching_lookup)
+ matching_lookup["phase_2_surveyed"] = True
+
+ # We merge this onto the asset list and remove the rows
+ unitas_asset_list = unitas_asset_list.merge(
+ matching_lookup, how="left", on="asset_list_row_id"
+ )
+ # Drop rows where phase_2_surveyed is populated
+ unitas_asset_list = unitas_asset_list[
+ pd.isnull(unitas_asset_list["phase_2_surveyed"])
+ ]
+
+ # We add in the new CIGA submissions
+ unitas_round_2_ciga_workbook = openpyxl.load_workbook("local_data/ha_data/Unitas second round CIGA checks.xlsx")
+ ciga_round_2_worksheet = unitas_round_2_ciga_workbook["Worksheet"]
+ ciga_round_2_colnames = [cell.value for cell in ciga_round_2_worksheet[1]]
+ round_2_rows_data = []
+ for row in ciga_round_2_worksheet.iter_rows(min_row=2, values_only=False):
+ row_data = [cell.value for cell in row] # This will get you the cell values
+ round_2_rows_data.append(row_data)
+
+ ciga_round_2 = pd.DataFrame(round_2_rows_data, columns=ciga_round_2_colnames)
+ # We merge the ciga sheet to the asset list
+ ciga_dependent_asset_list = unitas_asset_list[
+ unitas_asset_list["ECO Eligibility"].str.contains("subject to ciga")
+ ].copy()
+
+ # We merge the ciga sheet to the asset list
+ ciga_round_2_matched = ciga_dependent_asset_list.merge(
+ ciga_round_2, how="inner", on=["Address Line 1", "Post Code"]
+ )
+ # Filter on just the properties that had no guarantee
+ ciga_round_2_matched = ciga_round_2_matched[ciga_round_2_matched["Guarantee"] == "No"]
+
+ # ECO Eligibility
+ # not eligible 9227
+ # failed ciga 2711
+ # eco4 (subject to ciga) 2238
+ # eco4 - passed ciga 901
+ # gbis 114
+ # eco4 91
+
+ # We filter on the properties we're looking to re-survey
+ unitas_properties_to_survey = unitas_asset_list[
+ unitas_asset_list["ECO Eligibility"].isin(
+ [
+ "eco4 - passed ciga",
+ "eco4"
+ ]
+ )
+ ].copy()
+
+ unitas_properties_to_survey = pd.concat(
+ [
+ unitas_properties_to_survey,
+ ciga_round_2_matched[unitas_properties_to_survey.columns]
+ ]
+ )
+
+ epc_api_key = "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA="
+
+ # We now retrieve the latest EPC data
+ epc_data = []
+ for _, unitas_property in tqdm(unitas_properties_to_survey.iterrows(), total=len(unitas_properties_to_survey)):
+ property_type, _ = get_property_type_and_built_form(property_meta=unitas_property, ha_name="HA50")
+
+ full_address = unitas_property["matching_address"]
+
+ searcher = SearchEpc(
+ address1=str(unitas_property["HouseNo"]),
+ postcode=unitas_property["matching_postcode"],
+ auth_token=epc_api_key,
+ os_api_key="",
+ property_type=property_type,
+ full_address=full_address,
+ fast=True
+ )
+ # Force the skipping of estimating the EPC
+ searcher.ordnance_survey_client.property_type = None
+ searcher.ordnance_survey_client.built_form = None
+
+ searcher.find_property(skip_os=True)
+ if searcher.newest_epc is None:
+ continue
+
+ epc = {
+ "asset_list_row_id": unitas_property["asset_list_row_id"],
+ **searcher.newest_epc.copy()
+ }
+
+ epc_data.append(epc)
+
+ epc_df = pd.DataFrame(epc_data)
+ # Pull out just the columns we need
+ epc_df = epc_df[
+ [
+ "asset_list_row_id",
+ "address1", "postcode",
+ "current-energy-efficiency",
+ "current-energy-rating",
+ "inspection-date",
+ "transaction-type",
+ "built-form"
+ ]
+ ]
+
+ epc_df["EPC Rating"] = (
+ epc_df["current-energy-efficiency"].astype(str) +
+ epc_df["current-energy-rating"].astype(str)
+ )
+
+ # Merge onto the Unitas data:
+ unitas_properties_to_survey_full = unitas_properties_to_survey.merge(
+ epc_df[
+ [
+ "asset_list_row_id",
+ "EPC Rating",
+ "inspection-date",
+ "transaction-type",
+ "built-form"
+ ]
+ ],
+ how="left",
+ on="asset_list_row_id"
+ )
+
+ unitas_properties_to_survey_full["ECO Eligibility"] = unitas_properties_to_survey_full["ECO Eligibility"].replace(
+ "eco4 (subject to ciga)", "eco4 - passed ciga, phase 2 check"
+ )
+
+ for col in ["EPC Rating", "inspection-date", "transaction-type", "built-form"]:
+ unitas_properties_to_survey_full[col] = np.where(
+ pd.isnull(unitas_properties_to_survey_full[col]),
+ "No EPC found",
+ unitas_properties_to_survey_full[col]
+ )
+ unitas_properties_to_survey_full[col] = unitas_properties_to_survey_full[col].fillna(
+ "No EPC found"
+ )
+ unitas_properties_to_survey_full[col] = unitas_properties_to_survey_full[col].astype(str)
+
+ unitas_properties_to_survey_full = unitas_properties_to_survey_full.rename(
+ columns={
+ "inspection-date": "Last EPC Inspection Date",
+ "transaction-type": "Last EPC Reason",
+ "built-form": "Last EPC Built Form",
+ }
+ )
+
+ # We now match to the survey outcomes
+ unitas_survey_outcomes_workbook = openpyxl.load_workbook(
+ "local_data/ha_data/UNITAS - survey outcomes 26.03.2024.xlsx"
+ )
+ unitas_survey_outcomes_worksheet = unitas_survey_outcomes_workbook["OUTCOMES"]
+ unitas_outcomes_colnames = [cell.value for cell in unitas_survey_outcomes_worksheet[2]]
+ outcomes_rows_data = []
+ for row in unitas_survey_outcomes_worksheet.iter_rows(min_row=3, values_only=False):
+ row_data = [cell.value for cell in row] # This will get you the cell values
+ outcomes_rows_data.append(row_data)
+
+ unitas_outcomes = pd.DataFrame(outcomes_rows_data, columns=unitas_outcomes_colnames)
+ unitas_outcomes = unitas_outcomes.rename(
+ columns={
+ "Notes (If 'no answer' under outcomes, have you checked around the property for access "
+ "issues where possible?)": "Notes"
+ }
+ )
+
+ unitas_outcomes["Postcode"].unique()
+ eg1 = unitas_properties_to_survey_full[
+ (unitas_properties_to_survey_full["Post Code"] == "ST6 6RF")
+ ]
+ eg1_outcomes = unitas_outcomes[
+ (unitas_outcomes["Postcode"] == "ST6 6RF")
+ ]
+
+ # Merge outcomes onto properties to survey. Will probably have to do algorithmically
+ full_asset_list["matching_postcode_nospace"] = full_asset_list["matching_postcode"].str.lower().str.replace(" ", "")
+ outcome_matching = []
+ for _, outcome in tqdm(unitas_outcomes.iterrows(), total=len(unitas_outcomes)):
+ # We search for the corresponding entry in the asset list
+ postcode_lower = outcome["Postcode"].lower().strip().replace(" ", "")
+
+ # Confirmed not in asset list
+ # Filter asset list on postcode
+ df = unitas_properties_to_survey_full[
+ unitas_properties_to_survey_full["matching_postcode_nospace"].str.contains(postcode_lower)
+ ]
+
+ df = df[df["HouseNo"] == str(outcome["No."])]
+ if df.empty:
+ continue
+
+ if df.shape[0] == 1:
+ outcome_matching.append(
+ {
+ "asset_list_row_id": df["asset_list_row_id"].values[0],
+ **outcome.to_dict()
+ }
+ )
+ continue
+
+ raise Exception("something went wrong")
+ outcome_matching = pd.DataFrame(outcome_matching)
+
+ # We can have duplicate matches, so we format the Date letter sent column and retrieve the newest outcome
+ outcome_matching["Date letters sent"] = outcome_matching["Date letters sent"].str.lower()
+ outcome_matching["Extracted Date"] = outcome_matching["Date letters sent"].str.extract(
+ r'(?:w[./]c )(\d{2}\.\d{2}\.\d{4})')
+ outcome_matching["Extracted Date"] = pd.to_datetime(outcome_matching["Extracted Date"], format='%d.%m.%Y')
+ # We sort by asset_list_row_id and extracted date, and retrieve the newest
+ outcome_matching = outcome_matching.sort_values(["asset_list_row_id", "Extracted Date"], ascending=[True, False])
+
+ # Some properties will have multiple outcomes - for these, we re-format
+ outcome_matching_grouped = []
+ for asset_list_row_id, grouped_data in outcome_matching.groupby("asset_list_row_id"):
+ if grouped_data.shape[0] == 1:
+ outcome_matching_grouped.append(
+ {
+ "Number of previous visits": 1,
+ **grouped_data.to_dict("records")[0]
+ }
+ )
+ continue
+ if grouped_data.shape[0] == 2:
+ newest_visit = grouped_data.head(1)
+ oldest_visit = grouped_data.tail(1)[['Outcomes', 'Surveyor', 'Notes', 'Date letters sent']].add_suffix(
+ " second visit")
+ to_append = {
+ "Number of previous visits": 2,
+ **newest_visit.to_dict("records")[0],
+ **oldest_visit.to_dict("records")[0]
+ }
+ outcome_matching_grouped.append(to_append)
+ else:
+ raise Exception("something went wrong")
+
+ outcome_matching_grouped = pd.DataFrame(outcome_matching_grouped)
+
+ unitas_properties_to_survey_with_outcomes = unitas_properties_to_survey_full.merge(
+ outcome_matching_grouped, how="left", on="asset_list_row_id"
+ )
+ unitas_properties_to_survey_with_outcomes["Number of previous visits"] = (
+ unitas_properties_to_survey_with_outcomes["Number of previous visits"].fillna(0)
+ )
+
+ # Store as an excel
+ unitas_properties_to_survey_with_outcomes.to_excel("Unitas - phase 2 properties to Survey.xlsx")
+
+ unitas_properties_to_survey_with_outcomes["Last EPC Built Form"].value_counts()
+
+
+def app():
+ """
+ This app contains the housing association analysis for HAs 1, 6, 14, 39 and 107.
+ Only HA 6 has surveys
+ :return:
+ """
+
+ # Determines if we want to use the cached data in s3
+ use_cache = True
+ # Determines if we want to perform the data pull
+ pull_data = False
+ # Override to re-build all inputs
+ rebuild_inputs = False
+
+ # List all of the data in the folder
+ directories = [str(file) for entry in DATA_FOLDER.iterdir() if entry.is_dir()
+ for file in entry.iterdir() if file.suffix == '.xlsx']
+ # Grab the December HA figures filepath
+ december_figures_filepath = "local_data/ha_data/HA_December_figures.csv"
+
+ # Add in:
+ priority_has = [
+ "HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24",
+ "HA25", "HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54",
+ "HA56", "HA63", "HA107", "HA117", "HA8", "HA11", "HA21", "HA37", "HA42",
+ # Added as of March 18th
+ "HA44", "HA45", "HA51", "HA52", "HA17", "HA5", "HA20",
+ # New HAS
+ "HAXX", "HAXXX",
+ ]
+ # Next HAs to do: 14 [DONE], 15[DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
+ # back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE],
+ # 35 [DONE], 56 [DONE], 19 [DONE], 18 [DONE], 9 [DONE], 27 [DONE], 34 [DONE], 30 [DONE], 31 [DONE], 54 [DONE]
#
- # # Re-do
- # res = []
- # for _, row in tqdm(datasets["results_df"].iterrows(), total=datasets["results_df"].shape[0]):
- # epc = {
- # "walls-description": row["walls"],
- # "roof-description": row["roof"],
- # "floor-description": "",
- # "tenure": "",
- # "current-energy-efficiency": row["sap"],
- # }
- # eligibility = Eligibility(epc=epc, cleaned=cleaned)
- # eligibility.check_eco4_warmfront()
- # res.append(
- # {
- # "row_id": row["row_id"],
- # "eligibility_cavity_type": eligibility.eco4_warmfront["cavity_type"],
- # "eligibility_loft_type": eligibility.eco4_warmfront["loft_type"]
- # }
- # )
- #
- # # Merge back on
- # res = pd.DataFrame(res)
- # datasets["results_df"] = datasets["results_df"].merge(res, how="left", on="row_id")
- #
- # # Re-save in s3
- # save_pickle_to_s3(
- # data={
- # "results_df": datasets["results_df"],
- # "scoring_df": datasets["scoring_df"],
- # "nodata": datasets["nodata"]
- # },
- # bucket_name="retrofit-datalake-dev",
- # s3_file_name=f"ha-analysis/{ha_name}/processed_results.pickle"
- # )
+ # Consider for ECO4:
+ # HA 70 - have to merge ECO3 list though,
+ # HA17 has LOTs of assets, but the asset list is a mess
+ # HA53 but has EPCs done
+ # Consider for GBIS:
+ # Ignore for now:
+ # 38 [problematic, but no ECO4], 10 problematic (no eligibility), 20 has barely any in
+ # Filter down the directories to only the priority HAs
+ directories = [d for d in directories if d.split("/")[2] in priority_has]
+
+ loader = DataLoader(directories, december_figures_filepath, use_cache, rebuild_inputs)
+ loader.load()
+ loader.ha_facts_and_figures()
+
+ forecast_remaining_sales(loader)
+
+ # Adhoc - for HA16, get the properties that still need a CIGA check
+ asset_list_ha16 = loader.data["HA16"]["asset_list"].copy()
+ ha_16_need_ciga = asset_list_ha16[
+ asset_list_ha16["ECO Eligibility"].str.contains("subject to ciga")
+ ]
+ completed_cigas = loader.data["HA16"]["ciga_list"].copy()
+ # Store the results
+ ha_16_need_ciga.to_csv("ha16_need_ciga.csv")
+ completed_cigas.to_csv("ha16_completed_cigas.csv")
+
+ # Adhoc - look at the current pipeline and identify how many dormant, CIGA dependent properties there are for
+ # live projects
+
+ # Read excel
+ orderbook_filepath = "local_data/ha_data/Warmfront HA client order book overview_20240129.xlsx"
+ orderbook_workbook = openpyxl.load_workbook(orderbook_filepath)
+ orderbook_sheet = orderbook_workbook["Contractual Info"]
+ orderbook_colnames = [cell.value for cell in orderbook_sheet[1]]
+
+ rows = []
+ for row in orderbook_sheet.iter_rows(min_row=2, values_only=False):
+ row_data = [cell.value for cell in row] # This will get you the cell values
+ rows.append(row_data)
+
+ orderbook = pd.DataFrame(rows, columns=orderbook_colnames)
+ live_orderbook = orderbook[orderbook["Live, New, or Historic?"] == "LIVE"].copy()
+ live_orderbook['Redacted HA'] = live_orderbook['Redacted HA'].str.replace(" ", "")
+
+ dormant_properties = []
+ missed_has = []
+ for _, customer in live_orderbook.iterrows():
+ if customer['Redacted HA'] not in loader.data.keys():
+ missed_has.append(customer['Redacted HA'])
+ continue
+ asset_list = loader.data[customer['Redacted HA']]["asset_list"].copy()
+ survey_list = loader.data[customer['Redacted HA']]["survey_list"].copy()
+ # Remove sold
+ if not survey_list.empty:
+ survey_list = survey_list[~pd.isnull(survey_list["asset_list_row_id"])]
+ asset_list = asset_list.merge(
+ survey_list[["asset_list_row_id", "installation_status"]],
+ how="left",
+ on="asset_list_row_id"
+ )
+ # Anything that has an installation has gone to installation, and therefore is not remaining
+ asset_list = asset_list[pd.isnull(asset_list["installation_status"])]
+ asset_list = asset_list.drop(columns=["installation_status"])
+
+ # We pull out the properties that need a CIGA check
+ need_ciga = asset_list[asset_list["ECO Eligibility"] == "eco4 (subject to ciga)"]
+ need_archetype = asset_list[asset_list["ECO Eligibility"] == "eco4 (subject to archetype)"]
+ need_ciga_and_archetype = asset_list[
+ asset_list["ECO Eligibility"] == "eco4 (subject to ciga) (subject to archetype)"
+ ]
+
+ dormant_properties.append(
+ {
+ "HA Name": customer['Redacted HA'],
+ "Need CIGA": need_ciga.shape[0],
+ "Need Archetype": need_archetype.shape[0],
+ "Need CIGA and Archetype": need_ciga_and_archetype.shape[0]
+ }
+ )
+
+ dormant_properties = pd.DataFrame(dormant_properties)
+ totals = dormant_properties.sum()
+ totals["HA Name"] = "Total"
+
+ dormant_properties = pd.concat([dormant_properties, totals.to_frame().T])
+ dormant_properties.to_csv("dormant_properties.csv")
+
+ loader.december_figures["ECO4 remaining"].sum()
+ december_figures = loader.december_figures.copy()
+ december_figures["ECO4 remaining"] = np.where(
+ december_figures["ECO4 remaining"] < 0,
+ 0,
+ december_figures["ECO4 remaining"]
+ )
+ december_figures["ECO4 remaining"].sum()
diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py
index 3228668e..23f5a371 100644
--- a/etl/epc/Dataset.py
+++ b/etl/epc/Dataset.py
@@ -203,11 +203,11 @@ class TrainingDataset(BaseDataset):
common_cols = [[col + "_starting", col + "_ending"] for col in common_cols]
self.df = self.df.loc[
- :,
- no_suffix_cols
- + only_ending_cols
- + [col for cols in common_cols for col in cols],
- ]
+ :,
+ no_suffix_cols
+ + only_ending_cols
+ + [col for cols in common_cols for col in cols],
+ ]
def _remove_abnormal_change_in_floor_area(self):
"""
@@ -509,7 +509,7 @@ class TrainingDataset(BaseDataset):
expanded_df["is_sandstone_or_limestone"]
== expanded_df["is_sandstone_or_limestone_ending"]
)
- ]
+ ]
elif component == "floor":
expanded_df = expanded_df[
(expanded_df["is_suspended"] == expanded_df["is_suspended_ending"])
@@ -526,7 +526,7 @@ class TrainingDataset(BaseDataset):
expanded_df["is_to_external_air"]
== expanded_df["is_to_external_air_ending"]
)
- ]
+ ]
elif component == "roof":
expanded_df = expanded_df[
(expanded_df["is_pitched"] == expanded_df["is_pitched_ending"])
@@ -539,7 +539,7 @@ class TrainingDataset(BaseDataset):
expanded_df["has_dwelling_above"]
== expanded_df["has_dwelling_above_ending"]
)
- ]
+ ]
return expanded_df
diff --git a/etl/epc/Pipeline.py b/etl/epc/Pipeline.py
index f8be16b4..ff5dd352 100644
--- a/etl/epc/Pipeline.py
+++ b/etl/epc/Pipeline.py
@@ -1,9 +1,11 @@
import msgpack
import pandas as pd
+from datetime import datetime
from typing import List
from pathlib import Path
from tqdm import tqdm
+import multiprocessing as mp
from etl.epc.DataProcessor import EPCDataProcessor
from etl.epc.Record import EPCRecord, EPCDifferenceRecord
@@ -87,9 +89,10 @@ class EPCPipeline:
run_mode="training",
epc_local_file="certificates.csv",
epc_bucket_name="retrofit-data-dev",
- epc_cleaning_dataset_key="sap_change_model/cleaning_dataset_record.parquet",
- epc_all_equal_rows_key="sap_change_model/all_equal_rows_record.parquet",
- epc_compiled_dataset_key="sap_change_model/dataset_record.parquet",
+ epc_cleaning_dataset_key="sap_change_model/{}/cleaning_dataset_rooms.parquet",
+ epc_all_equal_rows_key="sap_change_model/{}/all_equal_rows_rooms.parquet",
+ epc_compiled_dataset_key="sap_change_model/{}/dataset_rooms.parquet",
+ use_parallel=False,
):
"""
:param directories: List of directories to process
@@ -111,9 +114,13 @@ class EPCPipeline:
self.run_mode = run_mode
self.epc_local_file = epc_local_file
self.epc_bucket_name = epc_bucket_name
- self.epc_cleaning_dataset_key = epc_cleaning_dataset_key
- self.epc_all_equal_rows_key = epc_all_equal_rows_key
- self.epc_compiled_dataset_key = epc_compiled_dataset_key
+
+ self.use_parallel = use_parallel
+ self.timeprefix = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
+
+ self.epc_cleaning_dataset_key = epc_cleaning_dataset_key.format(self.timeprefix)
+ self.epc_all_equal_rows_key = epc_all_equal_rows_key.format(self.timeprefix)
+ self.epc_compiled_dataset_key = epc_compiled_dataset_key.format(self.timeprefix)
def run(self):
"""
@@ -209,8 +216,11 @@ class EPCPipeline:
"Directories not specified - Unable to run Training pipeline"
)
- for directory in tqdm(self.directories):
- self.process_directory(directory)
+ if self.use_parallel:
+ self.run_training_dataset_parallel_pipeline()
+ else:
+ for directory in tqdm(self.directories):
+ self.process_directory(directory)
save_dataframe_to_s3_parquet(
df=self.compiled_dataset,
@@ -230,6 +240,41 @@ class EPCPipeline:
file_key=self.epc_cleaning_dataset_key,
)
+ def run_training_dataset_parallel_pipeline(self):
+ """
+ Process all directories in a multiprocessing pool and merge each task's outputs
+ """
+
+ with mp.Pool() as pool:  # no size given, so the pool size is multiprocessing's default
+ results = list(
+ tqdm(
+ pool.imap(self.process_directory_task, self.directories),
+ total=len(self.directories),
+ ),
+ )
+
+ for result in tqdm(results):  # fold every task's output dict into this pipeline instance
+ self.compiled_dataset = pd.concat(
+ [self.compiled_dataset, result["dataset"]]
+ )
+ self.compiled_cleaning_averages.append(result["cleaning_averages"])
+ self.compiled_all_equal_rows.extend(result["all_equal_rows"])
+
+ def process_directory_task(self, directory: str) -> dict:
+ """
+ Pool task: process one directory; return the compiled outputs as a dict
+ """
+
+ self.process_directory(directory=directory)
+
+ output = {
+ "dataset": self.compiled_dataset,  # NOTE(review): whole compiled dataset of this instance - confirm
+ "cleaning_averages": self.epc_data_processor.cleaning_averages,
+ "all_equal_rows": self.compiled_all_equal_rows,
+ }
+
+ return output
+
def process_directory(self, directory: Path):
"""
Process a single directory
@@ -241,12 +286,13 @@ class EPCPipeline:
self.epc_data_processor.prepare_data(filepath=filepath)
constituency_data = self.epc_data_processor.data
+
self.compiled_cleaning_averages.append(
self.epc_data_processor.cleaning_averages
)
constituency_difference_records = []
- # self.check_records = []
+
for uprn, property_data in constituency_data.groupby("uprn", observed=True):
difference_records = self.process_uprn(
uprn=str(uprn), property_data=property_data, directory=directory
@@ -254,12 +300,6 @@ class EPCPipeline:
if difference_records is not None:
constituency_difference_records.extend(difference_records)
- # check_list = []
- # for check_record in self.check_records:
- # check_list.append(check_record["difference_record"])
-
- # td = TrainingDataset(datasets=check_list, cleaned_lookup=clean_lookup)
-
constituency_dataset = TrainingDataset(
datasets=constituency_difference_records, cleaned_lookup=clean_lookup
)
diff --git a/etl/epc/Record.py b/etl/epc/Record.py
index c793716f..9a965c6a 100644
--- a/etl/epc/Record.py
+++ b/etl/epc/Record.py
@@ -191,7 +191,7 @@ class EPCRecord:
This method will clean the records using the data processor
"""
epc_data_processor = EPCDataProcessor(
- data=self.epc_record_as_dataframe("prepared_epc"),
+ data=self.epc_record_as_dataframe("prepared_epc").copy(),
run_mode="newdata",
cleaning_averages=self.cleaning_data,
)
@@ -725,26 +725,26 @@ class EPCRecord:
if self.prepared_epc["construction-age-band"] in DATA_ANOMALY_MATCHES:
if self.old_data:
# Take the most recent
- max_datetime = max(
- [
- old_record["lodgement-datetime"]
- for old_record in self.old_data
- if old_record["construction-age-band"]
- not in DATA_ANOMALY_MATCHES
- ]
- )
-
- most_recent = [
- old_record
+ old_age_bands = [
+ old_record["lodgement-datetime"]
for old_record in self.old_data
- if old_record["lodgement-datetime"] == max_datetime
+ if old_record["construction-age-band"] not in DATA_ANOMALY_MATCHES
]
- self.prepared_epc["construction-age-band"] = (
- EPCDataProcessor.clean_construction_age_band(
- most_recent[0]["construction-age-band"]
+ if old_age_bands:
+ max_datetime = max(old_age_bands)
+
+ most_recent = [
+ old_record
+ for old_record in self.old_data
+ if old_record["lodgement-datetime"] == max_datetime
+ ]
+
+ self.prepared_epc["construction-age-band"] = (
+ EPCDataProcessor.clean_construction_age_band(
+ most_recent[0]["construction-age-band"]
+ )
)
- )
self.construction_age_band = self.prepared_epc["construction-age-band"]
self.age_band = england_wales_age_band_lookup.get(self.construction_age_band)
diff --git a/etl/epc/generate_scenarios_data.py b/etl/epc/generate_scenarios_data.py
index 172e8a27..f9f66034 100644
--- a/etl/epc/generate_scenarios_data.py
+++ b/etl/epc/generate_scenarios_data.py
@@ -20,6 +20,10 @@ from recommendations.Recommendations import Recommendations
from utils.logger import setup_logger
from utils.s3 import read_dataframe_from_s3_parquet, save_dataframe_to_s3_parquet
+from datetime import datetime
+
+now = datetime.now().strftime("%d-%m-%Y-%H-%M-%S")
+
logger = setup_logger()
logger.info("Connecting to db")
@@ -50,9 +54,19 @@ scenario_properties = [
"postcode": "NN1 5JY",
"lmk-key": "1459796789102016070507274146560098",
"measures": [
- [["internal_wall_insulation"], "11", None, [0]],
- [["external_wall_insulation"], "10", None, [0]],
- [["solar", "windows"], "12-15", {"photo_supply_ending": 50}, [0, 1]],
+ [
+ ["internal_wall_insulation"],
+ "11",
+ {"walls_insulation_thickness_ending": "average"},
+ [0],
+ ],
+ [
+ ["external_wall_insulation"],
+ "10",
+ {"walls_insulation_thickness_ending": "average"},
+ [0],
+ ],
+ [["solar", "windows"], "15", {"photo_supply_ending": 50}, [0, 1]],
],
},
{
@@ -60,7 +74,12 @@ scenario_properties = [
"postcode": "HP1 2HA",
"lmk-key": "c14029235739827d5f627dc8aa9bb567d026b267e851e0db0001db24638667b1",
"measures": [
- [["cavity_wall_insulation", "loft_insulation"], "15", None, [0, 1]],
+ [
+ ["cavity_wall_insulation", "loft_insulation"],
+ "15",
+ {"walls_insulation_thickness_ending": "average"},
+ [0, 1],
+ ],
],
},
{
@@ -68,7 +87,12 @@ scenario_properties = [
"postcode": "HP1 2HE",
"lmk-key": "99296a6dda21314fef3a61cda59e441e9a2aacf115eb96f4a0fa85696bf7b117",
"measures": [
- [["cavity_wall_insulation", "loft_insulation"], "15", None, [0, 1]],
+ [
+ ["cavity_wall_insulation", "loft_insulation"],
+ "15",
+ {"walls_insulation_thickness_ending": "average"},
+ [0, 1],
+ ],
],
},
{
@@ -76,7 +100,12 @@ scenario_properties = [
"postcode": "HP1 2AN",
"lmk-key": "d1e0534be3a44c33003323b21d0e322e3daddc65b5ee71936f89c59ddab96b50",
"measures": [
- [["cavity_wall_insulation", "loft_insulation"], "15", None, [0, 1]],
+ [
+ ["cavity_wall_insulation", "loft_insulation"],
+ "15",
+ {"walls_insulation_thickness_ending": "average"},
+ [0, 1],
+ ],
],
},
{
@@ -84,11 +113,17 @@ scenario_properties = [
"postcode": "HP1 2HX",
"lmk-key": "1eae354db522a95188018d9cd0502ed8c609910b6c88f8797d3a25f59b11770a",
"measures": [
- [["cavity_wall_insulation", "loft_insulation"], "15", None, [0, 1]],
+ [
+ ["cavity_wall_insulation", "loft_insulation"],
+ "15",
+ {"walls_insulation_thickness_ending": "average"},
+ [0, 1],
+ ],
],
},
]
+
recommendations_scoring_data = []
for scenario_property in scenario_properties:
@@ -132,7 +167,7 @@ for scenario_property in scenario_properties:
p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
recommender = Recommendations(property_instance=p, materials=materials)
- property_recommendations = recommender.recommend()
+ property_recommendations = recommender.recommend("0")
wall_recommendations = recommender.wall_recomender.recommendations
loft_recommendations = recommender.roof_recommender.recommendations
@@ -213,6 +248,9 @@ for scenario_property in scenario_properties:
recommendations_scoring_data.extend(scoring_list)
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
+recommendations_scoring_data["impact"] = recommendations_scoring_data["impact"].astype(
+ int
+)
recommendations_scoring_data = recommendations_scoring_data.drop(
columns=[
"rdsap_change",
@@ -247,5 +285,5 @@ all_predictions = model_api.predict_all(
save_dataframe_to_s3_parquet(
recommendations_scoring_data,
"retrofit-data-dev",
- "scenario_data/recommendations_scoring_data.parquet",
+ f"scenario_data/{now}/recommendations_scoring_data.parquet",
)
diff --git a/etl/epc/property_change_app.py b/etl/epc/property_change_app.py
index 8c97bff4..ad058598 100644
--- a/etl/epc/property_change_app.py
+++ b/etl/epc/property_change_app.py
@@ -16,7 +16,7 @@ def main():
epc_pipeline = EPCPipeline(
directories=directories,
- run_mode="record",
+ use_parallel=True,
epc_data_processor=EPCDataProcessor(run_mode="training"),
)
diff --git a/etl/epc/requirements.txt b/etl/epc/requirements.txt
index 9f972bde..87148180 100644
--- a/etl/epc/requirements.txt
+++ b/etl/epc/requirements.txt
@@ -1,4 +1,5 @@
pandas==2.1.3
tqdm==4.66.1
msgpack==1.0.7
-boto3==1.29.6
\ No newline at end of file
+boto3==1.29.6
+pyarrow==15.0.2
\ No newline at end of file
diff --git a/etl/epc_clean/app.py b/etl/epc_clean/app.py
index 53c1a329..3f1a1a80 100644
--- a/etl/epc_clean/app.py
+++ b/etl/epc_clean/app.py
@@ -36,8 +36,11 @@ def app():
cleaned_data = {}
epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
+ WALLS = []
for directory in tqdm(epc_directories):
data = pd.read_csv(directory / "certificates.csv", low_memory=False)
+ z = data["WALLS_DESCRIPTION"].unique().tolist()
+ WALLS.extend(z)
# Rename the columns to the same format as the api returns
data.columns = [c.replace("_", "-").lower() for c in data.columns]
# Take just date before the date threshold
diff --git a/etl/epc_clean/epc_attributes/RoofAttributes.py b/etl/epc_clean/epc_attributes/RoofAttributes.py
index 9d3b46b4..76f99f09 100644
--- a/etl/epc_clean/epc_attributes/RoofAttributes.py
+++ b/etl/epc_clean/epc_attributes/RoofAttributes.py
@@ -122,6 +122,13 @@ class RoofAttributes(Definitions):
result["is_valid"] = "invalid" not in description
description = description.replace("invalid", "")
+ # We handle an edge case where the description is "pitched, 150 loft insulation" and is missing the mm
+ if result["is_pitched"] or result["is_loft"]:
+ # Search for a regular expression that matches 150 insulation
+ match = re.search(r"(\d+\+?)\s*insulation", description)
+ if match:
+ result['insulation_thickness'] = match.group(1)
+
# insulation thickness
thickness_map = {
"ceiling insulated": "average",
@@ -137,11 +144,11 @@ class RoofAttributes(Definitions):
# Remove the match from the description
# description = description.replace(key, "")
break
- else:
- # Extract insulation thickness in mm, if present
- match = re.search(r'(\d+\+?)\s*mm', description)
- if match:
- result['insulation_thickness'] = match.group(1)
+
+ # Extract insulation thickness in mm, if present
+ match = re.search(r'(\d+\+?)\s*mm', description)
+ if match:
+ result['insulation_thickness'] = match.group(1)
if "insulation_thickness" not in result:
result['insulation_thickness'] = None
diff --git a/etl/non_intrusive_surveys/photos/README.md b/etl/non_intrusive_surveys/photos/README.md
new file mode 100644
index 00000000..9dbe951f
--- /dev/null
+++ b/etl/non_intrusive_surveys/photos/README.md
@@ -0,0 +1,19 @@
+# Non Intrusive Surveys - photo upload
+
+This folder contains the application that processes photos taken during non-intrusive surveys and uploads them to S3. Each property's photos are read from a local folder, which `app.py` maps to the property's UPRN.
+
+## Getting started
+
+Install the required packages by running the following command:
+
+```bash
+pip install -r requirements.txt
+```
+
+## Usage
+
+The main application is found in the app.py file. To run the application, use the following command:
+
+```bash
+python app.py
+```
\ No newline at end of file
diff --git a/etl/non_intrusive_surveys/photos/app.py b/etl/non_intrusive_surveys/photos/app.py
new file mode 100644
index 00000000..c531355b
--- /dev/null
+++ b/etl/non_intrusive_surveys/photos/app.py
@@ -0,0 +1,229 @@
+import boto3
+import mimetypes
+import os
+from PIL import Image
+from pathlib import Path
+from urllib.parse import quote
+from dotenv import load_dotenv
+
+# Inputs
+ENV_FILEPATH = "etl/non_intrusive_surveys/photos/.env"
+PHOTO_DIRECTORY = "/Users/khalimconn-kowlessar/Downloads/IMMO - Dudley Pilot - non-invasive raw data"
+FOLDER_UPRN_LOOKUP = {
+    "91 Osprey Drive DY1 2JS": 90048026,
+    "195 Ashenhurst Rd DY1 2JB": 90051858,
+    "6 Beech Rd DY1 4BP": 90055152,
+    "53 Bromley DY5 4PJ": 90060989,
+    "5 Oaklands B62 0JA": 90028499,
+    "47 Fairfield Rd DY8 5UJ": 90077535,
+    "150 Huntingtree Rd B63 4HP": 90093693,
+    "27 Milton Rd DY1 2JB": 90106884,
+    "21 Wells Rd DY5 3TB": 90022227,
+    "8 Corporation Rd DY2 7PX": 90070461
+}
+# Local staging folder for the generated images; the staged relative path doubles as the S3 object key
+OUTPUT_DIRECTORY = "non_intrusive_photos"
+
+load_dotenv(ENV_FILEPATH)
+CLOUDFRONT_DISTRIBUTION_DOMAIN_NAME = os.getenv("CLOUDFRONT_DISTRIBUTION_DOMAIN_NAME", None)
+CDN_BUCKET_NAME = os.getenv("CDN_BUCKET_NAME", None)
+
+
+def list_subdirectories(directory_path):
+    """
+    List all subdirectories within a given directory.
+
+    :param directory_path: Path to the directory.
+    :return: A list of paths to the subdirectories.
+    """
+    directory = Path(directory_path)
+    subdirectories = [subdir for subdir in directory.iterdir() if subdir.is_dir()]
+    return subdirectories
+
+
+def list_files_in_directory(directory_path, file_extension=".jpg"):
+    """
+    List all files with a specific extension within a given directory and its subdirectories.
+
+    The extension is compared case-insensitively, so "IMG_0001.JPG" matches ".jpg".
+
+    :param directory_path: Path to the directory to scan.
+    :param file_extension: File extension to filter by.
+    :return: A sorted list of paths to the files.
+    """
+    directory = Path(directory_path)
+    wanted_extension = file_extension.lower()
+
+    # rglob("*") walks the directory and its subdirectories; keep regular files with the wanted extension
+    return sorted(
+        file for file in directory.rglob("*")
+        if file.is_file() and file.name.lower().endswith(wanted_extension)
+    )
+
+
+def _object_name_for(path):
+    """
+    Build the S3 object key for a locally staged image.
+
+    The key is the staged relative path written with forward slashes, i.e.
+    "non_intrusive_photos/<uprn>/<file name>", so each property gets its own key prefix.
+
+    :param path: Path of the staged image, as returned by create_images.
+    :return: The object key as a string.
+    """
+    return Path(path).as_posix()
+
+
+def create_images(input_path, uprn):
+    """
+    Create the thumbnail, 1080p and WebP versions of a photo in the local staging folder.
+
+    Output names are prefixed with the source photo's file stem; with fixed names every photo of a property
+    would be written to the same three files and overwrite the previous photo.
+    NOTE: photos that share a file name in different subfolders of one property would still collide.
+
+    :param input_path: Path to the source photo.
+    :param uprn: UPRN of the property, used as the name of the staging subfolder.
+    :return: Tuple of (thumbnail_path, full_hd_path, webp_path) as strings.
+    """
+    # Define the base directory path
+    base_directory = Path(OUTPUT_DIRECTORY) / str(uprn)
+    print(f"Creating directory: {base_directory}")  # Debug: print the directory to be created
+
+    # Need to create local directory if it doesn't exist
+    base_directory.mkdir(parents=True, exist_ok=True)
+
+    # Define output paths
+    photo_name = Path(input_path).stem
+    thumbnail_path = str(base_directory / f"{photo_name}_thumbnail.jpg")
+    full_hd_path = str(base_directory / f"{photo_name}_1080p.jpg")
+    webp_path = str(base_directory / f"{photo_name}.webp")  # Save as WebP format
+
+    # Load the image
+    with Image.open(input_path) as img:
+        # Create a thumbnail (thumbnail() resizes in place, so work on a copy)
+        thumbnail = img.copy()
+        thumbnail.thumbnail((128, 128), Image.Resampling.LANCZOS)
+        thumbnail.save(thumbnail_path, 'JPEG', quality=85)
+
+        # Create a 1080p version
+        full_hd = img.copy()
+        full_hd.thumbnail((1920, 1080), Image.Resampling.LANCZOS)
+        full_hd.save(full_hd_path, 'JPEG', quality=90)
+
+        # Convert to WebP for better compression
+        img.save(webp_path, 'WEBP', quality=90)
+
+    # Return paths to the processed images
+    return thumbnail_path, full_hd_path, webp_path
+
+
+def upload_to_s3(bucket_name, file_path, object_name, s3_client=None):
+    """
+    Upload a single local file to S3.
+
+    :param bucket_name: Name of the destination bucket.
+    :param file_path: Path of the local file to upload.
+    :param object_name: Key to store the file under.
+    :param s3_client: Optional boto3 S3 client to reuse; a new one is created when omitted.
+    """
+    if s3_client is None:
+        s3_client = boto3.client('s3')
+
+    # Set the Content-Type explicitly so the object is served as an image rather than a generic binary
+    content_type, _ = mimetypes.guess_type(object_name)
+    if object_name.lower().endswith(".webp"):
+        content_type = "image/webp"  # mimetypes does not know .webp on older Python versions
+    extra_args = {"ContentType": content_type} if content_type else None
+
+    s3_client.upload_file(str(file_path), bucket_name, object_name, ExtraArgs=extra_args)
+    print(f"Uploaded {object_name} to S3 bucket {bucket_name}")
+
+
+def upload_photos_to_s3(bucket_name, photo_paths):
+    """
+    Upload a batch of locally staged images to S3, reusing one client for the whole batch.
+
+    :param bucket_name: Name of the destination bucket.
+    :param photo_paths: Paths of the staged images, as returned by create_images.
+    """
+    s3_client = boto3.client('s3')
+    for path in photo_paths:
+        upload_to_s3(bucket_name, path, _object_name_for(path), s3_client=s3_client)
+
+
+def generate_cdn_url(distribution_domain, object_name):
+    """
+    Build the public CDN URL of an uploaded object.
+
+    :param distribution_domain: Domain name of the CloudFront distribution.
+    :param object_name: S3 object key; percent-encoded here because photo names may contain spaces.
+    :return: The https URL of the object.
+    """
+    return f"https://{distribution_domain}/{quote(object_name)}"
+
+
+def process_and_upload_images(uprn, input_image_path, bucket_name, distribution_domain):
+    """
+    Create the three versions of a photo, upload them to S3 and return their CDN links.
+
+    :param uprn: UPRN of the property the photo belongs to.
+    :param input_image_path: Path to the source photo.
+    :param bucket_name: Name of the destination bucket.
+    :param distribution_domain: Domain name of the CloudFront distribution.
+    :return: List of CDN URLs in the order thumbnail, 1080p, WebP.
+    """
+    # Create images
+    image_paths = list(create_images(input_image_path, uprn=str(uprn)))
+
+    try:
+        # Upload images
+        upload_photos_to_s3(bucket_name, photo_paths=image_paths)
+    finally:
+        # Delete local files, even when an upload failed, so staged copies do not pile up
+        for path in image_paths:
+            os.remove(path)
+
+    # Generate CDN links from the same keys the uploads used
+    return [generate_cdn_url(distribution_domain, _object_name_for(path)) for path in image_paths]
+
+
+def app():
+    """
+    This application is tasked with uploading the photos, recorded during the non-invasive surveys, to s3.
+    NOTE: the CDN links returned for each photo are not yet written to the database.
+    To begin with, this app will simply read the files from the local machine, however we will come up with a more
+    efficient way to do this in the future.
+
+    :return:
+    :raises RuntimeError: If the bucket name or the distribution domain is not configured.
+    """
+    # Fail early with a clear message instead of part-way through the first upload
+    if not CDN_BUCKET_NAME or not CLOUDFRONT_DISTRIBUTION_DOMAIN_NAME:
+        raise RuntimeError(
+            f"CDN_BUCKET_NAME and CLOUDFRONT_DISTRIBUTION_DOMAIN_NAME must be set (e.g. in {ENV_FILEPATH})"
+        )
+
+    # List all files in the directory using pathlib
+    property_directories = list_subdirectories(PHOTO_DIRECTORY)
+
+    # For each property, we want to list all of the photos in the directory
+    for property_dir in property_directories:
+        uprn = FOLDER_UPRN_LOOKUP.get(property_dir.name)
+        if uprn is None:
+            # Folders without a known UPRN are reported and skipped rather than aborting the whole run
+            print(f"Skipping {property_dir.name}: folder is not in FOLDER_UPRN_LOOKUP")
+            continue
+
+        # We now want to convert each file, and upload it to s3
+        for photo_filepath in list_files_in_directory(property_dir):
+            process_and_upload_images(
+                uprn=uprn,
+                input_image_path=photo_filepath,
+                bucket_name=CDN_BUCKET_NAME,
+                distribution_domain=CLOUDFRONT_DISTRIBUTION_DOMAIN_NAME
+            )
+
+
+if __name__ == "__main__":
+    app()
diff --git a/etl/non_intrusive_surveys/photos/requirements.txt b/etl/non_intrusive_surveys/photos/requirements.txt
new file mode 100644
index 00000000..2199a0b4
--- /dev/null
+++ b/etl/non_intrusive_surveys/photos/requirements.txt
@@ -0,0 +1,3 @@
+Pillow
+boto3
+python-dotenv
\ No newline at end of file
diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf
index d545cdf8..55266e10 100644
--- a/infrastructure/terraform/main.tf
+++ b/infrastructure/terraform/main.tf
@@ -66,7 +66,7 @@ resource "aws_security_group" "allow_db" {
resource "aws_db_instance" "default" {
allocated_storage = var.allocated_storage
engine = "postgres"
- engine_version = "14.7"
+ engine_version = "14.10"
instance_class = var.instance_class
db_name = var.database_name
username = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)["db_assessment_model_username"]
@@ -181,4 +181,16 @@ module "lambda_carbon_prediction_ecr" {
module "lambda_heat_prediction_ecr" {
ecr_name = "lambda-heat-prediction-${var.stage}"
source = "./modules/ecr"
+}
+
+##############################################
+# CDN - Cloudfront
+##############################################
+module "cloudfront_distribution" {
+ source = "./modules/cloudfront"
+ bucket_name = module.s3.bucket_name
+ bucket_id = module.s3.bucket_id
+ bucket_arn = module.s3.bucket_arn
+ bucket_domain_name = module.s3.bucket_domain_name
+ stage = var.stage
}
\ No newline at end of file
diff --git a/infrastructure/terraform/modules/cloudfront/main.tf b/infrastructure/terraform/modules/cloudfront/main.tf
new file mode 100644
index 00000000..281ff09f
--- /dev/null
+++ b/infrastructure/terraform/modules/cloudfront/main.tf
@@ -0,0 +1,65 @@
+resource "aws_cloudfront_distribution" "s3_distribution" {
+ origin {
+ domain_name = var.bucket_domain_name
+ origin_id = "S3-${var.bucket_name}"
+
+ s3_origin_config {
+ origin_access_identity = aws_cloudfront_origin_access_identity.oai.cloudfront_access_identity_path
+ }
+ }
+
+ enabled = true
+
+ default_cache_behavior {
+ allowed_methods = ["GET", "HEAD"]
+ cached_methods = ["GET", "HEAD"]
+ target_origin_id = "S3-${var.bucket_name}"
+ viewer_protocol_policy = "redirect-to-https"
+ compress = true
+
+ forwarded_values {
+ query_string = false
+ cookies {
+ forward = "none"
+ }
+ }
+
+ min_ttl = 0
+ default_ttl = 86400
+ max_ttl = 31536000
+ }
+
+ price_class = "PriceClass_All"
+
+ restrictions {
+ geo_restriction {
+ restriction_type = "none"
+ }
+ }
+
+ viewer_certificate {
+ cloudfront_default_certificate = true
+ }
+}
+
+resource "aws_cloudfront_origin_access_identity" "oai" {
+ comment = "OAI for ${var.bucket_name}"
+}
+
+resource "aws_s3_bucket_policy" "bucket_policy" {
+ bucket = var.bucket_id
+
+ policy = jsonencode({
+ Version = "2012-10-17"
+ Statement = [
+ {
+ Effect = "Allow"
+ Principal = {
+ AWS = "arn:aws:iam::cloudfront:user/CloudFront Origin Access Identity ${aws_cloudfront_origin_access_identity.oai.id}"
+ }
+ Action = "s3:GetObject"
+ Resource = "${var.bucket_arn}/*"
+ },
+ ]
+ })
+}
diff --git a/infrastructure/terraform/modules/cloudfront/variables.tf b/infrastructure/terraform/modules/cloudfront/variables.tf
new file mode 100644
index 00000000..88f770a8
--- /dev/null
+++ b/infrastructure/terraform/modules/cloudfront/variables.tf
@@ -0,0 +1,24 @@
+variable "bucket_name" {
+ description = "The name of the bucket"
+ type = string
+}
+
+variable "stage" {
+ description = "The deployment stage"
+ type = string
+}
+
+variable "bucket_id" {
+ description = "The ID of the S3 bucket"
+ type = string
+}
+
+variable "bucket_arn" {
+ description = "The ARN of the S3 bucket"
+ type = string
+}
+
+variable "bucket_domain_name" {
+ description = "The regional domain name of the S3 bucket"
+ type = string
+}
\ No newline at end of file
diff --git a/infrastructure/terraform/modules/s3/outputs.tf b/infrastructure/terraform/modules/s3/outputs.tf
index a5e7ddb4..7668dbc4 100644
--- a/infrastructure/terraform/modules/s3/outputs.tf
+++ b/infrastructure/terraform/modules/s3/outputs.tf
@@ -2,3 +2,15 @@ output "bucket_name" {
description = "The name of the S3 bucket"
value = aws_s3_bucket.bucket.bucket
}
+
+output "bucket_id" {
+ value = aws_s3_bucket.bucket.id
+}
+
+output "bucket_arn" {
+ value = aws_s3_bucket.bucket.arn
+}
+
+output "bucket_domain_name" {
+ value = aws_s3_bucket.bucket.bucket_regional_domain_name
+}
\ No newline at end of file
diff --git a/recommendations/Costs.py b/recommendations/Costs.py
index b2874f28..fd3c1692 100644
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@@ -37,12 +37,76 @@ MCS_SOLAR_PV_COST_DATA = {
"average_cost_per_kwh-Northern Ireland": 2126.09,
}
+# This data is based on the MCS database. We use the larger figure between the 2023 and 2024 averages,
+# to be conservative
+MCS_AIR_SOURCE_HEAT_PUMP_COST_DATA = {
+ "Outer London": 13220,
+ "Inner London": 13220,
+ "South East England": 13547,
+ "South West England": 12776,
+ "East of England": 12585,
+ "East Midlands": 12239,
+ "West Midlands": 13182,
+ "North East England": 11829,
+ "North West England": 11714,
+ "Yorkshire and the Humber": 11919,
+ "Wales": 13701,
+ "Scotland": 12586,
+ "Northern Ireland": 12000, # There are hardly any air source heat pump installs going on in Northern Ireland
+}
+BOILER_UPGRADE_SCHEME_ASHP_VALUE = 7500
+
# This is based on quotes from installers
BATTERY_COST = 3500
# This is based on https://www.checkatrade.com/blog/cost-guides/cost-smart-thermostat/
SMART_APPLIANCE_THERMOSTAT_COST = 400
-PROGRAMMER_COST = 200
+PROGRAMMER_COST = 120
+ROOM_THERMOSTAT_COST = 150
+TRVS_COST = 35
+
+# Cost for TTZC
+# Smart thermostat based on checkatrade https://www.checkatrade.com/blog/cost-guides/cost-smart-thermostat/
+# Based on the Nest system
+TTZC_SMART_THERMOSTAT_COST = 205
+TTZC_SMART_THERMOSTAT_LABOUR_HOURS = 2
+TTZC_ELECTRICIAN_HOURLY_RATE = 45
+# Based on cost of a Nest temperature sensor
+TTZC_ROOM_TEMPERATURE_SENSOR_COST = 50
+TTZC_ROOM_TEMPERATURE_SENSOR_LABOUR_HOURS = 0.17 # (Assume ~ 10 mins install per sensor)
+# Based on an average cost of smart radiator valves
+TTZC_SMART_RADIATOR_VALUES = 50
+TTZC_SMART_RADIATOR_VALUES_LABOUR_HOURS = 0.37 # (Assume ~ 15-30 mins install per valve)
+
+# Low carbon combi boiler - lower bound of the £2200 - £3000 range (NOTE(review): the median would be £2600)
+LOW_CARBON_COMBI_BOILER = 2200
+
+# boiler prices based on
+# https://www.greenmatch.co.uk/boilers/30kw-boiler
+# https://www.greenmatch.co.uk/boilers/35kw-boiler
+# https://www.greenmatch.co.uk/boilers/40kw-boiler
+# These are exclusive of installation costs
+CONDENSING_BOILER_COSTS = {
+ "30kw": 1550,
+ "35kw": 1610,
+ "40kw": 1625
+}
+
+# Assumes 3 hours to remove each heater (including re-decorating)
+ROOM_HEATER_REMOVAL_COST = 120
+ROOM_HEATER_REMOVAL_LABOUR_HOURS = 3
+
+# This is a cost quoted by Jim for a system flush - the existing system will run more efficiently
+SYSTEM_FLUSH_COST = 250
+
+SINGLE_RADIATOR_COST = 150
+DOUBLE_RADIATOR_COST = 300
+FLUE_COST = 600
+PIPEWORK_COST = 750 # Min cost is £500
+
+# This is the cost per meter squared for cavity extraction
+# https://www.checkatrade.com/blog/cost-guides/cavity-wall-insulation-removal-cost/
+CAVITY_EXTRACTION_COST = 21.5
class Costs:
@@ -126,7 +190,7 @@ class Costs:
if not self.labour_adjustment_factor:
raise ValueError("Labour adjustment factor not found")
- def cavity_wall_insulation(self, wall_area, material):
+ def cavity_wall_insulation(self, wall_area, material, is_extraction_and_refill=False):
"""
Calculates the total cost for cavity wall insulation based on material and labor costs,
including contingency, preliminaries, profit, and VAT.
@@ -161,6 +225,13 @@ class Costs:
# Assume a team of 2
labour_days = (labour_hours / 8) / 2
+ if is_extraction_and_refill:
+ # bump up the cost of the work
+ total_cost = total_cost + CAVITY_EXTRACTION_COST * wall_area
+ # Additional 2 days work
+ labour_hours = labour_hours + (2 * 8)
+ labour_days = labour_days + 2
+
return {
"total": total_cost,
"subtotal": subtotal_before_vat,
@@ -998,3 +1069,219 @@ class Costs:
"labour_hours": 0,
"labour_days": 0,
}
+
+ def roomstat_programmer_trvs(
+ self, number_heated_rooms, has_programmer, has_trvs, has_room_thermostat
+ ):
+ """
+
+ :return:
+ """
+
+ total_cost = 0
+ labour_hours = 0
+
+ if not has_programmer:
+ total_cost += PROGRAMMER_COST
+ labour_hours += 1
+
+ if not has_trvs:
+ total_cost += TRVS_COST * number_heated_rooms
+ labour_hours += 0.25 * number_heated_rooms
+
+ if not has_room_thermostat:
+ total_cost += ROOM_THERMOSTAT_COST
+ labour_hours += 0.5
+
+ subtotal_before_vat = total_cost / (1 + self.VAT_RATE)
+ vat = total_cost - subtotal_before_vat
+
+ return {
+ "total": total_cost,
+ "subtotal": subtotal_before_vat,
+ "vat": vat,
+ "labour_hours": labour_hours,
+ "labour_days": 1,
+ }
+
+ def time_and_temperature_zone_control(self, number_heated_rooms):
+
+ # The product costs are inclusive of VAT
+ product_costs = (
+ TTZC_SMART_THERMOSTAT_COST +
+ TTZC_ROOM_TEMPERATURE_SENSOR_COST * number_heated_rooms +
+ TTZC_SMART_RADIATOR_VALUES * number_heated_rooms
+ )
+ labour_hours = (
+ TTZC_SMART_THERMOSTAT_LABOUR_HOURS +
+ TTZC_ROOM_TEMPERATURE_SENSOR_LABOUR_HOURS * number_heated_rooms +
+ TTZC_SMART_RADIATOR_VALUES_LABOUR_HOURS * number_heated_rooms
+ )
+ labour_costs = TTZC_ELECTRICIAN_HOURLY_RATE * labour_hours
+ # Add contingency and preliminaries to the labour to account for the complexity of the job
+ labour_costs = labour_costs * (1 + self.CONTINGENCY + self.PRELIMINARIES)
+
+ vat = labour_costs * self.VAT_RATE
+
+ subtotal_before_vat = product_costs + labour_costs
+ total_cost = subtotal_before_vat + vat
+
+ labour_days = np.ceil(labour_hours / 8)
+
+ return {
+ "total": total_cost,
+ "subtotal": subtotal_before_vat,
+ "vat": vat,
+ "labour_hours": labour_hours,
+ "labour_days": labour_days,
+ }
+
+ def heater_removal(self, n_rooms):
+ """
+ Estimates the costs of removal of heaters, including the redecoration costs of the space behind the heater
+ :return:
+ """
+
+ removal_cost = ROOM_HEATER_REMOVAL_COST * n_rooms
+ removal_labour_hours = ROOM_HEATER_REMOVAL_LABOUR_HOURS * n_rooms
+
+ vat = removal_cost * self.VAT_RATE
+
+ subtotal_before_vat = removal_cost
+ total_cost = subtotal_before_vat + vat
+
+ return {
+ "total": total_cost,
+ "subtotal": subtotal_before_vat,
+ "vat": vat,
+ "labour_hours": removal_labour_hours,
+ "labour_days": np.ceil(removal_labour_hours / 8),
+ }
+
+ @staticmethod
+ def _estimate_n_radiators(number_habitable_rooms, total_floor_area, property_type, built_form):
+ # Base number of radiators: one per habitable room
+ base_radiators = number_habitable_rooms
+
+ # Additional radiators for non-habitable essential areas (e.g., kitchens, hallways)
+ additional_radiators = 3 # Initial assumption
+
+ # Adjust additional radiators based on property type
+ if property_type == 'Flat':
+ additional_radiators -= 1 # Flats may need fewer radiators due to less exposure
+ elif property_type in ['House', 'Bungalow', 'Maisonette']:
+ # Multiple floors in Maisonette may require additional heating points
+ additional_radiators += 2 # Houses and bungalows might need more due to greater exposure
+ else:
+ raise Exception("Invalid property type")
+
+ # Adjust total radiator needs based on built form
+ form_factor = {
+ 'Mid-Terrace': 0.95,
+ 'Semi-Detached': 1.05,
+ 'Detached': 1.25,
+ 'End-Terrace': 1.05
+ }
+
+ # Calculate total heating power needed and number of radiators based on standard output
+ total_heating_power_required = total_floor_area * 80 # Watts per square meter
+ radiator_output = 1000 # Average wattage per radiator
+ total_radiators_based_on_power = (total_heating_power_required / radiator_output) * form_factor[built_form]
+
+ # Final estimation taking the higher of calculated needs or base room count
+ estimated_radiators = max(total_radiators_based_on_power, base_radiators + additional_radiators)
+ return round(estimated_radiators)
+
+ def boiler(self, size, exising_room_heaters, system_change, n_heated_rooms, n_rooms):
+ """
+ Based on a basic estimate of median value £2600 to install a low carbon combi boiler
+ First time central heating costs can also be found here:
+ https://www.checkatrade.com/blog/cost-guides/central-heating-installation-cost/
+ :return:
+ """
+
+ unit_cost = CONDENSING_BOILER_COSTS[size]
+ # The unit cost is the cost without VAT
+ # We now need to estimate the cost of the works
+ labour_days = 2
+ labour_hours = labour_days * 8
+ labour_rate = 300
+
+ # Average cost of installation is 1 (maybe 2) days at £300 per day
+ # https://www.checkatrade.com/blog/cost-guides/new-boiler-cost/
+ # To be pessimistic, assume 2 days work
+ labour_cost = labour_rate * self.labour_adjustment_factor * labour_days
+ # Add contingency and preliminaries
+ labour_cost = labour_cost * (1 + self.CONTINGENCY + self.PRELIMINARIES)
+
+ # labour_days = labour_days + (removal_labour_hours / 8)
+
+ vat = labour_cost * self.VAT_RATE
+
+ subtotal_before_vat = unit_cost + labour_cost
+ total_cost = subtotal_before_vat + vat
+
+ # if there are existing room heaters, we need to add the cost of removing them
+ if exising_room_heaters:
+ removal_costing = self.heater_removal(n_rooms=n_heated_rooms)
+ # Add the totals to the existing totals
+ total_cost += removal_costing["total"]
+ subtotal_before_vat += removal_costing["subtotal"]
+ labour_hours += removal_costing["labour_hours"]
+ labour_days += removal_costing["labour_days"]
+ vat += removal_costing["vat"]
+
+ if system_change:
+ # We need the cost of radiators
+ n_radiators = self._estimate_n_radiators(
+ number_habitable_rooms=n_rooms,
+ total_floor_area=self.property.floor_area,
+ property_type=self.property.data["property-type"],
+ built_form=self.property.data["built-form"]
+ )
+
+ additionals_labour_cost = labour_rate * self.labour_adjustment_factor
+ radiator_cost = DOUBLE_RADIATOR_COST * n_radiators
+ system_change_cost = radiator_cost + FLUE_COST + PIPEWORK_COST + additionals_labour_cost
+ system_change_cost_before_vat = system_change_cost / (1 + self.VAT_RATE)
+ system_change_vat = system_change_cost - system_change_cost_before_vat
+ # We add an extra labour day for the system change
+ labour_days += 1
+ labour_hours += 8
+ total_cost += system_change_cost
+ subtotal_before_vat += system_change_cost_before_vat
+ vat += system_change_vat
+
+ return {
+ "total": total_cost,
+ "subtotal": subtotal_before_vat,
+ "vat": vat,
+ "labour_hours": labour_hours,
+ "labour_days": labour_days,
+ }
+
+ def air_source_heat_pump(self):
+ """
+ Based on the region and type of property, this function will produce a cost estimation for an air source heat
+ pump. This cost will include the boiler upgrade scheme grant
+
+ """
+
+ # This is the average cost of a project, we'll add some additional contingency
+ regional_cost = MCS_AIR_SOURCE_HEAT_PUMP_COST_DATA[self.region]
+
+ total_cost = regional_cost * (1 + self.CONTINGENCY) - BOILER_UPGRADE_SCHEME_ASHP_VALUE
+ subtotal_before_vat = total_cost / (1 + self.VAT_RATE)
+ vat = total_cost - subtotal_before_vat
+
+ # We assume 3 days installation
+ labour_days = 3
+ labour_hours = labour_days * 8
+
+ return {
+ "total": total_cost,
+ "subtotal": subtotal_before_vat,
+ "vat": vat,
+ "labour_hours": labour_hours,
+ "labour_days": labour_days,
+ }
diff --git a/recommendations/FireplaceRecommendations.py b/recommendations/FireplaceRecommendations.py
index 5d620d49..601a8eb0 100644
--- a/recommendations/FireplaceRecommendations.py
+++ b/recommendations/FireplaceRecommendations.py
@@ -32,7 +32,8 @@ class FireplaceRecommendations(Definitions):
if number_open_fireplaces == 0:
return
- estimated_cost = number_open_fireplaces * self.COST_OF_WORK
+ already_installed = "sealing_open_fireplace" in self.property.already_installed
+ estimated_cost = number_open_fireplaces * self.COST_OF_WORK if not already_installed else 0
# We recommend installing two mechanical ventilation systems
self.recommendation = [
@@ -44,6 +45,7 @@ class FireplaceRecommendations(Definitions):
"starting_u_value": None,
"new_u_value": None,
"sap_points": None,
+ "already_installed": already_installed,
"total": estimated_cost,
# Take a very basic estimate of 6 hours, multipled by the number of open fireplaces to seal
"labour_hours": 6 * number_open_fireplaces,
diff --git a/recommendations/FloorRecommendations.py b/recommendations/FloorRecommendations.py
index 713d5f92..3f764d83 100644
--- a/recommendations/FloorRecommendations.py
+++ b/recommendations/FloorRecommendations.py
@@ -8,7 +8,7 @@ from datatypes.enums import QuantityUnits
from backend.Property import Property
from recommendations.recommendation_utils import (
r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value,
- get_recommended_part, get_floor_u_value
+ get_recommended_part, get_floor_u_value, override_costs
)
from recommendations.Costs import Costs
@@ -192,12 +192,21 @@ class FloorRecommendations(Definitions):
material=material.to_dict(),
non_insulation_materials=non_insulation_materials
)
+
+ already_installed = "suspended_floor_insulation" in self.property.already_installed
+ if already_installed:
+ cost_result = override_costs(cost_result)
+
elif material["type"] == "solid_floor_insulation":
cost_result = self.costs.solid_floor_insulation(
insulation_floor_area=self.property.insulation_floor_area,
material=material.to_dict(),
non_insulation_materials=non_insulation_materials
)
+
+ already_installed = "solid_floor_insulation" in self.property.already_installed
+ if already_installed:
+ cost_result = override_costs(cost_result)
else:
raise NotImplementedError("Implement me!")
@@ -217,6 +226,7 @@ class FloorRecommendations(Definitions):
"starting_u_value": u_value,
"new_u_value": new_u_value,
"sap_points": None,
+ "already_installed": already_installed,
**cost_result
}
)
diff --git a/recommendations/HeatingControlRecommender.py b/recommendations/HeatingControlRecommender.py
index 81597f61..76da6c37 100644
--- a/recommendations/HeatingControlRecommender.py
+++ b/recommendations/HeatingControlRecommender.py
@@ -1,5 +1,5 @@
from recommendations.Costs import Costs
-from recommendations.recommendation_utils import check_simulation_difference
+from recommendations.recommendation_utils import check_simulation_difference, override_costs
from backend.Property import Property
from etl.epc_clean.epc_attributes.MainheatControlAttributes import MainheatControlAttributes
@@ -27,6 +27,17 @@ class HeatingControlRecommender:
self.recommend_high_heat_retention_controls()
return
+ if heating_description in ["Boiler and radiators, mains gas"]:
+ # We can recommend roomstat programmer trvs
+ self.recommend_roomstat_programmer_trvs()
+ # We can also recommend time and temperature zone controls
+ self.recommend_time_temperature_zone_controls()
+
+ return
+
+ if heating_description in ["Air source heat pump, radiators, electric"]:
+ self.recommend_time_temperature_zone_controls()
+
def recommend_room_heaters_electric_controls(self):
"""
If the home has Room heaters, electric, we start by identifying potential heating controls that could
@@ -105,3 +116,136 @@ class HeatingControlRecommender:
# We don't implement any other recommendations right now
return
+
+ def recommend_roomstat_programmer_trvs(self):
+ """
+ If the home has a boiler and radiators, mains gas, we start by identifying potential heating controls that could
+ be upgraded, that would provide a practical impact.
+
+ The criteria for recommending an upgrade to heating controls are (one of these must be true)
+ 1) There are no controls
+ 2) No programmer
+ 3) No room thermostat
+ 4) No TRVs
+
+
+ :return:
+ """
+
+ # We check if we have the conditions to recommend this upgrade
+
+ needs_programmer = self.property.main_heating_controls["switch_system"] is None
+ needs_room_thermostat = self.property.main_heating_controls["thermostatic_control"] is None
+ needs_trvs = self.property.main_heating_controls["trvs"] is None
+
+ can_recommend = (
+ (self.property.main_heating_controls["no_control"] is not None) or
+ needs_programmer or
+ needs_room_thermostat or
+ needs_trvs
+ )
+
+ if not can_recommend:
+ return
+
+ ending_config = MainheatControlAttributes("Programmer, room thermostat and TRVS").process()
+ # We use this to determine how we should be updating the config
+ simulation_config = check_simulation_difference(
+ new_config=ending_config, old_config=self.property.main_heating_controls
+ )
+ # This upgrade takes the heating controls to "Good" energy efficiency
+ # If the current system is below good, we make it good
+ if self.property.data["mainheatc-energy-eff"] in ["Poor", "Very Poor", "Average"]:
+ simulation_config["mainheatc_energy_eff_ending"] = "Good"
+
+ has_programmer = not needs_programmer
+ has_room_thermostat = not needs_room_thermostat
+ has_trvs = not needs_trvs
+
+ cost_result = self.costs.roomstat_programmer_trvs(
+ number_heated_rooms=int(self.property.data["number-heated-rooms"]),
+ has_programmer=has_programmer,
+ has_room_thermostat=has_room_thermostat,
+ has_trvs=has_trvs
+ )
+
+ description = "upgrade heating controls to Room thermostat, programmer and TRVs"
+
+ already_installed = "heating_control" in self.property.already_installed
+ if already_installed:
+ cost_result = override_costs(cost_result)
+ description = "Heating controls have already been upgraded, no further action needed."
+
+ self.recommendation.append(
+ {
+ "type": "heating_control",
+ "parts": [],
+ "description": description,
+ **cost_result,
+ "starting_u_value": None,
+ "new_u_value": None,
+ "sap_points": None,
+ "already_installed": already_installed,
+ "simulation_config": simulation_config
+ }
+ )
+
+ return
+
+ def recommend_time_temperature_zone_controls(self):
+ """
+ If the home has a boiler, we can recommend time and temperature zone controls. This is a more advanced
+ and more efficient control system than the standard controls that come with a boiler. However, it may come
+ with a higher cost and more involved usage
+ :return:
+ """
+
+ # We check whether the current heating controls already make this upgrade unnecessary
+
+ # Conditions for installation are as follows:
+ # 1) The current heating controls are not time and temperature zone controls
+ # 2) The current heating controls are not already at 'Very Good' or above
+
+ if (
+ (self.property.main_heating_controls["thermostatic_control"] == "time and temperature zone control") or
+ (self.property.data["mainheatc-energy-eff"] in ["Very Good"])
+ ):
+ # No recommendation needed
+ return
+
+ ending_config = MainheatControlAttributes("Time and temperature zone control").process()
+
+ # We use this to determine how we should be updating the config
+ simulation_config = check_simulation_difference(
+ new_config=ending_config, old_config=self.property.main_heating_controls
+ )
+
+ # If the current system is below very good, we make it very good
+ if self.property.data["mainheatc-energy-eff"] in ["Poor", "Very Poor", "Average", "Good"]:
+ simulation_config["mainheatc_energy_eff_ending"] = "Very Good"
+
+ cost_result = self.costs.time_and_temperature_zone_control(
+ number_heated_rooms=int(self.property.data["number-heated-rooms"])
+ )
+
+ description = ("Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & "
+ "temperature zone control)")
+
+ already_installed = "heating_control" in self.property.already_installed
+ if already_installed:
+ cost_result = override_costs(cost_result)
+ description = "Heating controls have already been upgraded, no further action needed."
+
+ self.recommendation.append(
+ {
+ "type": "heating_control",
+ "parts": [],
+ "description": description,
+ **cost_result,
+ "starting_u_value": None,
+ "new_u_value": None,
+ "sap_points": None,
+ "already_installed": already_installed,
+ "simulation_config": simulation_config
+ }
+ )
diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py
index 11ae3da6..a51803f2 100644
--- a/recommendations/HeatingRecommender.py
+++ b/recommendations/HeatingRecommender.py
@@ -1,9 +1,9 @@
-import pandas as pd
-
-from recommendations.Costs import Costs
-from recommendations.recommendation_utils import check_simulation_difference
+from recommendations.Costs import Costs, BOILER_UPGRADE_SCHEME_ASHP_VALUE
+from recommendations.recommendation_utils import check_simulation_difference, override_costs
from backend.Property import Property
from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
+from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes
+from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes
from recommendations.HeatingControlRecommender import HeatingControlRecommender
@@ -13,18 +13,194 @@ class HeatingRecommender:
self.property = property_instance
self.costs = Costs(self.property)
- self.recommendations = []
+ self.heating_recommendations = []
+ self.heating_control_recommendations = []
- def recommend(self, phase=0):
- self.recommendations = []
+ def recommend(self, has_cavity_or_loft_recommendations, phase=0):
+ """
+ Produces heating recommendations
+ :param has_cavity_or_loft_recommendations: boolean indicating if we have produced a cavity or loft insulation
+ recommendation. If there are cavity or loft recommendations, the property would need to complete those measures
+ before being able to get the boiler upgrade scheme benefits. The description then notes the insulation needed.
+ :param phase: indicates the phase of the retrofit programme
+ """
+
+ # TODO: We could have a system flush recommendation for an existing boiler, where there is no need to replace
+ # the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this
+ # in the Costs class, stored as SYSTEM_FLUSH_COST
+
+ self.heating_recommendations = []
+ self.heating_control_recommendations = []
# This first iteration of the recommender will provide very basic recommendation
# We recommend heating controls based on the main heating system
- if self.property.main_heating["clean_description"] in [
+
+ has_electric_heating_description = self.property.main_heating["clean_description"] in [
"Room heaters, electric", "Electric storage heaters", "Electric storage heaters, radiators"
- ]:
+ ]
+
+ no_heating_no_mains = (
+ self.property.main_heating["clean_description"] in ["No system present, electric heaters assumed"] and
+ not self.property.data["mains-gas-flag"]
+ )
+
+ if has_electric_heating_description or no_heating_no_mains:
# Recommend high heat retention storage heaters
- self.recommend_electric_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)
- return
+ self.recommend_hhr_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)
+
+ # if the property has mains heating with boiler and radiators, we recommend optimal heating controls
+ has_boiler = self.property.main_heating["clean_description"] in ["Boiler and radiators, mains gas"]
+
+ # We also check that the property doesn't have a heating system, but it has access to the mains gas
+ no_heating_has_mains = self.property.main_heating["clean_description"] in [
+ 'No system present, electric heaters assumed'
+ ] and self.property.data["mains-gas-flag"]
+
+ has_gas_heaters = (
+ self.property.main_heating["clean_description"] in ["Room heaters, mains gas"] and
+ self.property.data["mains-gas-flag"]
+ )
+
+ # We also check if the property has electric heating, but it has access to the mains gas
+ electic_heating_has_mains = has_electric_heating_description and self.property.data["mains-gas-flag"]
+
+ portable_heaters_has_mains = (
+ self.property.main_heating["clean_description"] in ["Portable electric heaters assumed for most rooms"] and
+ self.property.data["mains-gas-flag"]
+ )
+
+ if (
+ has_boiler or
+ no_heating_has_mains or
+ electic_heating_has_mains or
+ has_gas_heaters or
+ portable_heaters_has_mains
+ ):
+ # This indicates that the home previously did not have a boiler in place and so would require
+ # an overhaul to the system - right now, this is all reasons, apart from if there is an existing boiler
+ system_change = not has_boiler
+ exising_room_heaters = self.property.main_heating["clean_description"] in [
+ "Room heaters, electric", "Room heaters, mains gas"
+ ]
+
+ self.recommend_boiler_upgrades(
+ phase=phase, system_change=system_change, exising_room_heaters=exising_room_heaters
+ )
+
+ # We recommend air source heat pumps
+ # Heat pumps are suitable for all property types:
+ # https://energysavingtrust.org.uk/from-flats-to-terraced-houses-heat-pumps-are-suitable-for-all-property-types/
+ # Just seems least probable for flats, so we'll allow houses and bungalows
+ # In the future, we'll allow overrides, so that non-intrusive surveys can contradict these conditions
+ # and either allow or prevent the recommendation of an air source heat pump
+
+ suitable_property_type = self.property.data["property-type"] in ["House", "Bungalow"]
+ has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"]
+
+ if suitable_property_type and not has_air_source_heat_pump:
+ self.recommend_air_source_heat_pump(
+ phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations
+ )
+
+ return
+
+ def recommend_air_source_heat_pump(self, phase, has_cavity_or_loft_recommendations):
+ """
+ This method will implement the recommendation for an air source heat pump
+ This is ultimately an overhaul to the heating system and so is recommended as an alternative to other
+ heating system recommendations
+ :return:
+ """
+
+ controls_recommender = HeatingControlRecommender(self.property)
+ controls_recommender.recommend(heating_description="Air source heat pump, radiators, electric")
+
+ ashp_costs = self.costs.air_source_heat_pump()
+ # We add the costs of the heating controls, onto each key in the costs dictionary
+ if controls_recommender.recommendation:
+ for key in ashp_costs:
+ ashp_costs[key] += controls_recommender.recommendation[0][key]
+
+ already_installed = "air_source_heat_pump" in self.property.already_installed
+ if already_installed:
+ ashp_costs = override_costs(ashp_costs)
+ description = "The property already has an air source heat pump, no further action needed."
+ else:
+ if controls_recommender.recommendation:
+ description = ("Install an air source heat pump, and upgrade heating controls to Smart Thermostats, "
+ "room sensors and smart radiator valves (time & temperature zone control).")
+ else:
+ description = "Install an air source heat pump."
+
+ # If the property does not have existing cavity and loft insulation, we include a note that the cost
+ # includes the boiler upgrade scheme and that the cavity and loft need to be treated, to ensure access
+ # to the funding
+ if has_cavity_or_loft_recommendations:
+ description = description + (f" The cost includes the £"
+ f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant. "
+ f"You must ensure that the property has an insulated cavity and "
+ f"270mm+ loft insulation to qualify for the grant")
+ else:
+ description = description + (f" The cost includes the £"
+ f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant")
+
+ simulation_config = {
+ "mainheat_energy_eff_ending": "Good",
+ "hot_water_energy_eff_ending": "Good"
+ }
+ # Installation of an air source heat pump improves the hot water system so we need to reflect this in
+ # the outcome of the recommendation
+ heating_ending_config = MainHeatAttributes("Air source heat pump, radiators, electric").process()
+ hotwater_ending_config = HotWaterAttributes("From main system").process()
+
+ # If the property does not currently have electric main fuel, we'll simulate the change
+ fuel_ending_config = {}
+ if self.property.main_fuel["fuel_type"] != "electricity":
+ fuel_ending_config = MainFuelAttributes("electricity (not community)").process()
+
+ # Check the simulation differences
+ heating_simulation_config = check_simulation_difference(
+ new_config=heating_ending_config, old_config=self.property.main_heating
+ )
+ hotwater_simulation_config = check_simulation_difference(
+ new_config=hotwater_ending_config, old_config=self.property.hotwater
+ )
+ fuel_simulation_config = check_simulation_difference(
+ new_config=fuel_ending_config, old_config=self.property.main_fuel
+ )
+
+ simulation_config = {
+ **simulation_config,
+ **heating_simulation_config,
+ **hotwater_simulation_config,
+ **fuel_simulation_config,
+ }
+
+ if controls_recommender.recommendation:
+ # We should have just the single recommendation for heat controls, which is time
+ # and temperature zone controls
+ if len(controls_recommender.recommendation) != 1:
+ raise NotImplementedError("More than one heat controls recommendation for air source heat pump")
+ simulation_config = {
+ **simulation_config,
+ **controls_recommender.recommendation[0]["simulation_config"]
+ }
+
+ ashp_recommendation = {
+ "phase": phase,
+ "parts": [
+ # TODO
+ ],
+ "type": "heating",
+ "description": description,
+ "starting_u_value": None,
+ "new_u_value": None,
+ "sap_points": None,
+ "already_installed": already_installed,
+ "simulation_config": simulation_config,
+ **ashp_costs
+ }
+
+ self.heating_recommendations.append(ashp_recommendation)
@staticmethod
def check_simulation_difference(old_config, new_config):
@@ -39,9 +215,8 @@ class HeatingRecommender:
return differences
- @staticmethod
def combine_heating_and_controls(
- controls_recommendations, heating_simulation_config, costs, description, phase, heating_controls_only,
+ self, controls_recommendations, heating_simulation_config, costs, description, phase, heating_controls_only,
system_change
):
"""
@@ -82,8 +257,18 @@ class HeatingRecommender:
**recommendation_simulation_config,
**controls_recommendations[0]["simulation_config"]
}
+ controls_description = controls_recommendations[0]['description']
+ # Make the first letter of the description lowercase
+ controls_description = (
+ controls_description[0].lower() + controls_description[1:]
+ )
- recommendation_description = f"{description} and {controls_recommendations[0]['description']}"
+ recommendation_description = f"{description} and {controls_description}"
+
+ already_installed = "heating_controls" in self.property.already_installed
+ if already_installed:
+ total_costs = override_costs(total_costs)
+ recommendation_description = "Heating system has already been upgraded, no further action needed."
recommendation = {
"phase": phase,
@@ -95,6 +280,7 @@ class HeatingRecommender:
"starting_u_value": None,
"new_u_value": None,
"sap_points": None,
+ "already_installed": already_installed,
**total_costs,
"simulation_config": recommendation_simulation_config
}
@@ -126,9 +312,8 @@ class HeatingRecommender:
return output
- def recommend_electric_storage_heaters(self, phase, system_change, heating_controls_only):
+ def recommend_hhr_storage_heaters(self, phase, system_change, heating_controls_only):
"""
- We recommend electric storage heaters as an upgrade to the heating system.
We will recommend upgrading to a high heat retention storage system, if the current system is not already
high heat retention storage
@@ -165,9 +350,18 @@ class HeatingRecommender:
# This upgrade will only take the heating system to average energy efficiency
heating_simulation_config["mainheat_energy_eff_ending"] = "Average"
+ # If the property is off-gas and has no heating system in place, the number of heated rooms will actually
+ # be 0, so we use the number of rooms as the figure
+ number_heated_rooms = (
+ self.property.data["number-heated-rooms"] if self.property.data["number-heated-rooms"] > 0
+ else (
+ self.property.number_of_rooms - 1 if self.property.number_of_rooms > 1 else
+ self.property.number_of_rooms
+ )
+ )
# Upgrade to electric storage heaters
costs = self.costs.high_heat_electric_storage_heaters(
- number_heated_rooms=self.property.data["number-heated-rooms"]
+ number_heated_rooms=number_heated_rooms
)
description = "Install high heat retention electric storage heaters"
@@ -181,4 +375,182 @@ class HeatingRecommender:
system_change=system_change
)
- self.recommendations.extend(recommendations)
+ self.heating_recommendations.extend(recommendations)
+
+ @staticmethod
+ def estimate_boiler_size(property_type, built_form, floor_area, floor_height, num_heated_rooms):
+ # Step 1: Base size estimation based on property type (as a starting point)
+ base_size = {
+ 'Flat': 25,
+ 'House': 30,
+ 'Maisonette': 28,
+ 'Bungalow': 27
+ }
+
+ # Step 2: Calculate the volume of the property
+ volume = floor_area * floor_height
+
+ # Step 3: Adjust base size for built form (to account for heat retention)
+ form_adjustment = {
+ 'Mid-Terrace': 0,
+ 'End-Terrace': 2,
+ 'Semi-Detached': 4,
+ 'Detached': 6
+ }
+
+ # Step 4: Further adjust for the total volume and number of heated rooms
+ volume_adjustment = (volume / 100) # Simplified adjustment factor for volume
+ rooms_adjustment = (num_heated_rooms - 5) * 0.5 # Assuming base case of 5 rooms
+
+ # Calculate the estimated boiler size
+ estimated_size = base_size[property_type] + form_adjustment[built_form] + volume_adjustment + rooms_adjustment
+
+ # Step 5: Align with available boiler sizes and cap the estimate at 40kW, as it's rare to need more
+ available_sizes = [30, 35, 40, 45, 50]
+ estimated_size = min(max(estimated_size, 30), 40) # Clamp to the 30kW to 40kW range (TODO confirm: cap was documented as 35kW)
+
+ # Find the closest available size (after clamping, this can only align with 30, 35 or 40)
+ closest_size = min(available_sizes, key=lambda x: abs(x - estimated_size))
+
+ return closest_size
+
+ def recommend_boiler_upgrades(self, phase, system_change, exising_room_heaters):
+ """
+ This boiler recommendation will only recommend a like-for-like upgrade, since changing the system
+ is generally more expensive
+ :param phase:
+ :param system_change: Indicates if the property would be undergoing a heating system change. This could be true
+ if the home didn't have a heating system in place, or if the home had electric heating
+ previously
+ :param exising_room_heaters: Indicates if the property had room heaters previously - if so, a boiler
+ recommendation will need to be accompanied by removal of the room heaters
+ :return:
+ """
+
+ recommendation_phase = phase
+
+ # We now recommend boiler upgrades, if applicable
+ simulation_config = {}
+ boiler_costs = {}
+ boiler_recommendation = {}
+
+ has_inefficient_space_heating = self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"]
+
+ has_inefficient_mains_water = (
+ self.property.hotwater["clean_description"] in ["From main system"] and
+ self.property.data["hot-water-energy-eff"] in ["Very Poor", "Poor", "Average"]
+ )
+
+ if has_inefficient_space_heating or has_inefficient_mains_water:
+ boiler_size = self.estimate_boiler_size(
+ property_type=self.property.data["property-type"],
+ built_form=self.property.data["built-form"],
+ floor_area=self.property.floor_area,
+ floor_height=self.property.floor_height,
+ num_heated_rooms=self.property.data["number-heated-rooms"],
+ )
+
+ description = "Upgrade to a new condensing boiler"
+
+ simulation_config = {
+ "mainheat_energy_eff_ending": "Good",
+ "hot_water_energy_eff_ending": "Good"
+ }
+ if system_change:
+ # Installation of a boiler improves the hot water system so we need to reflect this in
+ # the outcome of the recommendation
+ heating_ending_config = MainHeatAttributes("Boiler and radiators, mains gas").process()
+ hotwater_ending_config = HotWaterAttributes("From main system").process()
+ fuel_ending_config = MainFuelAttributes("mains gas (not community)").process()
+
+ heating_simulation_config = check_simulation_difference(
+ new_config=heating_ending_config, old_config=self.property.main_heating
+ )
+ hotwater_simulation_config = check_simulation_difference(
+ new_config=hotwater_ending_config, old_config=self.property.hotwater
+ )
+ fuel_simulation_config = check_simulation_difference(
+ new_config=fuel_ending_config, old_config=self.property.main_fuel
+ )
+
+ simulation_config = {
+ **simulation_config,
+ **heating_simulation_config,
+ **hotwater_simulation_config,
+ **fuel_simulation_config,
+ }
+
+ boiler_costs = self.costs.boiler(
+ size=f"{boiler_size}kw",
+ exising_room_heaters=exising_room_heaters,
+ system_change=system_change,
+ n_heated_rooms=self.property.data["number-heated-rooms"],
+ n_rooms=self.property.number_of_rooms
+ )
+
+ already_installed = "heating" in self.property.already_installed
+ if already_installed:
+ boiler_costs = override_costs(boiler_costs)
+ description = "Heating system has already been upgraded, no further action needed."
+
+ boiler_recommendation = {
+ "phase": recommendation_phase,
+ "parts": [
+ # TODO
+ ],
+ "type": "heating",
+ "description": description,
+ "starting_u_value": None,
+ "new_u_value": None,
+ "sap_points": None,
+ "already_installed": already_installed,
+ "simulation_config": simulation_config,
+ **boiler_costs
+ }
+
+ # We recommend the heating controls
+ # If the property is undergoing a system change (e.g. it did not previously have a boiler), we combine the controls with the boiler recommendation
+ controls_recommender = HeatingControlRecommender(self.property)
+ controls_recommender.recommend(heating_description="Boiler and radiators, mains gas")
+ # We may have 2 recommendations from the heating controls
+
+ if not controls_recommender.recommendation and not boiler_recommendation:
+ return
+
+ if not system_change and len(boiler_recommendation):
+ # If there is not a system change, we add the boiler recommendation at this point.
+ self.heating_recommendations.extend([boiler_recommendation])
+
+ if system_change:
+ # We combine the heating and controls recommendations, in the case of a system change
+ combined_recommendations = []
+ for controls_recommendation in controls_recommender.recommendation:
+ combined_recommendation = self.combine_heating_and_controls(
+ controls_recommendations=[controls_recommendation],
+ heating_simulation_config=simulation_config,
+ costs=boiler_costs,
+ description=boiler_recommendation["description"],
+ phase=recommendation_phase,
+ heating_controls_only=False,
+ system_change=True
+ )
+ combined_recommendations.extend(combined_recommendation)
+
+ # Add the combined heating and controls recommendations
+ self.heating_recommendations.extend(combined_recommendations)
+ else:
+ # We increment the recommendation phase, since the heating controls are separate from the boiler upgrade
+ # but we'll only increment if we have a heating recommendation
+ has_heating_recommendation = any(
+ rec["type"] == "heating" for rec in self.heating_recommendations
+ )
+ if has_heating_recommendation:
+ recommendation_phase += 1
+ # The heating controls recommendation is distinct from the boiler upgrade recommendation
+ # We insert phase into the recommendations for heating controls
+ for recommendation in controls_recommender.recommendation:
+ recommendation["phase"] = recommendation_phase
+
+ self.heating_control_recommendations.extend(controls_recommender.recommendation)
+
+ return
diff --git a/recommendations/HotwaterRecommendations.py b/recommendations/HotwaterRecommendations.py
index 298671a2..9c5c7045 100644
--- a/recommendations/HotwaterRecommendations.py
+++ b/recommendations/HotwaterRecommendations.py
@@ -1,5 +1,6 @@
from backend.Property import Property
from recommendations.Costs import Costs
+from recommendations.recommendation_utils import override_costs
class HotwaterRecommendations:
@@ -22,8 +23,14 @@ class HotwaterRecommendations:
# This first iteration of the recommender will provide very basic recommendation
# We recommend heating controls based on the main heating system
- if (self.property.hotwater["heater_type"] in ["electric immersion"]) & \
- (self.property.data["hot-water-energy-eff"] == "Very Poor"):
+
+ # We only recommend tank insulation when a system is present (no_system_present is None). TODO: handle no system but access to the mains
+
+ if (
+ (self.property.hotwater["heater_type"] in ["electric immersion"]) &
+ (self.property.data["hot-water-energy-eff"] == "Very Poor") &
+ (self.property.hotwater["no_system_present"] is None)
+ ):
self.recommend_tank_insulation(phase=phase)
return
@@ -35,6 +42,13 @@ class HotwaterRecommendations:
recommendation_cost = self.costs.hot_water_tank_insulation()
+ already_installed = "hot_water_tank_insulation" in self.property.already_installed
+ if already_installed:
+ recommendation_cost = override_costs(recommendation_cost)
+ description = "Insulation tank has already been insulated, no further action required"
+ else:
+ description = "Insulate hot water tank"
+
self.recommendations.append(
{
"phase": phase,
@@ -42,10 +56,11 @@ class HotwaterRecommendations:
# TODO
],
"type": "hot_water_tank_insulation",
- "description": "Insulate the hot water tank with an insulation jacket",
+ "description": description,
"starting_u_value": None,
"new_u_value": None,
"sap_points": None,
+ "already_installed": already_installed,
**recommendation_cost,
"simulation_config": {"hot_water_energy_eff_ending": "Average"}
}
diff --git a/recommendations/LightingRecommendations.py b/recommendations/LightingRecommendations.py
index 352c4d8a..31720579 100644
--- a/recommendations/LightingRecommendations.py
+++ b/recommendations/LightingRecommendations.py
@@ -1,6 +1,7 @@
from backend.Property import Property
from typing import List
from recommendations.Costs import Costs
+from recommendations.recommendation_utils import override_costs
class LightingRecommendations:
@@ -91,6 +92,11 @@ class LightingRecommendations:
heat_demand_change, carbon_change = self.estimate_lighting_impact(number_non_lel_outlets)
+ already_installed = "low_energy_lighting" in self.property.already_installed
+ if already_installed:
+ cost_result = override_costs(cost_result)
+ description = "Low energy lighting has already been installed, no further action required"
+
self.recommendation = [
{
"phase": phase,
@@ -99,6 +105,7 @@ class LightingRecommendations:
"description": description,
"starting_u_value": None,
"new_u_value": None,
+ "already_installed": already_installed,
# For SAP points, we use the fact that lighting is usually worth 2 points and we scale this to
# the proportion of lights that will be set to low energy
"sap_points": round(2 * (number_non_lel_outlets / number_lighting_outlets), 2),
diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py
index 9f838e1c..c8113cdc 100644
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@@ -11,6 +11,7 @@ from recommendations.SolarPvRecommendations import SolarPvRecommendations
from recommendations.WindowsRecommendations import WindowsRecommendations
from recommendations.HeatingRecommender import HeatingRecommender
from recommendations.HotwaterRecommendations import HotwaterRecommendations
+from recommendations.SecondaryHeating import SecondaryHeating
from backend.ml_models.AnnualBillSavings import AnnualBillSavings
@@ -22,7 +23,8 @@ class Recommendations:
def __init__(
self,
property_instance: Property,
- materials: List
+ materials: List,
+ exclusions: List[str] = None,
):
"""
:param property_instance: Instance of the Property class, for the home associated to property_id
@@ -31,6 +33,7 @@ class Recommendations:
self.property_instance = property_instance
self.materials = materials
+ self.exclusions = exclusions if exclusions else []
self.floor_recommender = FloorRecommendations(property_instance=property_instance, materials=materials)
self.wall_recomender = WallRecommendations(property_instance=property_instance, materials=materials)
@@ -44,8 +47,9 @@ class Recommendations:
self.solar_recommender = SolarPvRecommendations(property_instance=property_instance)
self.heating_recommender = HeatingRecommender(property_instance=property_instance)
self.hotwater_recommender = HotwaterRecommendations(property_instance=property_instance)
+ self.secondary_heating_recommender = SecondaryHeating(property_instance=property_instance)
- def recommend(self, portfolio_id):
+ def recommend(self):
"""
This method runs the recommendations for the individual measures and then appends them to a list for output
@@ -58,78 +62,139 @@ class Recommendations:
property_recommendations = []
phase = 0
- print("WALL RECOMMENDATIONS HAVE BEEN COMMENTED OUT TEMPORARILY - ADD ME BACK IN")
- if portfolio_id != 66:
- # Building Fabric
+ # Building Fabric
+ if "wall_insulation" not in self.exclusions:
self.wall_recomender.recommend(phase=phase)
if self.wall_recomender.recommendations:
property_recommendations.append(self.wall_recomender.recommendations)
phase += 1
- # Ventilation recommendations
- # We only produce a ventilation recommendation if the property is recommended to have wall or roof
- # insulation
- # We will not attribute a SAP impact to the ventilation recommendation, since we've seen that this has no
- # real impact on the SAP score. Therefore, we don't need to include phasing for ventilation. If we have any
- # wall or roof recommendations, we will ensure that ventilation is included in the simulation
+ if "roof_insulation" not in self.exclusions:
+ self.roof_recommender.recommend(phase=phase)
+ if self.roof_recommender.recommendations:
+ property_recommendations.append(self.roof_recommender.recommendations)
+ phase += 1
+
+ # Ventilation recommendations
+ # We only produce a ventilation recommendation if the property is recommended to have wall or roof
+ # insulation
+ # We will not attribute a SAP impact to the ventilation recommendation, since we've seen that this
+ # has no
+ # real impact on the SAP score. Therefore, we don't need to include phasing for ventilation. If we
+ # have any
+ # wall or roof recommendations, we will ensure that ventilation is included in the simulation
+ if "ventilation" not in self.exclusions:
if self.wall_recomender.recommendations or self.roof_recommender.recommendations:
self.ventilation_recomender.recommend()
if self.ventilation_recomender.recommendation:
property_recommendations.append(self.ventilation_recomender.recommendation)
- self.roof_recommender.recommend(phase=phase)
- if self.roof_recommender.recommendations:
- property_recommendations.append(self.roof_recommender.recommendations)
- phase += 1
+ if "floor_insulation" not in self.exclusions:
+ self.floor_recommender.recommend(phase=phase)
+ if self.floor_recommender.recommendations:
+ property_recommendations.append(self.floor_recommender.recommendations)
+ phase += 1
- self.floor_recommender.recommend(phase=phase)
- if self.floor_recommender.recommendations:
- property_recommendations.append(self.floor_recommender.recommendations)
- phase += 1
+ if "windows" not in self.exclusions:
+ self.windows_recommender.recommend(phase=phase)
+ if self.windows_recommender.recommendation:
+ property_recommendations.append(self.windows_recommender.recommendation)
+ phase += 1
- self.windows_recommender.recommend(phase=phase)
- if self.windows_recommender.recommendation:
- property_recommendations.append(self.windows_recommender.recommendation)
- phase += 1
-
- self.fireplace_recommender.recommend(phase=phase)
- if self.fireplace_recommender.recommendation:
- property_recommendations.append(self.fireplace_recommender.recommendation)
- phase += 1
+ if "fireplace" not in self.exclusions:
+ self.fireplace_recommender.recommend(phase=phase)
+ if self.fireplace_recommender.recommendation:
+ property_recommendations.append(self.fireplace_recommender.recommendation)
+ phase += 1
# Heating and Electical systems
- self.heating_recommender.recommend(phase=phase)
- if self.heating_recommender.recommendations:
- property_recommendations.append(self.heating_recommender.recommendations)
- phase += 1
+ if "heating" not in self.exclusions:
+
+ cavity_or_loft_recommendations = [
+ r for r in self.wall_recomender.recommendations + self.roof_recommender.recommendations
+ if r["type"] in ["cavity_wall_insulation", "loft_insulation"]
+ ]
+ has_cavity_or_loft_recommendations = len(cavity_or_loft_recommendations) > 0
+
+ self.heating_recommender.recommend(
+ phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations
+ )
+ if (
+ self.heating_recommender.heating_recommendations or
+ self.heating_recommender.heating_control_recommendations
+ ):
+
+ # We split into first and second phase recommendations
+ first_phase_recommendations = [
+ r for r in (
+ self.heating_recommender.heating_recommendations +
+ self.heating_recommender.heating_control_recommendations
+ )
+ if r["phase"] == phase
+ ]
+ second_phase_recommendations = [
+ r for r in (
+ self.heating_recommender.heating_recommendations +
+ self.heating_recommender.heating_control_recommendations
+ )
+ if r["phase"] == phase + 1
+ ]
+
+ if first_phase_recommendations:
+ property_recommendations.append(first_phase_recommendations)
+
+ if second_phase_recommendations:
+ property_recommendations.append(second_phase_recommendations)
+
+ # We check if we have distinct heating and heating controls recommendations
+ # If so, we increment by 2 (one for the heating system, one for the heating controls)
+ # otherwise we increment by 1
+ max_used_phase = max(
+ [rec["phase"] for rec in
+ self.heating_recommender.heating_recommendations +
+ self.heating_recommender.heating_control_recommendations]
+ )
+ amount_to_increment = max_used_phase - phase + 1
+ phase += amount_to_increment
# Hot water
- self.hotwater_recommender.recommend(phase=phase)
- if self.hotwater_recommender.recommendations:
- property_recommendations.append(self.hotwater_recommender.recommendations)
- phase += 1
+ if "hot_water" not in self.exclusions:
+ self.hotwater_recommender.recommend(phase=phase)
+ if self.hotwater_recommender.recommendations:
+ property_recommendations.append(self.hotwater_recommender.recommendations)
+ phase += 1
- self.lighting_recommender.recommend(phase=phase)
- if self.lighting_recommender.recommendation:
- property_recommendations.append(self.lighting_recommender.recommendation)
- phase += 1
+ if "lighting" not in self.exclusions:
+ self.lighting_recommender.recommend(phase=phase)
+ if self.lighting_recommender.recommendation:
+ property_recommendations.append(self.lighting_recommender.recommendation)
+ phase += 1
+
+ if "secondary_heating" not in self.exclusions:
+ self.secondary_heating_recommender.recommend(phase=phase)
+ if self.secondary_heating_recommender.recommendation:
+ property_recommendations.append(self.secondary_heating_recommender.recommendation)
+ phase += 1
# Renewables
- self.solar_recommender.recommend(phase=phase)
- if self.solar_recommender.recommendation:
- property_recommendations.append(self.solar_recommender.recommendation)
- phase += 1
+ if "solar_pv" not in self.exclusions:
+ self.solar_recommender.recommend(phase=phase)
+ if self.solar_recommender.recommendation:
+ property_recommendations.append(self.solar_recommender.recommendation)
+ phase += 1
# We insert temporary ids into the recommendations which is important for the optimiser later
property_recommendations = self.insert_temp_recommendation_id(property_recommendations)
# We also need to create the representative recommendations for each recommendation type
- property_representative_recommendations = self.create_representative_recommendations(property_recommendations)
+ property_representative_recommendations = self.create_representative_recommendations(
+ property_recommendations, non_invasive_recommendations=self.property_instance.non_invasive_recommendations
+ )
return property_recommendations, property_representative_recommendations
@staticmethod
- def create_representative_recommendations(property_recommendations):
+ def create_representative_recommendations(property_recommendations, non_invasive_recommendations):
"""
This method will create a representative recommendation for each recommendation type
In order to create a representative recommendation, we choose the recommendation that has:
@@ -144,6 +209,13 @@ class Recommendations:
for recommendations_by_type in property_recommendations:
+ # If the property was initially surveyed as filled, but the cavity was only partially filled, we don't
+ # want to include the cavity wall insulation recommendation in the defaults
+ # if (recommendations_by_type[0].get("type") == "cavity_wall_insulation") and (
+ # "cavity_surveyed_as_filled_is_partial" in non_invasive_recommendations
+ # ):
+ # continue
+
if recommendations_by_type[0].get("type") == "mechanical_ventilation":
continue
@@ -213,13 +285,13 @@ class Recommendations:
property_sap_predictions = all_predictions["sap_change_predictions"][
all_predictions["sap_change_predictions"]["property_id"] == str(property_instance.id)
- ]
+ ].copy()
property_heat_predictions = all_predictions["heat_demand_predictions"][
all_predictions["heat_demand_predictions"]["property_id"] == str(property_instance.id)
- ]
+ ].copy()
property_carbon_predictions = all_predictions["carbon_change_predictions"][
all_predictions["carbon_change_predictions"]["property_id"] == str(property_instance.id)
- ]
+ ].copy()
property_recommendations = recommendations[property_instance.id].copy()
@@ -247,6 +319,8 @@ class Recommendations:
current_epc_rating=property_instance.data["current-energy-rating"],
)
+ # TODO: This isn't quite right as this is based on EVERY possible measure, not just the ones that are
+ # actually implemented
expected_adjusted_energy = AnnualBillSavings.adjust_energy_to_metered(
epc_energy_consumption=expected_heat_demand,
current_epc_rating=property_instance.data["current-energy-rating"],
@@ -256,6 +330,10 @@ class Recommendations:
current_adjusted_energy - expected_adjusted_energy
)
+ # TODO: We should determine if the home is gas & electricity or just electricity
+ current_energy_bill = AnnualBillSavings.calculate_annual_bill(current_adjusted_energy)
+ expected_energy_bill = AnnualBillSavings.calculate_annual_bill(expected_adjusted_energy)
+
for recommendations_by_type in property_recommendations:
for rec in recommendations_by_type:
@@ -330,4 +408,10 @@ class Recommendations:
rec["heat_demand"] is None) or (rec["energy_cost_savings"] is None):
raise ValueError("sap points, co2 or heat demand is missing")
- return property_recommendations, current_adjusted_energy, expected_adjusted_energy
+ return (
+ property_recommendations,
+ current_adjusted_energy,
+ expected_adjusted_energy,
+ current_energy_bill,
+ expected_energy_bill
+ )
diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py
index eb1c6c4f..dc5ee7db 100644
--- a/recommendations/RoofRecommendations.py
+++ b/recommendations/RoofRecommendations.py
@@ -5,7 +5,7 @@ from typing import List
from datatypes.enums import QuantityUnits
from recommendations.recommendation_utils import (
get_roof_u_value, r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns,
- update_lowest_selected_u_value, get_recommended_part, convert_thickness_to_numeric
+ update_lowest_selected_u_value, get_recommended_part, convert_thickness_to_numeric, override_costs
)
from recommendations.Costs import Costs
@@ -20,8 +20,9 @@ class RoofRecommendations:
DIMINISHING_RETURNS_U_VALUE = 0.14
- # It is recommended that lofts should have at least 270mm of insulation
- MINIMUM_LOFT_ISULATION_MM = 270
+ # It is recommended that lofts should have at least 270mm of insulation. If the property has more than 200mm of
+ # loft insulation in place already, we do not recommend anything for the moment
+ MINIMUM_LOFT_ISULATION_MM = 200
# Flat roof should have at least 100mm of insulation
MINIMUM_FLAT_ROOF_ISULATION_MM = 100
@@ -71,7 +72,7 @@ class RoofRecommendations:
# Building regulations part L recommend installing at least 270mm of insulation, however generally we
# experience diminishing returns in terms of SAP once we go beyond around 150mm of insulation
# This only holds true for pitched roofs.
- if (insulation_thickness >= self.MINIMUM_LOFT_ISULATION_MM) and self.property.roof["is_pitched"]:
+ if (insulation_thickness > self.MINIMUM_LOFT_ISULATION_MM) and self.property.roof["is_pitched"]:
return
if (insulation_thickness >= self.MINIMUM_FLAT_ROOF_ISULATION_MM) and self.property.roof["is_flat"]:
@@ -206,12 +207,18 @@ class RoofRecommendations:
floor_area=self.property.insulation_floor_area,
material=material
)
+ already_installed = "loft_insulation" in self.property.already_installed
+ if already_installed:
+ cost_result = override_costs(cost_result)
elif material["type"] == "flat_roof_insulation":
cost_result = self.costs.flat_roof_insulation(
floor_area=self.property.insulation_floor_area,
material=material,
non_insulation_materials=non_insulation_materials
)
+ already_installed = "flat_roof_insulation" in self.property.already_installed
+ if already_installed:
+ cost_result = override_costs(cost_result)
else:
raise ValueError("Invalid material type")
@@ -231,6 +238,7 @@ class RoofRecommendations:
"starting_u_value": u_value,
"new_u_value": new_u_value,
"sap_points": None,
+ "already_installed": already_installed,
**cost_result
}
)
diff --git a/recommendations/SecondaryHeating.py b/recommendations/SecondaryHeating.py
new file mode 100644
index 00000000..5d763510
--- /dev/null
+++ b/recommendations/SecondaryHeating.py
@@ -0,0 +1,65 @@
+from recommendations.Costs import Costs
+from recommendations.recommendation_utils import override_costs
+from backend.Property import Property
+
+
+class SecondaryHeating:
+ """
+ This class recommends the removal of the secondary heating system for properties that have a primary heating
+ system.
+ """
+
+ # The list of existing heating systems that are accepted
+ ACCEPTED_MAINHEAT_DESCRIPTIONS = ["Boiler and radiators, mains gas"]
+ ACCEPTED_SECONDHEAT_DESCRIPTIONS = ["Room heaters, electric"]
+ # These are the heaters where works are required to remove them
+ FIXED_HEATER_DESCRIPTIONS = ["Room heaters, electric"]
+
+ def __init__(self, property_instance: Property):
+ self.property = property_instance
+ self.costs = Costs(self.property)
+
+ self.recommendation = []
+
+ def recommend(self, phase: int):
+ # Reset
+ self.recommendation = []
+
+ if self.property.main_heating["clean_description"] not in self.ACCEPTED_MAINHEAT_DESCRIPTIONS:
+ return
+
+ # TODO: We need to clean secondary data
+ if self.property.data['secondheat-description'] not in self.ACCEPTED_SECONDHEAT_DESCRIPTIONS:
+ return
+
+ if self.property.data['secondheat-description'] in self.FIXED_HEATER_DESCRIPTIONS:
+ # Fixed heaters need works to remove, so there is an associated cost; otherwise, there is no cost
+ n_rooms = self.property.data['number-heated-rooms']
+ else:
+ n_rooms = 0
+
+ costs = self.costs.heater_removal(n_rooms=n_rooms)
+
+ already_installed = "secondary_heating" in self.property.already_installed
+ if already_installed:
+ costs = override_costs(costs)
+ description = "Secondary heating system has already been removed, no further action required"
+ else:
+ description = "Remove the secondary heating system"
+
+ self.recommendation.append(
+ {
+ "phase": phase,
+ "parts": [],
+ "type": "secondary_heating",
+ "description": description,
+ "starting_u_value": None,
+ "new_u_value": None,
+ "sap_points": None,
+ "already_installed": already_installed,
+ **costs,
+ "simulation_config": {
+ "secondheat_description_ending": "None"
+ }
+ }
+ )
diff --git a/recommendations/SolarPvRecommendations.py b/recommendations/SolarPvRecommendations.py
index 3a89b213..58d4b123 100644
--- a/recommendations/SolarPvRecommendations.py
+++ b/recommendations/SolarPvRecommendations.py
@@ -1,5 +1,6 @@
import numpy as np
from recommendations.Costs import Costs
+from recommendations.recommendation_utils import override_costs
class SolarPvRecommendations:
@@ -8,6 +9,9 @@ class SolarPvRecommendations:
# Wattage per panel - this is based on the average wattage of a solar panel being between 250w and 420w
SOLAR_PANEL_WATTAGE = 250
+ MAX_SYSTEM_WATTAGE = 6000
+ MIN_SYSTEM_WATTAGE = 1000
+
def __init__(self, property_instance):
"""
:param property_instance: Instance of the Property class, for the home associated to property_id
@@ -18,6 +22,19 @@ class SolarPvRecommendations:
self.recommendation = []
+ @staticmethod
+ def trim_solar_wattage_options(scenarios_with_wattage):
+ # Initialize the list with the first element, assuming the list is not empty
+ trimmed_list = [scenarios_with_wattage[0]]
+
+ # Iterate over the list starting from the second element
+ for scenario in scenarios_with_wattage[1:]:
+ # Compare the second element (index 1) of the current tuple with the last tuple in the trimmed list
+ if scenario[1] > trimmed_list[-1][1]:
+ trimmed_list.append(scenario)
+
+ return trimmed_list
+
def recommend(self, phase):
"""
We check if a property is potentially suitable for solar PV based on the following criteria:
@@ -27,7 +44,7 @@ class SolarPvRecommendations:
:return:
"""
- is_valid_property_type = self.property.data["property-type"] in ["House", "Bungalow"]
+ is_valid_property_type = self.property.data["property-type"] in ["House", "Bungalow", "Maisonette"]
is_valid_roof_type = (
self.property.roof["is_flat"] or self.property.roof["is_pitched"] or self.property.roof["is_roof_room"]
)
@@ -39,33 +56,56 @@ class SolarPvRecommendations:
if not is_valid_property_type or not is_valid_roof_type or not has_no_existing_solar_pv:
return
+ solar_pv_percentage = self.property.solar_pv_percentage
+ # We round up to the nearest 10%
+ solar_pv_percentage = np.ceil(solar_pv_percentage * 10) / 10
+
# For the solar recommendations, we produce the following scenarios:
# 1) Solar panels only, we present a high, medium and low coverage
# 2) With and without battery
roof_coverage_scenarios = [
- self.property.solar_pv_percentage - 0.1, self.property.solar_pv_percentage,
- self.property.solar_pv_percentage + 0.1
+ solar_pv_percentage - 0.1, solar_pv_percentage,
]
- # We make sure we haven't gone too low or high
- roof_coverage_scenarios = [v for v in roof_coverage_scenarios if 0 <= v <= 1]
+ if solar_pv_percentage <= 0.4:
+ roof_coverage_scenarios.append(solar_pv_percentage + 0.1)
+ # We make sure we haven't gone too low or high - we allow no more than 60% coverage
+ roof_coverage_scenarios = [v for v in roof_coverage_scenarios if 0 <= v <= 0.6]
+ # If we only have two scenarios, we add a coverage scenario 10% less than the smallest
+ if len(roof_coverage_scenarios) == 2:
+ roof_coverage_scenarios.insert(0, roof_coverage_scenarios[0] - 0.1)
battery_scenarios = [False, True]
- # I now produce the cross product of the scenarios
- scenarios = [(roof, battery) for roof in roof_coverage_scenarios for battery in battery_scenarios]
-
- for roof_coverage, has_battery in scenarios:
+ scenarios_with_wattage = []
+ for roof_coverage in roof_coverage_scenarios:
# We now have a property which is potentially suitable for solar PV
solar_pv_roof_area = self.property.get_solar_pv_roof_area(roof_coverage)
number_solar_panels = np.floor(solar_pv_roof_area / self.SOLAR_PANEL_AREA)
solar_panel_wattage = number_solar_panels * self.SOLAR_PANEL_WATTAGE
- roof_coverage_percent = round(roof_coverage * 100)
+ if solar_panel_wattage < self.MIN_SYSTEM_WATTAGE:
+ continue
+ solar_panel_wattage = np.clip(
+ a=solar_panel_wattage, a_min=self.MIN_SYSTEM_WATTAGE, a_max=self.MAX_SYSTEM_WATTAGE
+ )
+ scenarios_with_wattage.append((roof_coverage, solar_panel_wattage))
+
+ # We trim the scenarios, so that we don't have duplicate wattages
+ scenarios_with_wattage = self.trim_solar_wattage_options(scenarios_with_wattage)
+
+ # Produce the cross product of the scenarios
+ scenarios = [
+ (roof, wattage, battery) for roof, wattage in scenarios_with_wattage for battery in battery_scenarios
+ ]
+ # We deduce the wattage of the solar panels based on the roof coverage
+
+ for roof_coverage, solar_panel_wattage, has_battery in scenarios:
+ # We now have a property which is potentially suitable for solar PV
+ roof_coverage_percent = round(roof_coverage * 100)
# Given the wattage, we estimate the cost of the solar PV system. This is based on the MCS database
# of solar PV installations
cost_result = self.costs.solar_pv(wattage=solar_panel_wattage, has_battery=has_battery)
-
kw = np.floor(solar_panel_wattage / 100) / 10
if has_battery:
@@ -75,6 +115,10 @@ class SolarPvRecommendations:
description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) p"
f"anel system on {round(roof_coverage_percent)}% the roof.")
+ already_installed = "solar_pv" in self.property.already_installed
+ if already_installed:
+ cost_result = override_costs(cost_result)
+
self.recommendation.append(
{
"phase": phase,
@@ -84,9 +128,11 @@ class SolarPvRecommendations:
"starting_u_value": None,
"new_u_value": None,
"sap_points": None,
+ "already_installed": already_installed,
**cost_result,
# This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale
# back up here
- "photo_supply": 100 * roof_coverage
+ "photo_supply": 100 * roof_coverage,
+ "has_battery": has_battery
}
)
diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py
index 1657b759..5b36bd9c 100644
--- a/recommendations/VentilationRecommendations.py
+++ b/recommendations/VentilationRecommendations.py
@@ -50,7 +50,11 @@ class VentilationRecommendations(Definitions):
part = self.materials.copy()
- estimated_cost = n_units * part[0]["cost"]
+ already_installed = "cavity_wall_insulation" in self.property.already_installed
+
+ estimated_cost = n_units * part[0]["cost"] if not already_installed else 0
+ labour_hours = 4 * n_units if not already_installed else 0
+ labour_days = 4 * n_units / 8.0 if not already_installed else 0
part[0]["total"] = estimated_cost
part[0]["quantity"] = n_units
@@ -65,6 +69,7 @@ class VentilationRecommendations(Definitions):
"description": f"Install {n_units} {part[0]['description']} units",
"starting_u_value": None,
"new_u_value": None,
+ "already_installed": already_installed,
"sap_points": 0,
"heat_demand": 0,
"adjusted_heat_demand": 0,
@@ -72,7 +77,7 @@ class VentilationRecommendations(Definitions):
"energy_cost_savings": 0,
"total": estimated_cost,
# We use a very simple and rough estimate of 4 hours per unit
- "labour_hours": 4 * n_units,
- "labour_days": 4 * n_units / 8.0 # Assume 8 hour day
+ "labour_hours": labour_hours,
+ "labour_days": labour_days # Assume 8 hour day
}
]
diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py
index 6b59c148..20fc453c 100644
--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@@ -8,7 +8,7 @@ from backend.Property import Property
from BaseUtility import Definitions
from recommendations.recommendation_utils import (
r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value,
- get_recommended_part, get_wall_u_value
+ get_recommended_part, get_wall_u_value, override_costs
)
from recommendations.config import PARTIALLY_FILLED_PERCENTAGE_ASSUMPTION
from recommendations.Costs import Costs
@@ -113,7 +113,9 @@ class WallRecommendations(Definitions):
insulation_thickness = self.property.walls["insulation_thickness"]
# We check if the wall is already insulated and if so, we exit
- if (insulation_thickness in ["average", "above average"]) or self.property.walls["is_filled_cavity"]:
+ if ((insulation_thickness in ["average", "above average"]) or self.property.walls["is_filled_cavity"]) and (
+ "cavity_extract_and_refill" not in self.property.non_invasive_recommendations
+ ):
return
if u_value:
@@ -216,11 +218,26 @@ class WallRecommendations(Definitions):
if new_u_value <= self.BUILDING_REGULATIONS_PART_L_CAVITY_WALL_MAX_U_VALUE:
lowest_selected_u_value = update_lowest_selected_u_value(lowest_selected_u_value, new_u_value)
+ is_extraction_and_refill = "cavity_extract_and_refill" in self.property.non_invasive_recommendations
+
cost_result = self.costs.cavity_wall_insulation(
wall_area=self.property.insulation_wall_area,
material=material.to_dict(),
+ is_extraction_and_refill=is_extraction_and_refill
)
+ already_installed = "cavity_wall_insulation" in self.property.already_installed
+ if already_installed:
+ cost_result = override_costs(cost_result)
+
+ if is_extraction_and_refill:
+ description = f"Extract and refill cavity wall insulation with {material['description']}"
+ else:
+ description = self._make_description(material)
+
+ # Limit the new u-value to 0.31, the best (lowest) value our installers can achieve
+ new_u_value = max(0.31, new_u_value)
+
recommendations.append(
{
"phase": phase,
@@ -233,10 +250,11 @@ class WallRecommendations(Definitions):
)
],
"type": "cavity_wall_insulation",
- "description": self._make_description(material),
+ "description": description,
"starting_u_value": u_value,
"new_u_value": new_u_value,
"sap_points": None,
+ "already_installed": already_installed,
**cost_result
}
)
@@ -277,12 +295,19 @@ class WallRecommendations(Definitions):
material=material.to_dict(),
non_insulation_materials=non_insulation_materials
)
+ already_installed = "internal_wall_insulation" in self.property.already_installed
+ if already_installed:
+ cost_result = override_costs(cost_result)
+
elif material["type"] == "external_wall_insulation":
cost_result = self.costs.external_wall_insulation(
wall_area=self.property.insulation_wall_area,
material=material.to_dict(),
non_insulation_materials=non_insulation_materials
)
+ already_installed = "external_wall_insulation" in self.property.already_installed
+ if already_installed:
+ cost_result = override_costs(cost_result)
else:
raise ValueError("Invalid material type")
@@ -301,6 +326,7 @@ class WallRecommendations(Definitions):
"description": self._make_description(material),
"starting_u_value": u_value,
"new_u_value": new_u_value,
+ "already_installed": already_installed,
"sap_points": None,
**cost_result
}
diff --git a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py
index d7404e3b..b7c2823a 100644
--- a/recommendations/WindowsRecommendations.py
+++ b/recommendations/WindowsRecommendations.py
@@ -4,6 +4,7 @@ import numpy as np
from backend.Property import Property
from recommendations.Costs import Costs
+from recommendation_utils import override_costs
class WindowsRecommendations:
@@ -70,18 +71,23 @@ class WindowsRecommendations:
is_secondary_glazing=is_secondary_glazing
)
- glazing_type = "secondary glazing" if is_secondary_glazing else "double glazing"
- if self.property.windows["glazing_coverage"] in ["partial", "most"]:
- description = f"Install {glazing_type} to the remaining windows"
+ already_installed = "windows_glazing" in self.property.already_installed
+ if already_installed:
+ cost_result = override_costs(cost_result)
+ description = "The property already has double glazing installed. No further action is required."
else:
- description = f"Install {glazing_type} to all windows"
+ glazing_type = "secondary glazing" if is_secondary_glazing else "double glazing"
+ if self.property.windows["glazing_coverage"] in ["partial", "most"]:
+ description = f"Install {glazing_type} to the remaining windows"
+ else:
+ description = f"Install {glazing_type} to all windows"
- if self.property.is_listed:
- description += ". Secondary glazing recommended due to listed building status"
- elif self.property.is_heritage:
- description += ". Secondary glazing recommended due to herigate building status"
- elif self.property.in_conservation_area:
- description += ". Secondary glazing recommended due to conservation area status"
+ if self.property.is_listed:
+ description += ". Secondary glazing recommended due to listed building status"
+ elif self.property.is_heritage:
+ description += ". Secondary glazing recommended due to herigate building status"
+ elif self.property.in_conservation_area:
+ description += ". Secondary glazing recommended due to conservation area status"
self.recommendation = [
{
@@ -92,6 +98,7 @@ class WindowsRecommendations:
"starting_u_value": None,
"new_u_value": None,
"sap_points": None,
+ "already_installed": already_installed,
**cost_result,
"is_secondary_glazing": is_secondary_glazing
}
diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py
index 27838d6e..d6353eea 100644
--- a/recommendations/optimiser/optimiser_functions.py
+++ b/recommendations/optimiser/optimiser_functions.py
@@ -1,17 +1,13 @@
-def prepare_input_measures(property_recommendations, goal, housing_type):
+def prepare_input_measures(property_recommendations, goal):
"""
Basic function to convert recommendations_to_upload to a format that is
suitable for the optimiser - large
:param property_recommendations: object containing the recommendations, created in the plan trigger api
:param goal: goal to be optimised for, should be one of the keys in gain_map. E.g. if the gain is SAP points,
the goal should reflect that desired gain
- :param housing_type: type of housing the recommendations are for - should be one of "Social" or "Private"
:return: Nested list of input measures
"""
- if housing_type not in ["Social", "Private"]:
- raise ValueError("Invalid housing type - investigate me")
-
goal_map = {
"Increase EPC": "sap_points"
}
@@ -20,12 +16,14 @@ def prepare_input_measures(property_recommendations, goal, housing_type):
if not goal_key:
raise NotImplementedError("Not implemented this gain type - investigate me")
- # We don't include suspended and solid floor insulation as possible measures in private housing, because
- # of the need to decant the tenant
- ignored_measures = ["suspended_floor_insulation", "solid_floor_insulation"] if housing_type == "Private" else []
-
input_measures = []
for recs in property_recommendations:
+ if recs[0]["type"] == "solar_pv":
+ # if the recommendation is a solar recommendation without a battery, we exclude it from the optimisation.
+ # That will ensure that the optimiser only considers solar recommendations with batteries, so we don't
+ # under-report the potential cost
+ recs = [r for r in recs if r["has_battery"]]
+
input_measures.append(
[
{
@@ -34,7 +32,7 @@ def prepare_input_measures(property_recommendations, goal, housing_type):
"gain": rec[goal_key],
"type": rec["type"]
}
- for rec in recs if rec["type"] not in ignored_measures
+ for rec in recs
]
)
diff --git a/recommendations/recommendation_utils.py b/recommendations/recommendation_utils.py
index 0d5f9743..a3043c31 100644
--- a/recommendations/recommendation_utils.py
+++ b/recommendations/recommendation_utils.py
@@ -767,3 +767,15 @@ def check_simulation_difference(old_config, new_config):
differences = {key + "_ending": new_config[key] for key in new_config if old_config[key] != new_config[key]}
return differences
+
+
+def override_costs(costs):
+ """
+ If a measure is overridden (e.g. already installed), its costs must be zero. This function sets every cost to zero
+ :param costs: Dictionary of costings, as returned by the Costs class; it is modified in place
+ :return: The same dictionary, with every value set to 0
+ """
+ for k in costs:
+ costs[k] = 0
+
+ return costs
diff --git a/recommendations/tests/test_air_source_heat_pump.py b/recommendations/tests/test_air_source_heat_pump.py
new file mode 100644
index 00000000..0d69b10d
--- /dev/null
+++ b/recommendations/tests/test_air_source_heat_pump.py
@@ -0,0 +1,944 @@
+import pandas as pd
+import msgpack
+from datetime import datetime
+
+from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3
+from backend.Property import Property
+from recommendations.HeatingRecommender import HeatingRecommender
+from recommendations.Recommendations import Recommendations
+from etl.epc.Record import EPCRecord
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from backend.ml_models.api import ModelApi
+
+
+def find_examples():
+ """ Some scrappy helper code to find EPC examples"""
+ # Let's look for some testing data, where the only thing different pre and post is the installation of an
+ # air source heat pump
+ data = read_dataframe_from_s3_parquet(
+ bucket_name="retrofit-data-dev",
+ file_key="sap_change_model/2024-03-24-15-51-13/dataset_no_cleaning.parquet"
+ )
+
+ # Firstly, take records where before there was no air source heat pump and afterwards there was
+ data = data[
+ data["has_air_source_heat_pump_ending"] & ~data["has_air_source_heat_pump"]
+ ]
+
+ # Start with a property that has a boiler
+ data = data[data["has_boiler"]]
+
+ static_columns = [
+ # Walls
+ 'walls_thermal_transmittance_ending',
+ 'is_filled_cavity_ending',
+ 'is_park_home_ending',
+ 'walls_insulation_thickness_ending',
+ 'external_insulation_ending',
+ 'internal_insulation_ending',
+ # Floors
+ # 'floor_thermal_transmittance_ending', # Don't subset on this, because it changes based on floor area
+ 'floor_insulation_thickness_ending',
+ # Roof
+ 'roof_thermal_transmittance_ending',
+ 'is_at_rafters_ending',
+ 'roof_insulation_thickness_ending',
+ # Hot water - air source heat pump will change the hot water system (probably from whatever it was -> main)
+ # 'heater_type_ending',
+ # 'system_type_ending',
+ # 'thermostat_characteristics_ending',
+ # 'heating_scope_ending',
+ # 'energy_recovery_ending',
+ # 'hotwater_tariff_type_ending',
+ # 'extra_features_ending',
+ # 'chp_systems_ending',
+ # 'distribution_system_ending',
+ # 'no_system_present_ending',
+ # 'appliance_ending',
+ # Heating - Will change when installing an ASHP
+ # 'has_radiators_ending',
+ # 'has_fan_coil_units_ending',
+ # 'has_pipes_in_screed_above_insulation_ending',
+ # 'has_pipes_in_insulated_timber_floor_ending',
+ # 'has_pipes_in_concrete_slab_ending',
+ # 'has_boiler_ending',
+ # 'has_air_source_heat_pump_ending', # We want the air source heat pump to change
+ # 'has_room_heaters_ending',
+ # 'has_electric_storage_heaters_ending',
+ # 'has_warm_air_ending',
+ # 'has_electric_underfloor_heating_ending',
+ # 'has_electric_ceiling_heating_ending',
+ # 'has_community_scheme_ending',
+ # 'has_ground_source_heat_pump_ending',
+ # 'has_no_system_present_ending',
+ # 'has_portable_electric_heaters_ending',
+ # 'has_water_source_heat_pump_ending',
+ # 'has_electric_heat_pump_ending',
+ # 'has_micro-cogeneration_ending',
+ # 'has_solar_assisted_heat_pump_ending',
+ # 'has_exhaust_source_heat_pump_ending',
+ # 'has_community_heat_pump_ending',
+ # 'has_electric_ending',
+ # 'has_mains_gas_ending',
+ # 'has_wood_logs_ending', 'has_coal_ending', 'has_oil_ending',
+ # 'has_wood_pellets_ending', 'has_anthracite_ending', 'has_dual_fuel_mineral_and_wood_ending',
+ # 'has_smokeless_fuel_ending', 'has_lpg_ending', 'has_b30k_ending', 'has_electricaire_ending',
+ # 'has_assumed_for_most_rooms_ending', 'has_underfloor_heating_ending',
+ # 'thermostatic_control_ending',
+ # 'charging_system_ending',
+ # 'switch_system_ending',
+ # 'no_control_ending',
+ # 'dhw_control_ending',
+ # 'community_heating_ending',
+ # 'multiple_room_thermostats_ending',
+ # 'auxiliary_systems_ending',
+ # 'trvs_ending',
+ # 'rate_control_ending',
+ # Window
+ 'glazing_type_ending',
+ # Fuel - could change with ASHP
+ # 'fuel_type_ending',
+ # 'main-fuel_tariff_type_ending',
+ # 'is_community_ending',
+ # 'no_individual_heating_or_community_network_ending',
+ # 'complex_fuel_type_ending',
+
+ 'mechanical_ventilation_ending', 'secondheat_description_ending', 'glazed_type_ending',
+ 'multi_glaze_proportion_ending', 'low_energy_lighting_ending', 'number_open_fireplaces_ending',
+ 'solar_water_heating_flag_ending',
+ 'photo_supply_ending',
+ 'energy_tariff_ending',
+ 'extension_count_ending',
+ 'total_floor_area_ending',
+ # 'hot_water_energy_eff_ending',
+ 'floor_energy_eff_ending',
+ 'windows_energy_eff_ending',
+ 'walls_energy_eff_ending',
+ 'sheating_energy_eff_ending',
+ 'roof_energy_eff_ending',
+ # 'mainheat_energy_eff_ending',
+ # 'mainheatc_energy_eff_ending',
+ 'lighting_energy_eff_ending',
+ 'number_habitable_rooms_ending',
+ 'number_heated_rooms_ending',
+ ]
+
+ for col in static_columns:
+
+ base_starting = col.split("_ending")[0]
+ if base_starting + "_starting" in data.columns:
+ starting_col = base_starting + "_starting"
+ else:
+ starting_col = base_starting
+ # Filter
+ print("Column: %s" % col)
+ print("Starting size: %s" % data.shape[0])
+ data = data[data[starting_col] == data[col]]
+ print("Ending size: %s" % data.shape[0])
+
+ z = data[['uprn', col, starting_col]]
+
+ # Great example UPRNs
+ # 100030969273
+ # 10034685399 - Completely transforms the heating and hot water systems in the home (goes from oil -> electricity)
+ # 100091200828 - goes from a liquid petroleum gas boiler to ashp
+
+ # Look for starting with a gas boiler
+ data[
+ data["has_boiler"] & data["has_radiators"] & data["has_mains_gas"] & ~data["has_boiler_ending"]
+ ]
+
+ # UPRN: 100011776843
+
+
+class TestAirSourceHeatPump:
+
+ def test_eligible(self):
+ # This tests a house, which will be suitable for an air source heat pump
+ epc_record = EPCRecord()
+ epc_record.prepared_epc = {
+ "county": "Broxbourne",
+ "mainheat-energy-eff": "Good",
+ "hot-water-energy-eff": "Good",
+ "mainheatc-energy-eff": "Good",
+ "number-heated-rooms": 5,
+ "property-type": "House",
+ "built-form": "Semi-Detached"
+ }
+
+ property_instance = Property(id=0, address="fake", postcode="fake", epc_record=epc_record)
+ property_instance.main_heating = {
+ 'original_description': 'Boiler and radiators, mains gas',
+ "clean_description": "Boiler and radiators, mains gas",
+ 'has_radiators': True,
+ 'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False,
+ 'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': True,
+ 'has_air_source_heat_pump': False,
+ 'has_room_heaters': False, 'has_electric_storage_heaters': False,
+ 'has_warm_air': False,
+ 'has_electric_underfloor_heating': False,
+ 'has_electric_ceiling_heating': False, 'has_community_scheme': False,
+ 'has_ground_source_heat_pump': False, 'has_no_system_present': False,
+ 'has_portable_electric_heaters': False,
+ 'has_water_source_heat_pump': False, 'has_electric': False,
+ 'has_mains_gas': True, 'has_wood_logs': False,
+ 'has_coal': False, 'has_oil': False, 'has_wood_pellets': False,
+ 'has_anthracite': False,
+ 'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False,
+ 'has_lpg': False, 'has_assumed': False,
+ 'has_electricaire': False, 'has_assumed_for_most_rooms': False,
+ 'has_underfloor_heating': False,
+ "has_electric_heat_pumps": False,
+ "has_micro-cogeneration": False
+ }
+ property_instance.main_fuel = {
+ 'original_description': 'mains gas (not community)', 'fuel_type': 'mains gas',
+ 'tariff_type': None,
+ 'is_community': False, 'no_individual_heating_or_community_network': False,
+ 'complex_fuel_type': None
+ }
+ property_instance.hotwater = {
+ 'original_description': 'From main system',
+ 'clean_description': 'From main system',
+ 'heater_type': None,
+ 'system_type': 'from main system',
+ 'thermostat_characteristics': None, 'heating_scope': None,
+ 'energy_recovery': None, 'tariff_type': None,
+ 'extra_features': None, 'chp_systems': None, 'distribution_system': None,
+ 'no_system_present': None,
+ 'assumed': False, "appliance": None
+ }
+ property_instance.main_heating_controls = {
+ 'original_description': 'Programmer, room thermostat and TRVs',
+ 'thermostatic_control': 'room thermostat', 'charging_system': None, 'switch_system': 'programmer',
+ 'no_control': None, 'dhw_control': None, 'community_heating': None, 'multiple_room_thermostats': False,
+ 'auxiliary_systems': None, 'trvs': 'trvs', 'rate_control': None
+
+ }
+
+ recommender = HeatingRecommender(property_instance=property_instance)
+
+ assert not recommender.heating_recommendations
+
+ recommender.recommend(phase=0)
+
+ assert recommender.recommendation is None
+
+ def test_air_source_heat_pump_gas_boiler_starting(self):
+ starting_epc = {
+ 'low-energy-fixed-light-count': '', 'address': '430 Gidlow Lane', 'uprn-source': 'Energy Assessor',
+ 'floor-height': '2.62', 'heating-cost-potential': '599', 'unheated-corridor-length': '',
+ 'hot-water-cost-potential': '67', 'construction-age-band': 'England and Wales: 1950-1966',
+ 'potential-energy-rating': 'C', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Good',
+ 'lighting-energy-eff': 'Very Good', 'environment-impact-potential': '72',
+ 'glazed-type': 'double glazing installed during or after 2002', 'heating-cost-current': '913',
+ 'address3': '', 'mainheatcont-description': 'Programmer, no room thermostat', 'sheating-energy-eff': 'N/A',
+ 'property-type': 'House', 'local-authority-label': 'Wigan', 'fixed-lighting-outlets-count': '9',
+ 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '210',
+ 'county': '', 'postcode': 'WN6 8RG', 'solar-water-heating-flag': 'N', 'constituency': 'E14001039',
+ 'co2-emissions-potential': '2.6', 'number-heated-rooms': '4',
+ 'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '180',
+ 'local-authority': 'E08000010', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0',
+ 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2022-02-15',
+ 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '78', 'address1': '430 Gidlow Lane',
+ 'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Wigan',
+ 'roof-energy-eff': 'Very Poor', 'total-floor-area': '80.0', 'building-reference-number': '10002334112',
+ 'environment-impact-current': '38', 'co2-emissions-current': '6.2',
+ 'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A',
+ 'number-habitable-rooms': '4', 'address2': '', 'hot-water-env-eff': 'Poor', 'posttown': 'WIGAN',
+ 'mainheatc-energy-eff': 'Very Poor', 'main-fuel': 'mains gas (not community)',
+ 'lighting-env-eff': 'Very Good', 'windows-energy-eff': 'Good', 'floor-env-eff': 'N/A',
+ 'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in all fixed outlets',
+ 'roof-env-eff': 'Very Poor', 'walls-energy-eff': 'Average', 'photo-supply': '0.0',
+ 'lighting-cost-potential': '67', 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100',
+ 'main-heating-controls': '', 'lodgement-datetime': '2022-02-23 16:39:41', 'flat-top-storey': '',
+ 'current-energy-rating': 'E', 'secondheat-description': 'Room heaters, mains gas',
+ 'walls-env-eff': 'Average', 'transaction-type': 'ECO assessment', 'uprn': '100011776843',
+ 'current-energy-efficiency': '45', 'energy-consumption-current': '441',
+ 'mainheat-description': 'Boiler and radiators, mains gas', 'lighting-cost-current': '67',
+ 'lodgement-date': '2022-02-23', 'extension-count': '1', 'mainheatc-env-eff': 'Very Poor',
+ 'lmk-key': '46cb404438a6d88ddff8965cab8b3027ec15c32d93e0b6a5f0381a5109b9bb0d', 'wind-turbine-count': '0',
+ 'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '77',
+ 'hot-water-energy-eff': 'Poor', 'low-energy-lighting': '100',
+ 'walls-description': 'Cavity wall, filled cavity',
+ 'hotwater-description': 'From main system, no cylinder thermostat'
+ }
+
+ ending_epc = {
+ 'low-energy-fixed-light-count': '', 'address': '430 Gidlow Lane', 'uprn-source': 'Energy Assessor',
+ 'floor-height': '2.62', 'heating-cost-potential': '803', 'unheated-corridor-length': '',
+ 'hot-water-cost-potential': '292', 'construction-age-band': 'England and Wales: 1950-1966',
+ 'potential-energy-rating': 'C', 'mainheat-energy-eff': 'Very Good', 'windows-env-eff': 'Good',
+ 'lighting-energy-eff': 'Very Good', 'environment-impact-potential': '78',
+ 'glazed-type': 'double glazing installed during or after 2002', 'heating-cost-current': '861',
+ 'address3': '', 'mainheatcont-description': 'Time and temperature zone control',
+ 'sheating-energy-eff': 'N/A', 'property-type': 'House', 'local-authority-label': 'Wigan',
+ 'fixed-lighting-outlets-count': '9', 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural',
+ 'hot-water-cost-current': '434', 'county': '', 'postcode': 'WN6 8RG', 'solar-water-heating-flag': 'N',
+ 'constituency': 'E14001039', 'co2-emissions-potential': '2.0', 'number-heated-rooms': '4',
+ 'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '147',
+ 'local-authority': 'E08000010', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0',
+ 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2022-05-11',
+ 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '43', 'address1': '430 Gidlow Lane',
+ 'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Wigan',
+ 'roof-energy-eff': 'Very Poor', 'total-floor-area': '80.0', 'building-reference-number': '10002334112',
+ 'environment-impact-current': '63', 'co2-emissions-current': '3.4',
+ 'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A',
+ 'number-habitable-rooms': '4', 'address2': '', 'hot-water-env-eff': 'Poor', 'posttown': 'WIGAN',
+ 'mainheatc-energy-eff': 'Very Good', 'main-fuel': 'electricity (not community)',
+ 'lighting-env-eff': 'Very Good', 'windows-energy-eff': 'Good', 'floor-env-eff': 'N/A',
+ 'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in all fixed outlets',
+ 'roof-env-eff': 'Very Poor', 'walls-energy-eff': 'Average', 'photo-supply': '0.0',
+ 'lighting-cost-potential': '67', 'mainheat-env-eff': 'Very Good', 'multi-glaze-proportion': '100',
+ 'main-heating-controls': '', 'lodgement-datetime': '2022-06-06 13:01:20', 'flat-top-storey': '',
+ 'current-energy-rating': 'E', 'secondheat-description': 'Room heaters, mains gas',
+ 'walls-env-eff': 'Average', 'transaction-type': 'ECO assessment', 'uprn': '100011776843',
+ 'current-energy-efficiency': '53', 'energy-consumption-current': '252',
+ 'mainheat-description': 'Air source heat pump, radiators, electric', 'lighting-cost-current': '67',
+ 'lodgement-date': '2022-06-06', 'extension-count': '1', 'mainheatc-env-eff': 'Very Good',
+ 'lmk-key': '672d5947f3d4a55d97255af71651d6127a939418fa66a687070af77e0ba90df2', 'wind-turbine-count': '0',
+ 'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '70',
+ 'hot-water-energy-eff': 'Very Poor', 'low-energy-lighting': '100',
+ 'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
+ }
+
+ # differences = []
+ # for k, v in ending_epc.items():
+ # if v != starting_epc[k]:
+ # differences.append(
+ # {
+ # "variable": k,
+ # "starting_value": starting_epc[k],
+ # "ending_value": v
+ # }
+ # )
+ # differences = pd.DataFrame(differences)
+ #
+ # diffs = differences[
+ # differences["variable"].isin(
+ # [
+ # "mainheat-energy-eff",
+ # "mainheatcont-description",
+ # "mainheatc-energy-eff",
+ # "main-fuel",
+ # "mainheat-env-eff",
+ # "mainheat-description",
+ # "hot-water-energy-eff",
+ # "hotwater-description"
+ # ]
+ # )
+ # ]
+
+ cleaning_data = read_dataframe_from_s3_parquet(
+ bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+ )
+
+ cleaned = read_from_s3(
+ s3_file_name="cleaned_epc_data/cleaned.bson",
+ bucket_name="retrofit-data-dev"
+ )
+ cleaned = msgpack.unpackb(cleaned, raw=False)
+
+ photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+ epc = EPCRecord(
+ epc_records={
+ 'original_epc': starting_epc,
+ 'full_sap_epc': {},
+ 'old_data': []
+ },
+ run_mode="newdata",
+ cleaning_data=cleaning_data
+ )
+
+ home = Property(
+ id=0,
+ address="",
+ postcode="",
+ epc_record=epc,
+ already_installed={},
+ non_invasive_recommendations={},
+ )
+ home.in_conservation_area = False
+ home.is_listed = False
+ home.is_heritage = False
+ home.restricted_measures = True
+ home.get_components(
+ cleaned=cleaned,
+ photo_supply_lookup=photo_supply_lookup,
+ floor_area_decile_thresholds=floor_area_decile_thresholds
+ )
+
+ recommender = HeatingRecommender(property_instance=home)
+ recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False)
+
+ # Patch - for this property, the hot water energy efficiency is very poor. It's not clear why this is,
+ # but we insert it for this test
+ recommender.heating_recommendations[0]["simulation_config"]["hot_water_energy_eff_ending"] = "Very Poor"
+
+ property_recommendations = Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations])
+
+ assert len(recommender.heating_recommendations) == 1
+
+ home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+ home.adjust_difference_record_with_recommendations(
+ property_recommendations, []
+ )
+
+ scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(
+ columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+ "carbon_ending"]
+ )
+
+ model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+ model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+ predictions_dict = model_api.predict_all(
+ df=scoring_data,
+ bucket="retrofit-data-dev",
+ prediction_buckets={
+ "sap_change_predictions": "retrofit-sap-predictions-dev",
+ }
+ )
+ assert predictions_dict["sap_change_predictions"]["predictions"].values[0] == 52.2
+
+ def test_air_source_heat_pump_gas_boiler_starting_2(self):
+ """
+ This property seems to have minuscule movement in SAP - just 2 points
+ :return:
+ """
+
+ starting_epc = {
+ 'low-energy-fixed-light-count': '', 'address': '31 Whinney Hill Park', 'uprn-source': 'Energy Assessor',
+ 'floor-height': '2.3', 'heating-cost-potential': '394', 'unheated-corridor-length': '',
+ 'hot-water-cost-potential': '48', 'construction-age-band': 'England and Wales: 1967-1975',
+ 'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average',
+ 'lighting-energy-eff': 'Good', 'environment-impact-potential': '87',
+ 'glazed-type': 'double glazing, unknown install date', 'heating-cost-current': '487', 'address3': '',
+ 'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'sheating-energy-eff': 'N/A',
+ 'property-type': 'Bungalow', 'local-authority-label': 'Calderdale', 'fixed-lighting-outlets-count': '5',
+ 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '86',
+ 'county': '', 'postcode': 'HD6 2PX', 'solar-water-heating-flag': 'N', 'constituency': 'E14000614',
+ 'co2-emissions-potential': '0.8', 'number-heated-rooms': '2',
+ 'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '105',
+ 'local-authority': 'E08000033', 'built-form': 'End-Terrace', 'number-open-fireplaces': '0',
+ 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2021-11-25',
+ 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '56', 'address1': '31 Whinney Hill Park',
+ 'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Calder Valley',
+ 'roof-energy-eff': 'Good', 'total-floor-area': '44.0', 'building-reference-number': '10001772583',
+ 'environment-impact-current': '62', 'co2-emissions-current': '2.5',
+ 'roof-description': 'Pitched, 250 mm loft insulation', 'floor-energy-eff': 'N/A',
+ 'number-habitable-rooms': '2', 'address2': '', 'hot-water-env-eff': 'Good', 'posttown': 'BRIGHOUSE',
+ 'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Good',
+ 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+ 'lighting-description': 'Low energy lighting in 60% of fixed outlets', 'roof-env-eff': 'Good',
+ 'walls-energy-eff': 'Average', 'photo-supply': '0.0', 'lighting-cost-potential': '40',
+ 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+ 'lodgement-datetime': '2021-11-25 11:39:35', 'flat-top-storey': '', 'current-energy-rating': 'D',
+ 'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average',
+ 'transaction-type': 'rental', 'uprn': '100051304421', 'current-energy-efficiency': '62',
+ 'energy-consumption-current': '322', 'mainheat-description': 'Boiler and radiators, mains gas',
+ 'lighting-cost-current': '56', 'lodgement-date': '2021-11-25', 'extension-count': '0',
+ 'mainheatc-env-eff': 'Good', 'lmk-key': '077f70657e9c3f1f0ce5392798398398616b159493b2a8ca2338961596631c27',
+ 'wind-turbine-count': '0', 'tenure': 'Rented (social)', 'floor-level': '',
+ 'potential-energy-efficiency': '86', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '60',
+ 'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
+ }
+
+ ending_epc = {
+ 'low-energy-fixed-light-count': '', 'address': '31 Whinney Hill Park',
+ 'uprn-source': 'Energy Assessor', 'floor-height': '2.3', 'heating-cost-potential': '277',
+ 'unheated-corridor-length': '', 'hot-water-cost-potential': '266',
+ 'construction-age-band': 'England and Wales: 1967-1975', 'potential-energy-rating': 'B',
+ 'mainheat-energy-eff': 'Very Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Good',
+ 'environment-impact-potential': '90', 'glazed-type': 'double glazing, unknown install date',
+ 'heating-cost-current': '331', 'address3': '',
+ 'mainheatcont-description': 'Programmer and room thermostat', 'sheating-energy-eff': 'N/A',
+ 'property-type': 'Bungalow', 'local-authority-label': 'Calderdale',
+ 'fixed-lighting-outlets-count': '5', 'energy-tariff': 'Single',
+ 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '404', 'county': '',
+ 'postcode': 'HD6 2PX', 'solar-water-heating-flag': 'N', 'constituency': 'E14000614',
+ 'co2-emissions-potential': '0.7', 'number-heated-rooms': '2',
+ 'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '92',
+ 'local-authority': 'E08000033', 'built-form': 'End-Terrace', 'number-open-fireplaces': '0',
+ 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal',
+ 'inspection-date': '2021-11-25', 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '48',
+ 'address1': '31 Whinney Hill Park', 'heat-loss-corridor': '', 'flat-storey-count': '',
+ 'constituency-label': 'Calder Valley', 'roof-energy-eff': 'Good', 'total-floor-area': '44.0',
+ 'building-reference-number': '10001772583', 'environment-impact-current': '68',
+ 'co2-emissions-current': '2.1', 'roof-description': 'Pitched, 250 mm loft insulation',
+ 'floor-energy-eff': 'N/A', 'number-habitable-rooms': '2', 'address2': '',
+ 'hot-water-env-eff': 'Poor', 'posttown': 'BRIGHOUSE', 'mainheatc-energy-eff': 'Average',
+ 'main-fuel': 'electricity (not community)', 'lighting-env-eff': 'Good',
+ 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+ 'lighting-description': 'Low energy lighting in 60% of fixed outlets', 'roof-env-eff': 'Good',
+ 'walls-energy-eff': 'Average', 'photo-supply': '0.0', 'lighting-cost-potential': '40',
+ 'mainheat-env-eff': 'Very Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+ 'lodgement-datetime': '2022-03-23 16:06:21', 'flat-top-storey': '', 'current-energy-rating': 'D',
+ 'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average',
+ 'transaction-type': 'rental', 'uprn': '100051304421', 'current-energy-efficiency': '64',
+ 'energy-consumption-current': '283',
+ 'mainheat-description': 'Air source heat pump, radiators, electric',
+ 'lighting-cost-current': '57', 'lodgement-date': '2022-03-23', 'extension-count': '0',
+ 'mainheatc-env-eff': 'Average',
+ 'lmk-key': '6296248141447b53426a40f1c39da17dad5f4786485db55ee38737891111a4d4',
+ 'wind-turbine-count': '0', 'tenure': 'Rented (social)', 'floor-level': '',
+ 'potential-energy-efficiency': '89', 'hot-water-energy-eff': 'Very Poor',
+ 'low-energy-lighting': '60', 'walls-description': 'Cavity wall, filled cavity',
+ 'hotwater-description': 'From main system'
+ }
+
+ # differences = []
+ # for k, v in ending_epc.items():
+ # if v != starting_epc[k]:
+ # differences.append(
+ # {
+ # "variable": k,
+ # "starting_value": starting_epc[k],
+ # "ending_value": v
+ # }
+ # )
+ # differences = pd.DataFrame(differences)
+ #
+ # diffs = differences[
+ # differences["variable"].isin(
+ # [
+ # "mainheat-energy-eff",
+ # "mainheatcont-description",
+ # "mainheatc-energy-eff",
+ # "main-fuel",
+ # "mainheat-env-eff",
+ # "mainheat-description",
+ # "hot-water-energy-eff",
+ # "hotwater-description"
+ # ]
+ # )
+ # ]
+
+ cleaning_data = read_dataframe_from_s3_parquet(
+ bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+ )
+
+ cleaned = read_from_s3(
+ s3_file_name="cleaned_epc_data/cleaned.bson",
+ bucket_name="retrofit-data-dev"
+ )
+ cleaned = msgpack.unpackb(cleaned, raw=False)
+
+ photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+ epc = EPCRecord(
+ epc_records={
+ 'original_epc': starting_epc,
+ 'full_sap_epc': {},
+ 'old_data': []
+ },
+ run_mode="newdata",
+ cleaning_data=cleaning_data
+ )
+
+ home = Property(
+ id=0,
+ address="",
+ postcode="",
+ epc_record=epc,
+ already_installed={},
+ non_invasive_recommendations={},
+ )
+ home.in_conservation_area = False
+ home.is_listed = False
+ home.is_heritage = False
+ home.restricted_measures = True
+ home.get_components(
+ cleaned=cleaned,
+ photo_supply_lookup=photo_supply_lookup,
+ floor_area_decile_thresholds=floor_area_decile_thresholds
+ )
+
+ recommender = HeatingRecommender(property_instance=home)
+ recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False)
+ property_recommendations = Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations])
+
+ assert len(recommender.heating_recommendations) == 1
+
+ home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+ home.adjust_difference_record_with_recommendations(
+ property_recommendations, []
+ )
+
+ scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(
+ columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+ "carbon_ending"]
+ )
+
+ model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+ model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+ predictions_dict = model_api.predict_all(
+ df=scoring_data,
+ bucket="retrofit-data-dev",
+ prediction_buckets={
+ "sap_change_predictions": "retrofit-sap-predictions-dev",
+ }
+ )
+ assert predictions_dict["sap_change_predictions"]["predictions"].values[0] == 69.3
+
+ # In actuality with this property, the heating controls get downgraded, so we test a manual patch of this
+ patched_simulation_config = {
+ 'mainheat_energy_eff_ending': "Very Good",
+ 'hot_water_energy_eff_ending': 'Very Poor',
+ 'has_boiler_ending': False,
+ 'has_air_source_heat_pump_ending': True,
+ 'has_electric_ending': True,
+ 'has_mains_gas_ending': False,
+ 'fuel_type_ending': 'electricity',
+ 'trvs_ending': None,
+ "mainheatc_energy_eff_ending": 'Average'
+ }
+
+ # PATCHING
+ property_recommendations_patch = Recommendations.insert_temp_recommendation_id(
+ [recommender.heating_recommendations]
+ )
+ property_recommendations_patch[0][0]["simulation_config"] = patched_simulation_config
+
+ home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+ home.adjust_difference_record_with_recommendations(
+ property_recommendations_patch, []
+ )
+
+ scoring_data_patch = pd.DataFrame(home.recommendations_scoring_data).drop(
+ columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+ "carbon_ending"]
+ )
+
+ model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+ model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+ predictions_dict_patch = model_api.predict_all(
+ df=scoring_data_patch,
+ bucket="retrofit-data-dev",
+ prediction_buckets={
+ "sap_change_predictions": "retrofit-sap-predictions-dev",
+ }
+ )
+ # The error is only 0.3, so the model is working
+ assert predictions_dict_patch["sap_change_predictions"]["predictions"].values[0] == 64.3
+ assert ending_epc["current-energy-efficiency"] == '64'
+
+ def test_air_source_heat_pump_lpg_boiler(self):
+ starting_epc = {
+ 'low-energy-fixed-light-count': '', 'address': 'Holly Lodge, The Drive, Perry',
+ 'uprn-source': 'Energy Assessor', 'floor-height': '2.8', 'heating-cost-potential': '1628',
+ 'unheated-corridor-length': '', 'hot-water-cost-potential': '175',
+ 'construction-age-band': 'England and Wales: 1950-1966', 'potential-energy-rating': 'D',
+ 'mainheat-energy-eff': 'Poor', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Average',
+ 'environment-impact-potential': '70', 'glazed-type': 'double glazing, unknown install date',
+ 'heating-cost-current': '2158', 'address3': 'Perry',
+ 'mainheatcont-description': 'No time or thermostatic control of room temperature',
+ 'sheating-energy-eff': 'N/A', 'property-type': 'Bungalow', 'local-authority-label': 'Huntingdonshire',
+ 'fixed-lighting-outlets-count': '12', 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural',
+ 'hot-water-cost-current': '257', 'county': 'Cambridgeshire', 'postcode': 'PE28 0SX',
+ 'solar-water-heating-flag': 'N', 'constituency': 'E14000757', 'co2-emissions-potential': '3.3',
+ 'number-heated-rooms': '5', 'floor-description': 'Solid, no insulation (assumed)',
+ 'energy-consumption-potential': '128', 'local-authority': 'E07000011', 'built-form': 'Semi-Detached',
+ 'number-open-fireplaces': '0', 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal',
+ 'inspection-date': '2023-08-31', 'mains-gas-flag': 'N', 'co2-emiss-curr-per-floor-area': '51',
+ 'address1': 'Holly Lodge', 'heat-loss-corridor': '', 'flat-storey-count': '',
+ 'constituency-label': 'Huntingdon', 'roof-energy-eff': 'Good', 'total-floor-area': '117.0',
+ 'building-reference-number': '10005199915', 'environment-impact-current': '50',
+ 'co2-emissions-current': '5.9', 'roof-description': 'Pitched, 270 mm loft insulation',
+ 'floor-energy-eff': 'N/A', 'number-habitable-rooms': '5', 'address2': 'The Drive',
+ 'hot-water-env-eff': 'Good', 'posttown': 'HUNTINGDON', 'mainheatc-energy-eff': 'Very Poor',
+ 'main-fuel': 'LPG (not community)', 'lighting-env-eff': 'Average', 'windows-energy-eff': 'Average',
+ 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+ 'lighting-description': 'Low energy lighting in 33% of fixed outlets', 'roof-env-eff': 'Good',
+ 'walls-energy-eff': 'Average', 'photo-supply': '0.0', 'lighting-cost-potential': '166',
+ 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+ 'lodgement-datetime': '2023-10-30 13:46:54', 'flat-top-storey': '', 'current-energy-rating': 'F',
+ 'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average',
+ 'transaction-type': 'ECO assessment', 'uprn': '100091200828', 'current-energy-efficiency': '32',
+ 'energy-consumption-current': '243', 'mainheat-description': 'Boiler and radiators, LPG',
+ 'lighting-cost-current': '277', 'lodgement-date': '2023-10-30', 'extension-count': '0',
+ 'mainheatc-env-eff': 'Very Poor',
+ 'lmk-key': 'f1d3bd4b8b50bc9b006231ccb158537c408523b748b3f4ef7e98cd03b144afa5', 'wind-turbine-count': '0',
+ 'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '56',
+ 'hot-water-energy-eff': 'Poor', 'low-energy-lighting': '33',
+ 'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
+ }
+
+ ending_epc = {
+ 'low-energy-fixed-light-count': '', 'address': 'Holly Lodge, The Drive, Perry',
+ 'uprn-source': 'Energy Assessor', 'floor-height': '2.8', 'heating-cost-potential': '917',
+ 'unheated-corridor-length': '', 'hot-water-cost-potential': '328',
+ 'construction-age-band': 'England and Wales: 1950-1966', 'potential-energy-rating': 'A',
+ 'mainheat-energy-eff': 'Very Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Average',
+ 'environment-impact-potential': '96', 'glazed-type': 'double glazing, unknown install date',
+ 'heating-cost-current': '1098', 'address3': 'Perry',
+ 'mainheatcont-description': 'Programmer, TRVs and bypass', 'sheating-energy-eff': 'N/A',
+ 'property-type': 'Bungalow', 'local-authority-label': 'Huntingdonshire',
+ 'fixed-lighting-outlets-count': '12', 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural',
+ 'hot-water-cost-current': '328', 'county': 'Cambridgeshire', 'postcode': 'PE28 0SX',
+ 'solar-water-heating-flag': 'N', 'constituency': 'E14000757', 'co2-emissions-potential': '0.3',
+ 'number-heated-rooms': '5', 'floor-description': 'Solid, no insulation (assumed)',
+ 'energy-consumption-potential': '16', 'local-authority': 'E07000011', 'built-form': 'Semi-Detached',
+ 'number-open-fireplaces': '0', 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal',
+ 'inspection-date': '2023-10-05', 'mains-gas-flag': 'N', 'co2-emiss-curr-per-floor-area': '6',
+ 'address1': 'Holly Lodge', 'heat-loss-corridor': '', 'flat-storey-count': '',
+ 'constituency-label': 'Huntingdon', 'roof-energy-eff': 'Good', 'total-floor-area': '117.0',
+ 'building-reference-number': '10005199915', 'environment-impact-current': '92',
+ 'co2-emissions-current': '0.7', 'roof-description': 'Pitched, 270 mm loft insulation',
+ 'floor-energy-eff': 'N/A', 'number-habitable-rooms': '5', 'address2': 'The Drive',
+ 'hot-water-env-eff': 'Very Good', 'posttown': 'HUNTINGDON', 'mainheatc-energy-eff': 'Average',
+ 'main-fuel': 'electricity (not community)', 'lighting-env-eff': 'Average', 'windows-energy-eff': 'Average',
+ 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+ 'lighting-description': 'Low energy lighting in 33% of fixed outlets', 'roof-env-eff': 'Good',
+ 'walls-energy-eff': 'Average', 'photo-supply': '', 'lighting-cost-potential': '166',
+ 'mainheat-env-eff': 'Very Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+ 'lodgement-datetime': '2023-11-01 16:29:16', 'flat-top-storey': '', 'current-energy-rating': 'A',
+ 'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average',
+ 'transaction-type': 'ECO assessment', 'uprn': '100091200828', 'current-energy-efficiency': '92',
+ 'energy-consumption-current': '37', 'mainheat-description': 'Air source heat pump, radiators, electric',
+ 'lighting-cost-current': '277', 'lodgement-date': '2023-11-01', 'extension-count': '0',
+ 'mainheatc-env-eff': 'Average',
+ 'lmk-key': 'cb7f2838b727907767c8c2a385cd22f722b1e4745463391d910d228e52124515', 'wind-turbine-count': '0',
+ 'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '95',
+ 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '33',
+ 'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
+ }
+
+ cleaning_data = read_dataframe_from_s3_parquet(
+ bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+ )
+
+ cleaned = read_from_s3(
+ s3_file_name="cleaned_epc_data/cleaned.bson",
+ bucket_name="retrofit-data-dev"
+ )
+ cleaned = msgpack.unpackb(cleaned, raw=False)
+
+ photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+ epc = EPCRecord(
+ epc_records={
+ 'original_epc': starting_epc,
+ 'full_sap_epc': {},
+ 'old_data': []
+ },
+ run_mode="newdata",
+ cleaning_data=cleaning_data
+ )
+
+ home = Property(
+ id=0,
+ address="",
+ postcode="",
+ epc_record=epc,
+ already_installed={},
+ non_invasive_recommendations={},
+ )
+ home.in_conservation_area = False
+ home.is_listed = False
+ home.is_heritage = False
+ home.restricted_measures = True
+ home.get_components(
+ cleaned=cleaned,
+ photo_supply_lookup=photo_supply_lookup,
+ floor_area_decile_thresholds=floor_area_decile_thresholds
+ )
+
+ recommender = HeatingRecommender(property_instance=home)
+ recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False)
+ property_recommendations = Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations])
+
+ assert len(recommender.heating_recommendations) == 1
+
+ home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+ home.adjust_difference_record_with_recommendations(
+ property_recommendations, []
+ )
+
+ scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(
+ columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+ "carbon_ending"]
+ )
+
+ model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+ model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+ predictions_dict = model_api.predict_all(
+ df=scoring_data,
+ bucket="retrofit-data-dev",
+ prediction_buckets={
+ "sap_change_predictions": "retrofit-sap-predictions-dev",
+ }
+ )
+ # We predict a huge uplift but not quite as much as the EPC, due to some distinct differences between our
+ # recommendation and the EPC
+ assert predictions_dict["sap_change_predictions"]["predictions"].values[0] == 81.3
+ assert ending_epc['current-energy-efficiency'] == '92'
+
+ # PATCH
+ # We patch the simulation config, to reflect the ending EPC, to see if we get closer to the ending EPC's score
+ patched_simulation_config = {
+ 'mainheat_energy_eff_ending': "Very Good",
+ 'hot_water_energy_eff_ending': 'Good',
+ 'has_boiler_ending': False,
+ 'has_air_source_heat_pump_ending': True,
+ 'has_electric_ending': True,
+ 'has_lpg_ending': False,
+ 'fuel_type_ending': 'electricity',
+ 'switch_system_ending': 'programmer',
+ 'no_control_ending': None,
+ 'auxiliary_systems_ending': 'bypass',
+ 'trvs_ending': 'trvs',
+ "mainheatc_energy_eff_ending": 'Average'
+ }
+
+ # PATCHING
+ property_recommendations_patch = Recommendations.insert_temp_recommendation_id(
+ [recommender.heating_recommendations]
+ )
+ property_recommendations_patch[0][0]["simulation_config"] = patched_simulation_config
+
+ home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+ home.adjust_difference_record_with_recommendations(
+ property_recommendations_patch, []
+ )
+
+ scoring_data_patch = pd.DataFrame(home.recommendations_scoring_data).drop(
+ columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+ "carbon_ending"]
+ )
+
+ model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+ model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+ predictions_dict_patch = model_api.predict_all(
+ df=scoring_data_patch,
+ bucket="retrofit-data-dev",
+ prediction_buckets={
+ "sap_change_predictions": "retrofit-sap-predictions-dev",
+ }
+ )
+
+ assert predictions_dict_patch["sap_change_predictions"]["predictions"].values[0] == 88.9
+ # We still underpredict but the improvement is notable
+
+ def test_offgrid(self):
+ """
+ We test on a property we've worked with before, where we compare two options
+ a) Upgrading to a boiler
+ b) Upgrading to a heat pump
+ :return:
+ """
+
+ starting_epc = {
+ 'low-energy-fixed-light-count': '', 'address': '6 Beech Road', 'uprn-source': 'Energy Assessor',
+ 'floor-height': '2.4', 'heating-cost-potential': '612', 'unheated-corridor-length': '',
+ 'hot-water-cost-potential': '123', 'construction-age-band': 'England and Wales: 1930-1949',
+ 'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Very Poor', 'windows-env-eff': 'Good',
+ 'lighting-energy-eff': 'Good', 'environment-impact-potential': '87',
+ 'glazed-type': 'double glazing installed during or after 2002', 'heating-cost-current': '2278',
+ 'address3': '', 'mainheatcont-description': 'Appliance thermostats', 'sheating-energy-eff': 'N/A',
+ 'property-type': 'House', 'local-authority-label': 'Dudley', 'fixed-lighting-outlets-count': '9',
+ 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '604',
+ 'county': '', 'postcode': 'DY1 4BP', 'solar-water-heating-flag': 'N', 'constituency': 'E14000671',
+ 'co2-emissions-potential': '1.0', 'number-heated-rooms': '4',
+ 'floor-description': 'Solid, no insulation (assumed)', 'energy-consumption-potential': '93',
+ 'local-authority': 'E08000027', 'built-form': 'End-Terrace', 'number-open-fireplaces': '0',
+ 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2024-03-13',
+ 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '83', 'address1': '6 Beech Road',
+ 'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Dudley North',
+ 'roof-energy-eff': 'Very Poor', 'total-floor-area': '60.0', 'building-reference-number': '10005780080',
+ 'environment-impact-current': '41', 'co2-emissions-current': '5.0',
+ 'roof-description': 'Pitched, 12 mm loft insulation', 'floor-energy-eff': 'N/A',
+ 'number-habitable-rooms': '4', 'address2': '', 'hot-water-env-eff': 'Poor', 'posttown': 'DUDLEY',
+ 'mainheatc-energy-eff': 'Good', 'main-fuel': 'electricity (not community)', 'lighting-env-eff': 'Good',
+ 'windows-energy-eff': 'Good', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+ 'lighting-description': 'Low energy lighting in 67% of fixed outlets', 'roof-env-eff': 'Very Poor',
+ 'walls-energy-eff': 'Average', 'photo-supply': '0.0', 'lighting-cost-potential': '113',
+ 'mainheat-env-eff': 'Poor', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+ 'lodgement-datetime': '2024-03-13 11:29:11', 'flat-top-storey': '', 'current-energy-rating': 'F',
+ 'secondheat-description': 'None', 'walls-env-eff': 'Average', 'transaction-type': 'rental',
+ 'uprn': '90055152', 'current-energy-efficiency': '32', 'energy-consumption-current': '491',
+ 'mainheat-description': 'Room heaters, electric', 'lighting-cost-current': '113',
+ 'lodgement-date': '2024-03-13', 'extension-count': '1', 'mainheatc-env-eff': 'Good',
+ 'lmk-key': '78ddf851b660e599a0894924d0e6b503980f5e0ad1aa711f8411718dc2989c44', 'wind-turbine-count': '0',
+ 'tenure': 'Rented (social)', 'floor-level': '', 'potential-energy-efficiency': '87',
+ 'hot-water-energy-eff': 'Very Poor', 'low-energy-lighting': '67',
+ 'walls-description': 'Cavity wall, filled cavity',
+ 'hotwater-description': 'Electric immersion, standard tariff'
+ }
+
+ cleaning_data = read_dataframe_from_s3_parquet(
+ bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+ )
+
+ cleaned = read_from_s3(
+ s3_file_name="cleaned_epc_data/cleaned.bson",
+ bucket_name="retrofit-data-dev"
+ )
+ cleaned = msgpack.unpackb(cleaned, raw=False)
+
+ photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+ epc = EPCRecord(
+ epc_records={
+ 'original_epc': starting_epc,
+ 'full_sap_epc': {},
+ 'old_data': []
+ },
+ run_mode="newdata",
+ cleaning_data=cleaning_data
+ )
+
+ home = Property(
+ id=0,
+ address="",
+ postcode="",
+ epc_record=epc,
+ already_installed={},
+ non_invasive_recommendations={},
+ )
+ home.in_conservation_area = False
+ home.is_listed = False
+ home.is_heritage = False
+ home.restricted_measures = True
+ home.get_components(
+ cleaned=cleaned,
+ photo_supply_lookup=photo_supply_lookup,
+ floor_area_decile_thresholds=floor_area_decile_thresholds
+ )
+
+ recommender = HeatingRecommender(property_instance=home)
+ recommender.recommend_air_source_heat_pump(phase=0, has_cavity_or_loft_recommendations=False)
+ recommender.recommend_boiler_upgrades(phase=0, system_change=True, exising_room_heaters=False)
+
+ assert len(recommender.heating_recommendations) == 3
+
+ property_recommendations = Recommendations.insert_temp_recommendation_id([recommender.heating_recommendations])
+
+ home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+ home.adjust_difference_record_with_recommendations(
+ property_recommendations, []
+ )
+
+ scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(
+ columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+ "carbon_ending"]
+ )
+
+ model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+ model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+ predictions_dict = model_api.predict_all(
+ df=scoring_data,
+ bucket="retrofit-data-dev",
+ prediction_buckets={
+ "sap_change_predictions": "retrofit-sap-predictions-dev",
+ }
+ )
+
+ # The ASHP isn't better under SAP, compared to a gas boiler with good heat controls
+ assert predictions_dict["sap_change_predictions"]["predictions"].tolist() == [66.9, 65.5, 65.9]
diff --git a/recommendations/tests/test_solar_pv_recommendations.py b/recommendations/tests/test_solar_pv_recommendations.py
index 5481cb17..fbbfe3a1 100644
--- a/recommendations/tests/test_solar_pv_recommendations.py
+++ b/recommendations/tests/test_solar_pv_recommendations.py
@@ -2,6 +2,13 @@ import pytest
from recommendations.SolarPvRecommendations import SolarPvRecommendations
from backend.Property import Property
from etl.epc.Record import EPCRecord
+import pandas as pd
+from datetime import datetime
+from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
+from recommendations.Recommendations import Recommendations
+from backend.ml_models.api import ModelApi
+import msgpack
class TestSolarPvRecommendations:
@@ -82,3 +89,321 @@ class TestSolarPvRecommendations:
'photo_supply': 4000
}
]
+
+ def test_model(self):
+ """
+ Test the solar PV recommendation engine in conjunction with the SAP change model.
+
+ Builds a Property from ``starting_epc``, generates solar PV recommendations, keeps the
+ recommendations whose ``photo_supply`` is 50 and scores them with the
+ ``sap_change_predictions`` model through ``ModelApi``.
+
+ ``ending_epc`` is a later EPC for the same UPRN (lodged 2021-12-21, photo-supply '50.0',
+ current-energy-efficiency '65'). It is not fed into the pipeline; it is kept next to the
+ expected predictions as a reference value.
+
+ The reference data is read from the "retrofit-data-dev" bucket, so the test needs access to it.
+ """
+
+ # EPC lodged 2021-12-01 for UPRN 100030949912: photo-supply '0.0', current-energy-efficiency '54'
+ starting_epc = {
+ 'low-energy-fixed-light-count': '', 'address': '27 Cromwell Street', 'uprn-source': 'Energy Assessor',
+ 'floor-height': '2.5', 'heating-cost-potential': '443', 'unheated-corridor-length': '',
+ 'hot-water-cost-potential': '53', 'construction-age-band': 'England and Wales: before 1900',
+ 'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average',
+ 'lighting-energy-eff': 'Very Poor', 'environment-impact-potential': '85',
+ 'glazed-type': 'double glazing installed before 2002', 'heating-cost-current': '904', 'address3': '',
+ 'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'sheating-energy-eff': 'N/A',
+ 'property-type': 'House', 'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '10',
+ 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '79',
+ 'county': 'Lincolnshire', 'postcode': 'DN21 1DH', 'solar-water-heating-flag': 'N',
+ 'constituency': 'E14000707', 'co2-emissions-potential': '1.5', 'number-heated-rooms': '5',
+ 'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '92',
+ 'local-authority': 'E07000142', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0',
+ 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2021-11-17',
+ 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '61', 'address1': '27 Cromwell Street',
+ 'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Gainsborough',
+ 'roof-energy-eff': 'Very Poor', 'total-floor-area': '89.0', 'building-reference-number': '10001989430',
+ 'environment-impact-current': '47', 'co2-emissions-current': '5.4',
+ 'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A',
+ 'number-habitable-rooms': '5', 'address2': '', 'hot-water-env-eff': 'Good', 'posttown': 'GAINSBOROUGH',
+ 'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Very Poor',
+ 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+ 'lighting-description': 'No low energy lighting', 'roof-env-eff': 'Very Poor',
+ 'walls-energy-eff': 'Very Poor', 'photo-supply': '0.0', 'lighting-cost-potential': '72',
+ 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+ 'lodgement-datetime': '2021-12-01 10:12:23', 'flat-top-storey': '', 'current-energy-rating': 'E',
+ 'secondheat-description': 'Room heaters, mains gas', 'walls-env-eff': 'Very Poor',
+ 'transaction-type': 'ECO assessment', 'uprn': '100030949912', 'current-energy-efficiency': '54',
+ 'energy-consumption-current': '346', 'mainheat-description': 'Boiler and radiators, mains gas',
+ 'lighting-cost-current': '144', 'lodgement-date': '2021-12-01', 'extension-count': '2',
+ 'mainheatc-env-eff': 'Good', 'lmk-key': '3ec5533af02ec78361c1f9bea8dd2e878c2c6fa6cf59e5cc505c3eeb038e0f91',
+ 'wind-turbine-count': '0', 'tenure': 'Owner-occupied', 'floor-level': '',
+ 'potential-energy-efficiency': '86', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '0',
+ 'walls-description': 'Solid brick, as built, no insulation (assumed)',
+ 'hotwater-description': 'From main system'
+ }
+
+ # EPC lodged 2021-12-21 for the same UPRN: photo-supply '50.0', current-energy-efficiency '65'.
+ # Only read by the final assertion of this test.
+ ending_epc = {
+ 'low-energy-fixed-light-count': '', 'address': '27 Cromwell Street', 'uprn-source': 'Energy Assessor',
+ 'floor-height': '2.5', 'heating-cost-potential': '443', 'unheated-corridor-length': '',
+ 'hot-water-cost-potential': '53', 'construction-age-band': 'England and Wales: before 1900',
+ 'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average',
+ 'lighting-energy-eff': 'Very Poor', 'environment-impact-potential': '86',
+ 'glazed-type': 'double glazing installed before 2002', 'heating-cost-current': '904', 'address3': '',
+ 'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'sheating-energy-eff': 'N/A',
+ 'property-type': 'House', 'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '10',
+ 'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '79',
+ 'county': 'Lincolnshire', 'postcode': 'DN21 1DH', 'solar-water-heating-flag': 'N',
+ 'constituency': 'E14000707', 'co2-emissions-potential': '1.4', 'number-heated-rooms': '5',
+ 'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '84',
+ 'local-authority': 'E07000142', 'built-form': 'Mid-Terrace', 'number-open-fireplaces': '0',
+ 'windows-description': 'Fully double glazed', 'glazed-area': 'Normal', 'inspection-date': '2021-12-21',
+ 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '49', 'address1': '27 Cromwell Street',
+ 'heat-loss-corridor': '', 'flat-storey-count': '', 'constituency-label': 'Gainsborough',
+ 'roof-energy-eff': 'Very Poor', 'total-floor-area': '89.0', 'building-reference-number': '10001989430',
+ 'environment-impact-current': '55', 'co2-emissions-current': '4.4',
+ 'roof-description': 'Pitched, no insulation (assumed)', 'floor-energy-eff': 'N/A',
+ 'number-habitable-rooms': '5', 'address2': '', 'hot-water-env-eff': 'Good', 'posttown': 'GAINSBOROUGH',
+ 'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Very Poor',
+ 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+ 'lighting-description': 'No low energy lighting', 'roof-env-eff': 'Very Poor',
+ 'walls-energy-eff': 'Very Poor', 'photo-supply': '50.0', 'lighting-cost-potential': '72',
+ 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+ 'lodgement-datetime': '2021-12-21 17:33:09', 'flat-top-storey': '', 'current-energy-rating': 'D',
+ 'secondheat-description': 'Room heaters, mains gas', 'walls-env-eff': 'Very Poor',
+ 'transaction-type': 'ECO assessment', 'uprn': '100030949912', 'current-energy-efficiency': '65',
+ 'energy-consumption-current': '277', 'mainheat-description': 'Boiler and radiators, mains gas',
+ 'lighting-cost-current': '144', 'lodgement-date': '2021-12-21', 'extension-count': '2',
+ 'mainheatc-env-eff': 'Good', 'lmk-key': 'b0b19583c59afbc69db12f4d6c98cd8837e80da3214d577c426eb3e672d424fc',
+ 'wind-turbine-count': '0', 'tenure': 'Owner-occupied', 'floor-level': '',
+ 'potential-energy-efficiency': '88', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '0',
+ 'walls-description': 'Solid brick, as built, no insulation (assumed)',
+ 'hotwater-description': 'From main system'
+ }
+
+ # Reference data used to build the EPC record and the property, all read from the dev data bucket
+ cleaning_data = read_dataframe_from_s3_parquet(
+ bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+ )
+
+ cleaned = read_from_s3(
+ s3_file_name="cleaned_epc_data/cleaned.bson",
+ bucket_name="retrofit-data-dev"
+ )
+ # The payload is decoded with msgpack, even though the key ends in .bson
+ cleaned = msgpack.unpackb(cleaned, raw=False)
+
+ photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+ # Only the starting EPC goes into the record; there is no full SAP EPC and no older data
+ epc = EPCRecord(
+ epc_records={
+ 'original_epc': starting_epc,
+ 'full_sap_epc': {},
+ 'old_data': []
+ },
+ run_mode="newdata",
+ cleaning_data=cleaning_data
+ )
+
+ home = Property(
+ id=0,
+ address="",
+ postcode="",
+ epc_record=epc,
+ already_installed={},
+ non_invasive_recommendations={},
+ )
+ # Planning/heritage flags are set by hand here rather than looked up
+ home.in_conservation_area = False
+ home.is_listed = False
+ home.is_heritage = False
+ home.restricted_measures = True
+ home.get_components(
+ cleaned=cleaned,
+ photo_supply_lookup=photo_supply_lookup,
+ floor_area_decile_thresholds=floor_area_decile_thresholds
+ )
+
+ recommender = SolarPvRecommendations(property_instance=home)
+ recommender.recommend(phase=0)
+
+ # Keep the recommendations whose photo_supply is 50, the same figure as ending_epc's photo-supply
+ coverage_50_percent = [x for x in recommender.recommendation if x["photo_supply"] == 50]
+ assert len(coverage_50_percent) == 2
+
+ property_recommendations = Recommendations.insert_temp_recommendation_id([coverage_50_percent])
+
+ home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+ home.adjust_difference_record_with_recommendations(
+ property_recommendations, []
+ )
+
+ # Strip the change/ending columns from the frame handed to the model
+ # (presumably these are the outcomes being predicted - TODO confirm)
+ scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(
+ columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+ "carbon_ending"]
+ )
+
+ # NOTE(review): portfolio_id is "ashp-test" although this is a solar PV test - confirm this is intended
+ model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+ # Only the SAP change model is listed (presumably this limits predict_all to that model - TODO confirm)
+ model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+ predictions_dict = model_api.predict_all(
+ df=scoring_data,
+ bucket="retrofit-data-dev",
+ prediction_buckets={
+ "sap_change_predictions": "retrofit-sap-predictions-dev",
+ }
+ )
+
+ # Exact comparison of the two predictions, one per retained recommendation
+ assert predictions_dict["sap_change_predictions"]["predictions"].tolist() == [65.9, 65.9]
+ # Checks the fixture literal only: records the lodged rating (65) next to the predicted 65.9
+ assert ending_epc["current-energy-efficiency"] == '65'
+
+    def test_model2(self):
+        """
+        Test the solar PV recommendation engine with the SAP change model on a second property.
+
+        Builds a Property from ``starting_epc`` (6 Kenmare Crescent, UPRN 100030952942, lodged
+        2022-06-15 with photo-supply '0.0' and current-energy-efficiency '68'), generates solar PV
+        recommendations, keeps the recommendations whose ``photo_supply`` is 40 and scores them with
+        the ``sap_change_predictions`` model through ``ModelApi``.
+
+        ``ending_epc`` is the later EPC for the same UPRN (lodged 2022-08-24, photo-supply '40.0',
+        current-energy-efficiency '87'). It is not fed into the pipeline; it is kept next to the
+        expected predictions as a reference value.
+
+        The reference data is read from the "retrofit-data-dev" bucket, so the test needs access to it.
+        """
+
+        # EPC lodged 2022-06-15: no solar PV recorded (photo-supply '0.0')
+        starting_epc = {
+            'low-energy-fixed-light-count': '', 'address': '6 Kenmare Crescent', 'uprn-source': 'Energy Assessor',
+            'floor-height': '2.49', 'heating-cost-potential': '464', 'unheated-corridor-length': '',
+            'hot-water-cost-potential': '46', 'construction-age-band': 'England and Wales: 1967-1975',
+            'potential-energy-rating': 'B', 'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average',
+            'lighting-energy-eff': 'Very Good', 'environment-impact-potential': '85', 'glazed-type': 'not defined',
+            'heating-cost-current': '535', 'address3': '',
+            'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'sheating-energy-eff': 'N/A',
+            'property-type': 'Bungalow', 'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '9',
+            'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '69',
+            'county': 'Lincolnshire', 'postcode': 'DN21 1PR', 'solar-water-heating-flag': 'N',
+            'constituency': 'E14000707', 'co2-emissions-potential': '1.2', 'number-heated-rooms': '3',
+            'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '102',
+            'local-authority': 'E07000142', 'built-form': 'Semi-Detached', 'number-open-fireplaces': '0',
+            'windows-description': 'Fully double glazed', 'glazed-area': 'Much More Than Typical',
+            'inspection-date': '2022-05-31', 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '40',
+            'address1': '6 Kenmare Crescent', 'heat-loss-corridor': '', 'flat-storey-count': '',
+            'constituency-label': 'Gainsborough', 'roof-energy-eff': 'Very Good', 'total-floor-area': '66.0',
+            'building-reference-number': '10002845316', 'environment-impact-current': '68',
+            'co2-emissions-current': '2.6', 'roof-description': 'Pitched, 300 mm loft insulation',
+            'floor-energy-eff': 'N/A', 'number-habitable-rooms': '3', 'address2': '', 'hot-water-env-eff': 'Good',
+            'posttown': 'GAINSBOROUGH', 'mainheatc-energy-eff': 'Good', 'main-fuel': 'mains gas (not community)',
+            'lighting-env-eff': 'Very Good', 'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A',
+            'sheating-env-eff': 'N/A', 'lighting-description': 'Low energy lighting in all fixed outlets',
+            'roof-env-eff': 'Very Good', 'walls-energy-eff': 'Average', 'photo-supply': '0.0',
+            'lighting-cost-potential': '65', 'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100',
+            'main-heating-controls': '', 'lodgement-datetime': '2022-06-15 08:38:02', 'flat-top-storey': '',
+            'current-energy-rating': 'D', 'secondheat-description': 'Room heaters, electric',
+            'walls-env-eff': 'Average', 'transaction-type': 'ECO assessment', 'uprn': '100030952942',
+            'current-energy-efficiency': '68', 'energy-consumption-current': '227',
+            'mainheat-description': 'Boiler and radiators, mains gas', 'lighting-cost-current': '65',
+            'lodgement-date': '2022-06-15', 'extension-count': '0', 'mainheatc-env-eff': 'Good',
+            'lmk-key': 'ce181970b7077cb9b4626242bfb010b30a0e48541b5f22427e81f1adbeeec4f2', 'wind-turbine-count': '0',
+            'tenure': 'Owner-occupied', 'floor-level': '', 'potential-energy-efficiency': '85',
+            'hot-water-energy-eff': 'Good', 'low-energy-lighting': '100',
+            'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
+        }
+
+        # EPC lodged 2022-08-24 for the same UPRN: photo-supply '40.0'. Only read by the final assertions.
+        ending_epc = {
+            'low-energy-fixed-light-count': '', 'address': '6 Kenmare Crescent',
+            'uprn-source': 'Energy Assessor', 'floor-height': '2.49', 'heating-cost-potential': '464',
+            'unheated-corridor-length': '', 'hot-water-cost-potential': '46',
+            'construction-age-band': 'England and Wales: 1967-1975', 'potential-energy-rating': 'B',
+            'mainheat-energy-eff': 'Good', 'windows-env-eff': 'Average', 'lighting-energy-eff': 'Very Good',
+            'environment-impact-potential': '91', 'glazed-type': 'not defined', 'heating-cost-current': '535',
+            'address3': '', 'mainheatcont-description': 'Programmer, room thermostat and TRVs',
+            'sheating-energy-eff': 'N/A', 'property-type': 'Bungalow',
+            'local-authority-label': 'West Lindsey', 'fixed-lighting-outlets-count': '9',
+            'energy-tariff': 'Single', 'mechanical-ventilation': 'natural', 'hot-water-cost-current': '69',
+            'county': 'Lincolnshire', 'postcode': 'DN21 1PR', 'solar-water-heating-flag': 'N',
+            'constituency': 'E14000707', 'co2-emissions-potential': '0.7', 'number-heated-rooms': '3',
+            'floor-description': 'Suspended, no insulation (assumed)', 'energy-consumption-potential': '56',
+            'local-authority': 'E07000142', 'built-form': 'Semi-Detached', 'number-open-fireplaces': '0',
+            'windows-description': 'Fully double glazed', 'glazed-area': 'Much More Than Typical',
+            'inspection-date': '2022-08-24', 'mains-gas-flag': 'Y', 'co2-emiss-curr-per-floor-area': '18',
+            'address1': '6 Kenmare Crescent', 'heat-loss-corridor': '', 'flat-storey-count': '',
+            'constituency-label': 'Gainsborough', 'roof-energy-eff': 'Very Good', 'total-floor-area': '66.0',
+            'building-reference-number': '10002845316', 'environment-impact-current': '85',
+            'co2-emissions-current': '1.2', 'roof-description': 'Pitched, 300 mm loft insulation',
+            'floor-energy-eff': 'N/A', 'number-habitable-rooms': '3', 'address2': '',
+            'hot-water-env-eff': 'Good', 'posttown': 'GAINSBOROUGH', 'mainheatc-energy-eff': 'Good',
+            'main-fuel': 'mains gas (not community)', 'lighting-env-eff': 'Very Good',
+            'windows-energy-eff': 'Average', 'floor-env-eff': 'N/A', 'sheating-env-eff': 'N/A',
+            'lighting-description': 'Low energy lighting in all fixed outlets', 'roof-env-eff': 'Very Good',
+            'walls-energy-eff': 'Average', 'photo-supply': '40.0', 'lighting-cost-potential': '65',
+            'mainheat-env-eff': 'Good', 'multi-glaze-proportion': '100', 'main-heating-controls': '',
+            'lodgement-datetime': '2022-08-24 15:39:42', 'flat-top-storey': '', 'current-energy-rating': 'B',
+            'secondheat-description': 'Room heaters, electric', 'walls-env-eff': 'Average',
+            'transaction-type': 'ECO assessment', 'uprn': '100030952942', 'current-energy-efficiency': '87',
+            'energy-consumption-current': '100', 'mainheat-description': 'Boiler and radiators, mains gas',
+            'lighting-cost-current': '65', 'lodgement-date': '2022-08-24', 'extension-count': '0',
+            'mainheatc-env-eff': 'Good',
+            'lmk-key': 'e20be883431b1fed15db7fa1f52634fb7655d2b80c2fdad37df779f93ec4dafd',
+            'wind-turbine-count': '0', 'tenure': 'Owner-occupied', 'floor-level': '',
+            'potential-energy-efficiency': '91', 'hot-water-energy-eff': 'Good', 'low-energy-lighting': '100',
+            'walls-description': 'Cavity wall, filled cavity', 'hotwater-description': 'From main system'
+        }
+
+        # Reference data used to build the EPC record and the property, all read from the dev data bucket
+        cleaning_data = read_dataframe_from_s3_parquet(
+            bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
+        )
+
+        cleaned = read_from_s3(
+            s3_file_name="cleaned_epc_data/cleaned.bson",
+            bucket_name="retrofit-data-dev"
+        )
+        # The payload is decoded with msgpack, even though the key ends in .bson
+        cleaned = msgpack.unpackb(cleaned, raw=False)
+
+        photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
+
+        # Only the starting EPC goes into the record; there is no full SAP EPC and no older data
+        epc = EPCRecord(
+            epc_records={
+                'original_epc': starting_epc,
+                'full_sap_epc': {},
+                'old_data': []
+            },
+            run_mode="newdata",
+            cleaning_data=cleaning_data
+        )
+
+        home = Property(
+            id=0,
+            address="",
+            postcode="",
+            epc_record=epc,
+            already_installed={},
+            non_invasive_recommendations={},
+        )
+        # Planning/heritage flags are set by hand here rather than looked up
+        home.in_conservation_area = False
+        home.is_listed = False
+        home.is_heritage = False
+        home.restricted_measures = True
+        home.get_components(
+            cleaned=cleaned,
+            photo_supply_lookup=photo_supply_lookup,
+            floor_area_decile_thresholds=floor_area_decile_thresholds
+        )
+
+        recommender = SolarPvRecommendations(property_instance=home)
+        recommender.recommend(phase=0)
+
+        # Keep the recommendations whose photo_supply is 40, the same figure as ending_epc's photo-supply
+        coverage_40_percent = [x for x in recommender.recommendation if x["photo_supply"] == 40]
+        assert len(coverage_40_percent) == 2
+
+        property_recommendations = Recommendations.insert_temp_recommendation_id([coverage_40_percent])
+
+        home.create_base_difference_epc_record(cleaned_lookup=cleaned)
+        home.adjust_difference_record_with_recommendations(
+            property_recommendations, []
+        )
+
+        # Strip the change/ending columns from the frame handed to the model
+        # (presumably these are the outcomes being predicted - TODO confirm)
+        scoring_data = pd.DataFrame(home.recommendations_scoring_data).drop(
+            columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
+                     "carbon_ending"]
+        )
+
+        # NOTE(review): portfolio_id is "ashp-test" although this is a solar PV test - confirm this is intended
+        model_api = ModelApi(portfolio_id="ashp-test", timestamp=datetime.now().isoformat())
+        # Only the SAP change model is listed (presumably this limits predict_all to that model - TODO confirm)
+        model_api.MODEL_PREFIXES = ["sap_change_predictions"]
+
+        predictions_dict = model_api.predict_all(
+            df=scoring_data,
+            bucket="retrofit-data-dev",
+            prediction_buckets={
+                "sap_change_predictions": "retrofit-sap-predictions-dev",
+            }
+        )
+
+        # Exact comparison of the two predictions, one per retained recommendation
+        assert predictions_dict["sap_change_predictions"]["predictions"].tolist() == [87.1, 87.1]
+        # These two check the fixture literals only: they record the lodged ratings (68 -> 87) next to
+        # the predicted 87.1
+        assert ending_epc["current-energy-efficiency"] == '87'
+        assert starting_epc["current-energy-efficiency"] == '68'
diff --git a/utils/s3.py b/utils/s3.py
index cb55094a..fd5992ce 100644
--- a/utils/s3.py
+++ b/utils/s3.py
@@ -1,9 +1,10 @@
import pickle
import boto3
-from io import BytesIO, StringIO
-from botocore.exceptions import NoCredentialsError, PartialCredentialsError
+import csv
import pandas as pd
+from io import BytesIO, StringIO
from utils.logger import setup_logger
+from botocore.exceptions import NoCredentialsError, PartialCredentialsError
logger = setup_logger()
@@ -184,7 +185,7 @@ def read_pickle_from_s3(bucket_name, s3_file_name):
logger.errpr("Incomplete credentials provided.")
return None
except Exception as e:
- logger.errpr(f'Failed to download data from {bucket_name}/{s3_file_name}: {str(e)}')
+ logger.error(f'Failed to download data from {bucket_name}/{s3_file_name}: {str(e)}')
return None
# Deserialize data from pickle format
@@ -224,3 +225,22 @@ def read_excel_from_s3(bucket_name, file_key, header_row):
df.reset_index(drop=True, inplace=True)
return df
+
+
+def read_csv_from_s3(bucket_name, filepath):
+    """
+    Read a CSV object from S3 and return its rows as a list of dictionaries.
+
+    :param bucket_name: Name of the S3 bucket that holds the object
+    :param filepath: Key of the CSV object inside the bucket
+    :return: A list with one dict per data row, keyed by the values of the CSV header row
+    """
+    s3 = boto3.client('s3')
+
+    # Get the object from s3 and read the raw CSV bytes from its body
+    s3_object = s3.get_object(Bucket=bucket_name, Key=filepath)
+    body = s3_object['Body'].read()
+
+    # Decode with utf-8-sig so that a leading byte order mark (written by e.g. Excel) is dropped instead
+    # of becoming part of the first column name; UTF-8 without a BOM decodes exactly as with utf-8.
+    # newline='' hands the line endings to the csv module untouched, as its documentation asks for.
+    csv_data = StringIO(body.decode('utf-8-sig'), newline='')
+
+    # Use csv library to read it into a list of dictionaries
+    return list(csv.DictReader(csv_data))