Merge pull request #336 from Hestia-Homes/main

Dev deployment
This commit is contained in:
KhalimCK 2024-09-09 12:44:58 +01:00 committed by GitHub
commit 85951e62fb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
66 changed files with 11044 additions and 1773 deletions

View file

@ -2,13 +2,13 @@ import os
import ast
from itertools import groupby
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from etl.epc.Dataset import TrainingDataset
from etl.epc.Record import EPCRecord
from etl.epc.settings import LATEST_FIELD, MANDATORY_FIXED_FEATURES
from etl.epc_clean.epc_attributes.all_cleaners import all_cleaner_map
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from utils.logger import setup_logger
from utils.s3 import read_dataframe_from_s3_parquet
from etl.epc.settings import DATA_ANOMALY_MATCHES
@ -17,10 +17,11 @@ from recommendations.recommendation_utils import (
estimate_perimeter,
get_wall_type,
estimate_external_wall_area,
esimtate_pitched_roof_area,
estimate_windows,
)
from backend.ml_models.AnnualBillSavings import AnnualBillSavings
from backend.app.utils import sap_to_epc
import backend.app.assumptions as assumptions
ENVIRONMENT = os.environ.get("ENVIRONMENT", "dev")
DATA_BUCKET = os.environ.get(
@ -76,18 +77,22 @@ class Property:
already_installed=None,
non_invasive_recommendations=None,
measures=None,
energy_assessment=None,
is_new=True,
**kwargs
):
self.epc_record = epc_record
self.id = id
self.is_new = is_new
self.address = address
self.postcode = postcode
self.data = {
k.replace("_", "-"): v for k, v in epc_record.get("prepared_epc").items()
}
self.old_data = epc_record.get("old_data")
self.property_dimensions = None
# This is a list of measures that have already been installed in the property, typically found as a result
@ -158,17 +163,18 @@ class Property:
self.floor_height = epc_record.prepared_epc.get("floor_height")
self.insulation_wall_area = None
self.floor_area = epc_record.prepared_epc.get("total_floor_area")
self.pitched_roof_area = None
self.roof_area = None
self.insulation_floor_area = None
self.number_lighting_outlets = epc_record.prepared_epc.get(
"fixed_lighting_outlets_count"
)
self.floor_level = None
self.number_of_windows = None
self.windows_area = None
self.solar_pv_percentage = None
self.current_adjusted_energy = None
self.expected_adjusted_energy = None
self.current_energy_consumption = None
self.current_energy_consumption_heating_hotwater = None
self.current_energy_bill = None
self.expected_energy_bill = None
@ -177,7 +183,14 @@ class Property:
self.recommendations_scoring_data = []
self.simulation_epcs = {}
self.updated_simulation_epcs = []
# This additional condition data should change how we pass kwargs to this. We should no longer need to pass
# kwargs to this class, but instead, we should pass the energy assessment condition data
self.energy_assessment_condition_data = energy_assessment["condition"]
self.energy_assessment_is_newer = energy_assessment["energy_assessment_is_newer"]
# TODO: We keep this but only temporarily until we add bathrooms, bedrooms, building id to the condition data
self.parse_kwargs(kwargs)
@classmethod
@ -188,6 +201,10 @@ class Property:
:param kwargs:
:return:
"""
# Note - none of this data is contained in an energy assessment, but we should consider how this is done
# as we collect more data from the energy assessment
n_bathrooms = kwargs.get("n_bathrooms", None)
if n_bathrooms not in [None, ""]:
# We add on a small value to ensure that the number of bathrooms is rounded up, in case the value is 0.5
@ -197,17 +214,32 @@ class Property:
if n_bedrooms not in [None, ""]:
n_bedrooms = int(round(float(n_bedrooms) + 1e-5))
number_of_floors = kwargs.get("number_of_floors", None)
if number_of_floors not in [None, ""]:
number_of_floors = int(round(float(number_of_floors) + 1e-5))
insulation_floor_area = kwargs.get("insulation_floor_area", None)
if insulation_floor_area not in [None, ""]:
insulation_floor_area = float(insulation_floor_area)
insulation_wall_area = kwargs.get("insulation_wall_area", None)
if insulation_wall_area not in [None, ""]:
insulation_wall_area = float(insulation_wall_area)
return {
"n_bathrooms": n_bathrooms,
"n_bedrooms": n_bedrooms,
"number_of_floors": number_of_floors,
"insulation_floor_area": insulation_floor_area,
"insulation_wall_area": insulation_wall_area,
"building_id": kwargs.get("building_id", None),
}
def parse_kwargs(self, kwargs):
    """
    Copy optional keyword data onto the property instance.

    The three recognised keys (n_bathrooms, n_bedrooms, building_id) are always set, defaulting to
    None when absent. After that, every entry in kwargs whose value is not None is also written as
    an attribute under its own key, so non-None extras are stored rather than dropped, and may
    overwrite attributes that were set earlier.

    :param kwargs: dictionary of additional property data
    :return:
    """
    # Guarantee the recognised attributes exist, even when the caller did not supply them
    for recognised in ("n_bathrooms", "n_bedrooms", "building_id"):
        setattr(self, recognised, kwargs.get(recognised, None))

    # Any supplied value that is not None is written onto the instance under its own name
    for name, value in kwargs.items():
        if value is None:
            continue
        setattr(self, name, value)
def create_base_difference_epc_record(self, cleaned_lookup: dict):
"""
@ -333,7 +365,7 @@ class Property:
for rec in property_recommendations_by_phase:
# We simulate the impact of the recommendation at this current phase, and all of the prior phases
if rec["type"] == "mechanical_ventilation":
if rec["type"] in ["mechanical_ventilation", "trickle_vents", "draught_proofing"]:
continue
scoring_dict = self.create_recommendation_scoring_data(
@ -345,65 +377,88 @@ class Property:
)
self.recommendations_scoring_data.append(scoring_dict)
# We also use the representative recommendations to produce transformed EPCs
represenative_recs_to_this_phase = [
r for r in property_representative_recommendations
if r["phase"] <= phase
]
simulation_epc = self.epc_record.prepared_epc.copy()
# Insert static values
simulation_epc["lodgement_date"] = simulation_lodgment_date
simulation_epc = {k.replace("_", "-"): v for k, v in simulation_epc.items()}
# TODO: This is placeholder, but it's to handle the case of having both internal and external wall
# insulation as options. This will cause the process below to fall over, so we take just
# external wall insulation in epc_transformations, if we have both
types = [
x["type"] for x in represenative_recs_to_this_phase
]
if "external_wall_insulation" in types and "internal_wall_insulation" in types:
types = [x["type"] for x in previous_phase_representatives]
if "external_wall_insulation" in types and "internal_wall_insulation" in types:
raise Exception("We shouldn't have this in the representative recommendations")
# We include previous phases + the recommendation itself in the EPC transformations
epc_transformations = [
x["description_simulation"] for x in represenative_recs_to_this_phase if
x["type"] != "internal_wall_insulation"
x["description_simulation"] for x in previous_phase_representatives + [rec]
]
else:
epc_transformations = [x["description_simulation"] for x in represenative_recs_to_this_phase]
# It is possible that we could have two simulations applied to the same descriptions
# We extract these out
phase_epc_transformation = {}
for config in epc_transformations:
for k, v in config.items():
if k in phase_epc_transformation:
if "-energy-eff" in k:
# We take the highest value
if phase_epc_transformation[k] == "Very Good":
# It is possible that we could have two simulations applied to the same descriptions
# We extract these out
phase_epc_transformation = {}
for config in epc_transformations:
for k, v in config.items():
if k in phase_epc_transformation:
if "-energy-eff" in k:
# We take the highest value
if phase_epc_transformation[k] == "Very Good":
continue
elif phase_epc_transformation[k] == "Good":
if v == "Very Good":
phase_epc_transformation[k] = v
elif phase_epc_transformation[k] == "Average":
if v in ["Good", "Very Good"]:
phase_epc_transformation[k] = v
elif phase_epc_transformation[k] == "Poor":
if v in ["Average", "Good", "Very Good"]:
phase_epc_transformation[k] = v
else:
phase_epc_transformation[k] = v
continue
elif phase_epc_transformation[k] == "Good":
if v == "Very Good":
phase_epc_transformation[k] = v
elif phase_epc_transformation[k] == "Average":
if v in ["Good", "Very Good"]:
phase_epc_transformation[k] = v
elif phase_epc_transformation[k] == "Poor":
if v in ["Average", "Good", "Very Good"]:
phase_epc_transformation[k] = v
else:
phase_epc_transformation[k] = v
continue
if phase_epc_transformation[k] == v:
continue
if phase_epc_transformation[k] == v:
continue
raise NotImplementedError(
"Already have this key in the phase_epc_transformation - implement me"
)
phase_epc_transformation[k] = v
simulation_epc.update(phase_epc_transformation)
self.simulation_epcs[rec["recommendation_id"]] = simulation_epc
raise NotImplementedError(
"Already have this key in the phase_epc_transformation - implement me")
phase_epc_transformation[k] = v
def update_simulation_epcs(self, impact_summary):
"""
This method will insert the high level measures, such as SAP, heat demand, carbon, etc
:return:
"""
if self.simulation_epcs is None:
raise ValueError("Simulation EPCs have not been created")
simulation_epc = self.epc_record.prepared_epc.copy()
# Insert static values
simulation_epc["lodgement_date"] = simulation_lodgment_date
rec_ids = sorted(list(self.simulation_epcs.keys()))
updated_simulation_epcs = []
for rec_id in rec_ids:
sim_epc = self.simulation_epcs[rec_id].copy()
rec_impact = [x for x in impact_summary if x["recommendation_id"] == rec_id][0]
# We update all of the features that should have an impact on the kwh model
# Replace the underscores with hyphens
simulation_epc = {k.replace("_", "-"): v for k, v in simulation_epc.items()}
simulation_epc.update(phase_epc_transformation)
self.simulation_epcs[phase] = simulation_epc
sim_epc.update(
{
# CO₂ emissions per square metre floor area per year in kg/m². Since CO₂ emissions are in tonnes
# per year, we multiply by 1000 to get kg/m²
"co2-emiss-curr-per-floor-area": round(
1000 * (rec_impact["carbon"] / self.data["total-floor-area"])
),
"co2-emissions-current": rec_impact["carbon"],
"current-energy-rating": sap_to_epc(rec_impact["sap"]),
"current-energy-efficiency": int(np.floor(rec_impact["sap"])),
"energy-consumption-current": rec_impact["heat_demand"],
"id": "+".join([str(self.id), rec_id])
}
)
updated_simulation_epcs.append(sim_epc)
# Now that we have this data, store it on the property
self.updated_simulation_epcs = updated_simulation_epcs
return updated_simulation_epcs
@staticmethod
def create_recommendation_scoring_data(
@ -425,7 +480,6 @@ class Property:
"""
output = recommendation_record.copy()
non_invasive_recommendations = [] if non_invasive_recommendations is None else non_invasive_recommendations
for col in [
"walls_insulation_thickness",
@ -438,81 +492,6 @@ class Property:
for recommendation in recommendations:
# For the list of recommendations we have, we iteratively update the output
# Update description to indicate it's insulate
if recommendation["type"] in [
"solid_floor_insulation",
"suspended_floor_insulation",
"exposed_floor_insulation",
]:
if len(recommendation["parts"]) > 1:
raise NotImplementedError(
"Have more than 1 floor insulation part - handle this case"
)
# We don't really see above average for this in the training data
output["floor_insulation_thickness_ending"] = "average"
else:
if output["floor_thermal_transmittance_ending"] is None:
raise ValueError("We should not have a None value for the u value")
if output["floor_insulation_thickness_ending"] is None:
output["floor_insulation_thickness_ending"] = "none"
if recommendation["type"] in [
"loft_insulation",
"room_roof_insulation",
"flat_roof_insulation",
]:
output["roof_thermal_transmittance_ending"] = recommendation[
"new_u_value"
]
parts = recommendation["parts"]
if len(parts) != 1:
raise ValueError(
"More than one part for roof insulation - investiage me"
)
# This is based on the values we have in the training data
valid_numeric_values = [
12,
25,
50,
75,
100,
150,
200,
250,
270,
300,
350,
400,
]
proposed_depth = recommendation["new_thickness"]
if proposed_depth not in valid_numeric_values:
# Take the nearest value for scoring
proposed_depth = min(
valid_numeric_values, key=lambda x: abs(x - proposed_depth)
)
output["roof_insulation_thickness_ending"] = str(int(proposed_depth))
if recommendation["type"] == "loft_insulation":
if proposed_depth >= 270:
output["roof_energy_eff_ending"] = "Very Good"
else:
if output["roof_energy_eff_ending"] not in ["Good", "Very Good"]:
output["roof_energy_eff_ending"] = "Good"
else:
output["roof_energy_eff_ending"] = "Very Good"
else:
# Fill missing roof u-values - this fill is not based on recommended upgrades
if output["roof_thermal_transmittance_ending"] is None:
raise ValueError("We should not have a None value for the u value")
if output["roof_insulation_thickness_ending"] is None:
output["roof_insulation_thickness_ending"] = "none"
if recommendation["type"] == "sealing_open_fireplace":
output["number_open_fireplaces_ending"] = 0
@ -556,13 +535,15 @@ class Property:
if recommendation["type"] in [
"heating", "hot_water_tank_insulation", "heating_control", "secondary_heating",
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
"cylinder_thermostat"
"cylinder_thermostat", "loft_insulation", "room_roof_insulation", "flat_roof_insulation",
"solid_floor_insulation", "suspended_floor_insulation", "mixed_glazing"
]:
# We update the data, as defined in the recommendation
if output["walls_insulation_thickness_ending"] is None:
output["walls_insulation_thickness_ending"] = "none"
for prefix in ["walls", "roof", "floor"]:
if output[f"{prefix}_insulation_thickness_ending"] is None:
output[f"{prefix}_insulation_thickness_ending"] = "none"
simulation_config = recommendation["simulation_config"]
simulation_config = recommendation["simulation_config"].copy()
# If any entries in simulation_config are None, we will set them to "Unknown" which is the cleaning
# value
for key, value in simulation_config.items():
@ -578,9 +559,9 @@ class Property:
"sealing_open_fireplace", "low_energy_lighting",
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
"loft_insulation", "room_roof_insulation", "flat_roof_insulation",
"solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation",
"solid_floor_insulation", "suspended_floor_insulation",
"windows_glazing", "solar_pv", "heating", "hot_water_tank_insulation",
"heating_control", "secondary_heating", "cylinder_thermostat"
"heating_control", "secondary_heating", "cylinder_thermostat", "mixed_glazing"
]:
raise NotImplementedError(
"Implement me, given type %s" % recommendation["type"]
@ -590,23 +571,18 @@ class Property:
return output
def get_components(
def set_features(
self,
cleaned,
photo_supply_lookup,
floor_area_decile_thresholds,
energy_consumption_client
kwh_client,
kwh_predictions
):
"""
Given the cleaning that has been performed, we'll use this to identify the property
components, from roof to walls to windows, heating and hot water
:param cleaned: This is the dictionary of components found in cleaner.cleaned
:param photo_supply_lookup: This is the lookup table for the photo supply, used to estimate the percentage
of the roof that is suitable for solar panels
:param floor_area_decile_thresholds: This is the decile thresholds for the floor area, used in estimating the
solar pv roof area
:param energy_consumption_client: Contains the heating and hot water kwh models - used to predict current
energy annual consumption in kWh
:param kwh_client: The client that will be used to convert the energy costs to today's costs
:param kwh_predictions: Contains the kwh predictions for heating and hot water
:return:
"""
@ -669,21 +645,22 @@ class Property:
self.set_floor_type()
self.set_floor_level()
self.set_windows_count()
self.set_solar_panel_area(
photo_supply_lookup=photo_supply_lookup,
floor_area_decile_thresholds=floor_area_decile_thresholds,
)
self.set_energy_source()
self.find_energy_sources()
self.set_current_energy_bill(energy_consumption_client)
self.set_current_energy_bill(kwh_client, kwh_predictions)
def set_solar_panel_configuration(self, solar_panel_configuration):
def set_solar_panel_configuration(
self, solar_panel_configuration, roof_area
):
"""
This function inserts the solar panel configuration into the property object
"""
self.solar_panel_configuration = solar_panel_configuration
def set_current_energy_bill(self, energy_consumption_client):
# We also set the roof area
self.roof_area = roof_area
def set_current_energy_bill(self, kwh_client, kwh_predictions):
"""
Given what we know about the property now, estimates the current energy consumption using the UCL paper
https://www.sciencedirect.com/science/article/pii/S0378778823002542
@ -695,103 +672,58 @@ class Property:
# 2) Predicted KwH
# Today's costs
todays_heating_cost = energy_consumption_client.convert_cost_to_today(
original_cost=float(self.data["heating-cost-current"]),
lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"])
)
todays_hot_water_cost = energy_consumption_client.convert_cost_to_today(
original_cost=float(self.data["hot-water-cost-current"]),
lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"])
)
todays_lighting_cost = energy_consumption_client.convert_cost_to_today(
todays_lighting_cost = kwh_client.convert_cost_to_today(
original_cost=float(self.data["lighting-cost-current"]),
lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"])
lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"]).tz_localize(None)
)
scoring_df = pd.DataFrame([self.epc_record.prepared_epc])
# Change columns from underscores to hyphens
scoring_df.columns = [
x.lower().replace("_", "-") for x in scoring_df.columns
]
for col in ["heating_kwh", "hot_water_kwh"]:
scoring_df[col] = None
# If we have the kwh figures, we don't need to predict them
condition_data = self.energy_assessment_condition_data.copy()
energy_consumption_client.data = None
heating_prediction = energy_consumption_client.score_new_data(
new_data=scoring_df, target="heating_kwh"
)[0]
heating_kwh_predictions = kwh_predictions["heating_kwh_predictions"]
hotwater_kwh_predictions = kwh_predictions["hotwater_kwh_predictions"]
hot_water_prediction = energy_consumption_client.score_new_data(
new_data=scoring_df, target="hot_water_kwh"
)[0]
heating_prediction = (
condition_data.get("space_heating_kwh") if condition_data.get("space_heating_kwh") is not None else
heating_kwh_predictions[
heating_kwh_predictions["id"].astype(int) == self.uprn
]["predictions"].values[0]
)
hot_water_prediction = (
condition_data.get("water_heating_kwh") if condition_data.get("water_heating_kwh") is not None else
hotwater_kwh_predictions[
hotwater_kwh_predictions["id"].astype(int) == self.uprn
]["predictions"].values[0]
)
# We convert the lighting cost into kwh, just using the price cap
lighting_kwh = float(self.data["lighting-cost-current"]) / AnnualBillSavings.ELECTRICITY_PRICE_CAP
lighting_kwh = todays_lighting_cost / AnnualBillSavings.ELECTRICITY_PRICE_CAP
appliances_kwh = AnnualBillSavings.estimate_appliances_energy_use(total_floor_area=self.floor_area)
adjusted_heating_kwh = AnnualBillSavings.adjust_energy_to_metered(
epc_energy=heating_prediction,
current_epc_rating=self.data["current-energy-rating"],
)
unadjusted_kwh_estimates = {
"heating": float(heating_prediction),
"hot_water": float(hot_water_prediction),
"lighting": float(lighting_kwh),
"appliances": float(appliances_kwh)
}
adjusted_hot_water_kwh = AnnualBillSavings.adjust_energy_to_metered(
epc_energy=hot_water_prediction,
current_epc_rating=self.data["current-energy-rating"],
)
adjusted_lighting_kwh = AnnualBillSavings.adjust_energy_to_metered(
epc_energy=lighting_kwh,
current_epc_rating=self.data["current-energy-rating"],
)
adjusted_applicances_kwh = AnnualBillSavings.adjust_energy_to_metered(
epc_energy=appliances_kwh,
current_epc_rating=self.data["current-energy-rating"],
)
# Adjust today's cost figures with the UCL model
adjusted_heating_cost = AnnualBillSavings.adjust_energy_to_metered(
epc_energy=todays_heating_cost,
current_epc_rating=self.data["current-energy-rating"],
)
adjusted_hot_water_cost = AnnualBillSavings.adjust_energy_to_metered(
epc_energy=todays_hot_water_cost,
current_epc_rating=self.data["current-energy-rating"],
)
adjusted_lighting_cost = AnnualBillSavings.adjust_energy_to_metered(
epc_energy=todays_lighting_cost,
current_epc_rating=self.data["current-energy-rating"],
)
adjusted_appliances_cost = AnnualBillSavings.adjust_energy_to_metered(
epc_energy=appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP,
current_epc_rating=self.data["current-energy-rating"],
)
unadjusted_heating_costs = {
"heating": None,
"hot_water": None,
"lighting": float(todays_lighting_cost),
"appliances": float(appliances_kwh) * AnnualBillSavings.ELECTRICITY_PRICE_CAP
}
# Sum up the adjusted kwh figures
self.current_adjusted_energy = (
adjusted_heating_kwh + adjusted_hot_water_kwh + adjusted_lighting_kwh + adjusted_applicances_kwh
)
self.current_energy_bill = (
adjusted_heating_cost + adjusted_hot_water_cost + adjusted_lighting_cost + adjusted_appliances_cost
self.current_energy_consumption = sum(list(unadjusted_kwh_estimates.values()))
self.current_energy_consumption_heating_hotwater = (
unadjusted_kwh_estimates["heating"] + unadjusted_kwh_estimates["hot_water"]
)
self.energy_cost_estimates = {
"adjusted": {
"heating": adjusted_heating_cost,
"hot_water": adjusted_hot_water_cost,
"lighting": adjusted_lighting_cost,
"appliances": adjusted_appliances_cost
},
"unadjusted": {
"heating": todays_heating_cost,
"hot_water": todays_hot_water_cost,
"lighting": todays_lighting_cost,
"appliances": appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
},
"unadjusted": unadjusted_heating_costs,
"epc": {
"heating": float(self.data["heating-cost-current"]),
"hot_water": float(self.data["hot-water-cost-current"]),
@ -800,18 +732,7 @@ class Property:
}
self.energy_consumption_estimates = {
"adjusted": {
"heating": adjusted_heating_kwh,
"hot_water": adjusted_hot_water_kwh,
"lighting": adjusted_lighting_kwh,
"appliances": adjusted_applicances_kwh
},
"unadjusted": {
"heating": heating_prediction,
"hot_water": hot_water_prediction,
"lighting": lighting_kwh,
"appliances": appliances_kwh
}
"unadjusted": unadjusted_kwh_estimates
}
def set_spatial(self, spatial: pd.DataFrame):
@ -861,7 +782,10 @@ class Property:
property_data = {
"creation_status": "READY",
"uprn": int(self.data["uprn"]),
"building_reference_number": int(self.data["building-reference-number"]),
"building_reference_number": (
int(self.data["building-reference-number"]) if
self.data["building-reference-number"] is not None else None
),
"has_pre_condition_report": True,
"has_recommendations": True,
"property_type": self.data["property-type"],
@ -947,7 +871,8 @@ class Property:
"energy_tariff": self.data["energy-tariff"],
"primary_energy_consumption": self.energy["primary_energy_consumption"],
"co2_emissions": self.energy["co2_emissions"],
"adjusted_energy_consumption": self.current_adjusted_energy,
"current_energy_demand": self.current_energy_consumption,
"current_energy_demand_heating_hotwater": self.current_energy_consumption_heating_hotwater,
"estimated": self.data.get("estimated", False),
}
@ -1030,27 +955,40 @@ class Property:
medians across the EPC data
:return:
"""
# Many of these pieces of information are now contained in the condition data
condition_data = self.energy_assessment_condition_data.copy()
# TODO: These functions should work on an EPCRecord object, so that the format is more standardised.
# They could also be added as attributes to the EPC Record
# We can update the number of floors if we have this information in the condition data
self.number_of_floors = int(self.energy_assessment_condition_data["number_of_floors"]) \
if (condition_data.get("number_of_floors") is not None) and (self.number_of_floors is not None) \
else self.number_of_floors
self.perimeter = estimate_perimeter(
self.floor_area / self.number_of_floors,
self.number_of_rooms / self.number_of_floors,
)
# If we already have this, we re-engineer the perimeter
if self.insulation_floor_area is not None:
self.perimeter = np.sqrt(self.insulation_floor_area) * 4
else:
self.perimeter = float(self.energy_assessment_condition_data["perimeter"]) \
if condition_data.get("perimeter") is not None \
else estimate_perimeter(
floor_area=self.floor_area / self.number_of_floors,
num_rooms=self.number_of_rooms / self.number_of_floors
)
self.insulation_wall_area = estimate_external_wall_area(
self.insulation_wall_area = float(self.energy_assessment_condition_data["insulation_wall_area"]) \
if (condition_data.get("insulation_wall_area") is not None) and (self.insulation_wall_area is not None) \
else estimate_external_wall_area(
num_floors=self.number_of_floors,
floor_height=self.floor_height,
perimeter=self.perimeter,
built_form=self.data["built-form"],
)
self.insulation_floor_area = self.floor_area / self.number_of_floors
self.pitched_roof_area = esimtate_pitched_roof_area(
floor_area=self.insulation_floor_area, floor_height=self.floor_height
)
if self.insulation_floor_area is None:
self.insulation_floor_area = float(
self.energy_assessment_condition_data["main_dwelling_ground_floor_area"]
) if (condition_data.get("main_dwelling_ground_floor_area") is not None) else (
self.floor_area / self.number_of_floors
)
def set_floor_level(self):
self.floor_level = (
@ -1132,23 +1070,17 @@ class Property:
return component_data
def set_adjusted_energy(self, expected_adjusted_energy, expected_energy_bill):
    """
    Keep the expected adjusted energy and expected energy bill on the instance for later use.

    :param expected_adjusted_energy: value stored as self.expected_adjusted_energy
    :param expected_energy_bill: value stored as self.expected_energy_bill
    :return:
    """
    self.expected_adjusted_energy, self.expected_energy_bill = (
        expected_adjusted_energy,
        expected_energy_bill,
    )
def set_windows_count(self):
"""
Using the estimate_windows function, this method will set the number of windows in the property
:return:
"""
self.number_of_windows = estimate_windows(
condition_data = self.energy_assessment_condition_data.copy()
self.number_of_windows = int(condition_data["number_of_windows"]) \
if condition_data.get("number_of_windows") is not None \
else estimate_windows(
property_type=self.data["property-type"],
built_form=self.data["built-form"],
construction_age_band=self.construction_age_band,
@ -1156,47 +1088,9 @@ class Property:
number_habitable_rooms=self.number_of_rooms,
)
def set_solar_panel_area(self, photo_supply_lookup, floor_area_decile_thresholds):
    """
    Stores the share of the roof assumed to be usable for solar panels in self.solar_pv_percentage.

    The matched photo supply rows are obtained from SolarPhotoSupply.filter_photo_supply_lookup, the mean of
    their "photo_supply_median" column is taken, and that figure is divided by 100 before being stored.

    :param photo_supply_lookup: lookup passed through to SolarPhotoSupply.filter_photo_supply_lookup
    :param floor_area_decile_thresholds: thresholds passed through to the same lookup filter
    :return:
    :raises ValueError: when both the insulation floor area and the pitched roof area are still None
    """
    # Only raises when neither area is available; a single missing area is let through
    if not (self.insulation_floor_area is not None or self.pitched_roof_area is not None):
        raise ValueError(
            "Need to set insulation floor area and pitched roof area before setting solar pv roof area"
        )

    lookup_arguments = {
        "photo_supply_lookup": photo_supply_lookup,
        "floor_area_decile_thresholds": floor_area_decile_thresholds,
        "tenure": self.data["tenure"],
        "built_form": self.data["built-form"],
        "property_type": self.data["property-type"],
        "construction_age_band": self.construction_age_band,
        "is_flat": self.roof["is_flat"],
        "is_pitched": self.roof["is_pitched"],
        "is_roof_room": self.roof["is_roof_room"],
        "floor_area": self.floor_area,
    }
    matched_rows = SolarPhotoSupply.filter_photo_supply_lookup(**lookup_arguments)

    # The lookup value is divided by 100 before it is stored
    self.solar_pv_percentage = matched_rows["photo_supply_median"].mean() / 100
def get_solar_pv_roof_area(self, percentage_of_roof):
    """
    Returns the roof area available for solar panels for a given roof fraction.

    Flat roofs use the insulation floor area as the base area, every other roof uses the pitched roof area.

    :param percentage_of_roof: multiplier applied to the base roof area
    :return: base roof area multiplied by percentage_of_roof
    """
    if self.roof["is_flat"]:
        base_area = self.insulation_floor_area
    else:
        base_area = self.pitched_roof_area
    return base_area * percentage_of_roof
self.windows_area = float(condition_data["windows_area"]) \
if condition_data.get("windows_area") is not None \
else None
def set_energy_source(self):
"""
@ -1240,7 +1134,9 @@ class Property:
'has_exhaust_source_heat_pump': 'Electricity',
'has_community_heat_pump': 'Electricity',
'has_wood_pellets': 'Wood Pellets',
'has_community_scheme': 'Varied (Community Scheme)'
'has_community_scheme': 'Varied (Community Scheme)',
"has_dual_fuel_mineral_and_wood": 'Wood Logs',
"has_electricaire": 'Electricity',
}
# Hot water
@ -1266,19 +1162,144 @@ class Property:
'community scheme': 'Community Scheme'
}
self.heating_energy_source = [
self.heating_energy_source = list({
fuel for key, fuel in heating_fuel_mapping.items() if self.main_heating.get(key, False)
]
})
if set(self.heating_energy_source) == {'Electricity', 'Natural Gas'}:
# It means they have mixed heating so we take the primary one, based on main fuel
# This will probably happen in the case of an extension
if self.main_fuel["clean_description"] in ["Mains gas not community", "Mains gas community"]:
self.heating_energy_source = ['Natural Gas']
else:
self.heating_energy_source = ['Electricity']
if set(self.heating_energy_source) == {'Natural Gas', 'Wood Logs'}:
# It means they have mixed heating so we take the primary one, based on main fuel
# This will probably happen in the case of an extension
if self.main_fuel["clean_description"] in ["Mains gas not community", "Mains gas community"]:
self.heating_energy_source = ['Natural Gas']
else:
self.heating_energy_source = ['Wood Logs']
if len(self.heating_energy_source) == 0 or len(self.heating_energy_source) > 1:
raise Exception("Investigate em")
raise Exception("Investigate me")
self.heating_energy_source = self.heating_energy_source[0]
if self.heating_energy_source == "Varied (Community Scheme)":
if self.main_fuel["fuel_type"] == "mains gas":
self.heating_energy_source = "Natural Gas (Community Scheme)"
else:
raise Exception("Implement me")
if self.hotwater["heater_type"] is not None:
self.hot_water_energy_source = heater_type_to_fuel[self.hotwater["heater_type"]]
if self.hotwater["extra_features"] == "plus solar":
self.hot_water_energy_source = self.heating_energy_source + " + Solar Thermal"
return
else:
fuel = system_type_modification[self.hotwater["system_type"]]
if fuel == 'Main System':
if self.hotwater["extra_features"] == "plus solar":
self.hot_water_energy_source = self.heating_energy_source + " + Solar Thermal"
return
if fuel in ['Main System', "Community Scheme"]:
self.hot_water_energy_source = self.heating_energy_source
elif fuel in ['Secondary System']:
# Check the secondary heating system
secondary_heating = self.data["secondheat-description"]
self.hot_water_energy_source = assumptions.DESCRIPTIONS_TO_FUEL_TYPES[secondary_heating]["fuel"]
else:
raise Exception("Investiage me")
def is_ashp_valid(self, measures):
    """
    Decides whether an air source heat pump (ASHP) should be considered for this property.

    The checks are applied in order:
    1) If "air_source_heat_pump" is listed in self.non_invasive_recommendations, the ASHP is valid
    2) If "air_source_heat_pump" is not in measures, the ASHP is not valid
    3) Otherwise the property type must be a House or Bungalow and the main heating must not already be
       an air source heat pump

    A value of None for either self.non_invasive_recommendations or measures is treated as an empty
    collection, rather than failing the membership test with a TypeError.

    :param measures: collection of measure names under consideration, or None
    :return: True if an ASHP is valid for this property, False otherwise
    """
    non_invasive_recommendations = (
        [] if self.non_invasive_recommendations is None else self.non_invasive_recommendations
    )
    if "air_source_heat_pump" in non_invasive_recommendations:
        return True

    if measures is None or "air_source_heat_pump" not in measures:
        return False

    suitable_property_type = self.data["property-type"] in ["House", "Bungalow"]
    has_air_source_heat_pump = self.main_heating["has_air_source_heat_pump"]
    return suitable_property_type and not has_air_source_heat_pump
def is_solar_pv_valid(self):
    """
    Decides whether solar PV should be considered for this property.

    A property that has both a building id and a solar panel configuration is always valid, which lets
    flats through when solar potential is being looked at for the whole building. Otherwise the property
    must be a House, Bungalow or Maisonette, have a flat, pitched or room-in-roof roof, and have a
    "photo-supply" value that indicates there is no existing solar PV.

    :return: True if solar PV is valid for this property, otherwise a falsy value
    """
    building_level_solar = not (
        self.building_id is None or self.solar_panel_configuration is None
    )
    if building_level_solar:
        return True

    property_type_ok = self.data["property-type"] in ("House", "Bungalow", "Maisonette")

    roof = self.roof
    roof_type_ok = roof["is_flat"] or roof["is_pitched"] or roof["is_roof_room"]

    # With no existing solar PV, the photo-supply field is expected to be None or a missing value
    # NOTE(review): self.DATA_ANOMALY_MATCHES is compared as a single element here, not searched through.
    # If it is a collection of anomaly strings, an anomaly value will never match - confirm the intent
    no_existing_solar_pv = self.data["photo-supply"] in (None, 0, self.DATA_ANOMALY_MATCHES)

    if not property_type_ok:
        return False
    return roof_type_ok and no_existing_solar_pv
def estimate_electrical_consumption(self, assumed_ashp_efficiency, exclusions):
    """
    Estimate the future electrical consumption of the property, used to size solar panels.

    The estimate assumes the home will be heated by an air source heat pump (ASHP) when it is eligible for
    one, so that the panels can cover heat generation even if the property does not generate its heat from
    electricity today. If an ASHP is excluded, or is not a valid measure for the property, the current
    energy consumption is returned unchanged.

    :param assumed_ashp_efficiency: assumed ASHP efficiency as a percentage (e.g. 300 for 300%)
    :param exclusions: measures excluded from consideration, or None for no exclusions
    :return: the estimated electrical consumption
    :raises NotImplementedError: if the heating or hot water fuel is not one this estimate supports
    """
    ashp = "air_source_heat_pump"

    if exclusions is not None and ashp in exclusions:
        return self.current_energy_consumption

    # If the property currently has an ASHP, we don't gain from any efficiency improvements
    if not self.is_ashp_valid(measures=[ashp]):
        return self.current_energy_consumption

    # Fuels whose consumption is re-expressed as ASHP electricity. An electric boiler is included because it
    # still benefits from the ASHP efficiency gain.
    convertible_fuels = (
        "Natural Gas", "LPG", "Wood Logs", "Oil", "Electricity", "Coal", "Smokeless Fuel",
        "Natural Gas + Solar Thermal", "Anthracite", "Wood Pellets", "LPG + Solar Thermal",
    )

    heating_fuel = self.heating_energy_source
    hot_water_fuel = self.hot_water_energy_source
    heating_kwh = self.energy_consumption_estimates["unadjusted"]["heating"]
    hot_water_kwh = self.energy_consumption_estimates["unadjusted"]["hot_water"]

    heating_supported = heating_fuel in convertible_fuels
    hot_water_convertible = hot_water_fuel in convertible_fuels
    hot_water_supported = hot_water_convertible or hot_water_fuel == "Electricity + Solar Thermal"
    if not (heating_supported and hot_water_supported):
        raise NotImplementedError("Have not implemented estimating electrical consumption for this fuel type")

    efficiency_factor = assumed_ashp_efficiency / 100

    # Past the guard above the heating fuel is always convertible, so heating is always adjusted
    heating_kwh = heating_kwh / efficiency_factor
    if hot_water_convertible:
        # "Electricity + Solar Thermal" hot water is accepted but left unadjusted
        hot_water_kwh = hot_water_kwh / efficiency_factor

    unadjusted = self.energy_consumption_estimates["unadjusted"]
    return heating_kwh + hot_water_kwh + unadjusted["lighting"] + unadjusted["appliances"]

View file

@ -292,8 +292,7 @@ class SearchEpc:
"error": str(e)
}
@staticmethod
def filter_rows(rows, property_type=None, address=None):
def filter_rows(self, rows, property_type=None, address=None):
"""
This method should not be used when property_type and address are both not None
:param rows:
@ -321,8 +320,21 @@ class SearchEpc:
if address is not None:
# We can do a filter on the property type
best_match = process.extractOne(address, [r["address"] for r in rows], score_cutoff=0)
rows_filtered = [r for r in rows if r["address"] == best_match[0]]
# We check if the full address contains the postcode and if it does, remove
if self.postcode in address:
address = address.replace(self.postcode, "").strip().rstrip(",")
# We check if post town is included in the address
if any([r["posttown"].lower() in address.lower() for r in rows]):
best_match = process.extractOne(
address, [", ".join([r["address"], r["posttown"]]) for r in rows], score_cutoff=0
)
# Get all of the scores
rows_filtered = [r for r in rows if ", ".join([r["address"], r["posttown"]]) == best_match[0]]
else:
best_match = process.extractOne(address, [r["address"] for r in rows], score_cutoff=0)
# Get all of the scores
rows_filtered = [r for r in rows if r["address"] == best_match[0]]
if rows_filtered:
return rows_filtered

View file

@ -8,6 +8,8 @@ import time
from backend.app.db.functions.solar_functions import get_solar_data, store_batch_data
from utils.logger import setup_logger
from sklearn.preprocessing import MinMaxScaler
from recommendations.Costs import Costs
from math import sin, cos, sqrt, atan2, radians
logger = setup_logger()
@ -15,10 +17,6 @@ logger = setup_logger()
class GoogleSolarApi:
NORTH_FACING_AZIMUTH_RANGE = (-30, 30)
# Conservative estimate of the proportion of electricity that will be consumed, whereas the rest will
# be exported
SOLAR_CONSUMPTION_PROPORTION = 0.5
# These are variables, described in the documentation for cost analysis for non-us locations, seen here
# https://developers.google.com/maps/documentation/solar/calculate-costs-non-us
# We use the default figures that the API uses for US locations
@ -69,6 +67,9 @@ class GoogleSolarApi:
# Indicates if we need to store the data to the db
self.need_to_store = False
# Indicates if we think we have both units attached to a semi-detached property
self.double_property = False
def get_building_insights(self, longitude, latitude, required_quality="MEDIUM", max_retries=None):
"""
Make an API request to retrieve building insights based on the given longitude and latitude, with retry
@ -107,15 +108,24 @@ class GoogleSolarApi:
@lru_cache(maxsize=128)
def get(
self, longitude, latitude, energy_consumption, required_quality="MEDIUM", is_building=False, session=None,
uprn=None
self,
longitude,
latitude,
energy_consumption,
property_instance=None,
required_quality="MEDIUM",
is_building=False,
session=None,
uprn=None,
):
"""
Wrapper function that calls get_building_insights and extracts roof segments, with caching.
:param longitude: The longitude of the location.
:param latitude: The latitude of the location.
:param energy_consumption: The energy consumption of the building/unit associated to the longitude and latitude.
:param energy_consumption: The energy consumption of the building/unit associated to the longitude and latitude,
that we wish to size the solar panels up against
:param property_instance: The property instance associated to the longitude and latitude.
:param required_quality: The required quality of the data (default is "MEDIUM").
:param is_building: Whether the energy consumption is for a building or a unit.
:param session: The database session to use for the query (default is None).
@ -137,6 +147,14 @@ class GoogleSolarApi:
# Extract key data from the insights response
self.roof_segments = self.insights_data["solarPotential"].get('roofSegmentStats', [])
# Automatically exclude north-facing segments
self.exclude_north_facing_segments(property_instance=property_instance)
# If a property is semi-detached, it's possible for us to include segments from an attached unit
if (property_instance.data["built-form"] == "Semi-Detached") and (
property_instance.data["extension-count"] == 0
):
self.exclude_likely_duplicate_surfaces()
self.roof_area = self.insights_data["solarPotential"]["wholeRoofStats"]['areaMeters2']
self.floor_area = self.insights_data["solarPotential"]["wholeRoofStats"]['groundAreaMeters2']
self.panel_area = (
@ -152,13 +170,17 @@ class GoogleSolarApi:
# It should be straightforward, but I'd rather see an actual instance of this happening
raise NotImplementedError("Panel wattage is not 400W - implement me")
# Automatically exclude north-facing segments
self.exclude_north_facing_segments()
self.roof_segment_indexes = [segment['segmentIndex'] for segment in self.roof_segments]
# We now start finding the solar panel configurations
self.optimise_solar_configuration(energy_consumption=energy_consumption, is_building=is_building)
self.optimise_solar_configuration(
energy_consumption=energy_consumption, is_building=is_building, property_instance=property_instance
)
# Finally, if we have a double property, we half the data we stored area
if self.double_property:
self.roof_area = self.roof_area / 2
self.floor_area = self.floor_area / 2
def save_to_db(self, session, uprns_to_location, scenario_type):
if self.insights_data is None:
@ -178,7 +200,7 @@ class GoogleSolarApi:
"yearly_dc_energy",
"total_cost",
"panneled_roof_area",
"array_warrage",
"array_wattage",
"initial_ac_kwh_per_year",
"lifetime_ac_kwh",
"roi",
@ -191,7 +213,7 @@ class GoogleSolarApi:
"yearly_dc_energy": "yearly_dc_kwh",
"total_cost": "cost",
"panneled_roof_area": "panelled_roof_area",
"array_warrage": "array_kwhp",
"array_wattage": "array_kwhp",
"initial_ac_kwh_per_year": "yearly_ac_kwh",
}
)
@ -226,15 +248,17 @@ class GoogleSolarApi:
installation_life_span)) /
(1 - efficiency_depreciation_factor))
def optimise_solar_configuration(self, energy_consumption, is_building=False):
def optimise_solar_configuration(self, energy_consumption, is_building=False, property_instance=None):
"""
Optimise the solar panel configuration for the building.
:return:
"""
cost_instance = Costs(property_instance=property_instance) if property_instance is not None else None
# Remove any north facing roof segments
panel_performance = []
for config in self.insights_data["solarPotential"]["solarPanelConfigs"]:
for config in self.insights_data["solarPotential"].get("solarPanelConfigs", []):
roof_segment_summaries = config["roofSegmentSummaries"]
# Filter on just the segments in self.roof_segment_indexes
roof_segment_summaries = [
@ -246,7 +270,14 @@ class GoogleSolarApi:
wattage = segment["panelsCount"] * self.insights_data["solarPotential"]["panelCapacityWatts"]
generated_dc_energy = segment["yearlyEnergyDcKwh"]
ratio = generated_dc_energy / wattage
cost = MCS_SOLAR_PV_COST_DATA["average_cost_per_kwh"] * (wattage / 1000)
if cost_instance is None:
cost = MCS_SOLAR_PV_COST_DATA["average_cost_per_kwh"] * (wattage / 1000)
else:
cost = cost_instance.solar_pv(
wattage=wattage, has_battery=False
)["total"]
roi_summary.append(
{
"segmentIndex": segment["segmentIndex"],
@ -260,6 +291,8 @@ class GoogleSolarApi:
)
roi_summary = pd.DataFrame(roi_summary)
if roi_summary.empty:
continue
weighted_ratio = np.average(
roi_summary["ratio"].values, weights=roi_summary["generated_dc_energy"].values
@ -274,23 +307,59 @@ class GoogleSolarApi:
"total_cost": total_cost,
"weighted_ratio": weighted_ratio,
"panneled_roof_area": roi_summary["panneled_roof_area"].sum(),
"array_warrage": roi_summary["n_panels"].sum() * self.panel_wattage
"array_wattage": roi_summary["n_panels"].sum() * self.panel_wattage
}
)
panel_performance = pd.DataFrame(panel_performance)
# We can have duplicate configurations
if panel_performance.empty:
self.panel_performance = pd.DataFrame(
columns=[
"n_panels",
"yearly_dc_energy",
"total_cost",
"panneled_roof_area",
"array_wattage",
"initial_ac_kwh_per_year",
"lifetime_ac_kwh",
"roi",
"expected_payback_years",
"lifetime_dc_kwh"
]
)
return
# We can have duplicate configurations
panel_performance = panel_performance.drop_duplicates()
# If we look at the building level, we don't include any projects fewer than 10 panels, otherwise the
# minimum is 4
min_panels = 10 if is_building else 4
panel_performance = panel_performance[panel_performance["n_panels"] >= min_panels]
if panel_performance.empty:
self.panel_performance = pd.DataFrame(
columns=[
"n_panels",
"yearly_dc_energy",
"total_cost",
"panneled_roof_area",
"array_wattage",
"initial_ac_kwh_per_year",
"lifetime_ac_kwh",
"roi",
"expected_payback_years",
"lifetime_dc_kwh"
]
)
return
panel_performance["initial_ac_kwh_per_year"] = panel_performance["yearly_dc_energy"] * self.dc_to_ac_rate
# Remove anything where the total ac energy is less than half of the array wattage
panel_performance = panel_performance[
(panel_performance["initial_ac_kwh_per_year"] / panel_performance["array_warrage"]) >= 0.5
(panel_performance["initial_ac_kwh_per_year"] / panel_performance["array_wattage"]) >= 0.5
]
# 2) Calculate the lifetime solar energy production
@ -311,12 +380,25 @@ class GoogleSolarApi:
)
# Now that we know the lifetime consumption of ac kwh, we can estimate the roi
lifetime_energy_consumption = energy_consumption * self.installation_life_span
# Key things we estimate:
# - generation_value: this is the gbp value of the electricity generated
# - roi: the return on investment, calculated as generation_value / total_cost
# - surplus: this is the amount of additional energy generated, and therefore how much will be exported
# - surplus_value: the value of the surplus energy - this feeds into generation_value, when relevant
# - expected_payback_years: the number of years it will take to pay back the initial investment
# If we have a double property (i.e. the solar api has returned data for two units) we size up the solar panels
# for double the consumption, as if for two units.
if self.double_property:
lifetime_energy_consumption = energy_consumption * 2 * self.installation_life_span
else:
lifetime_energy_consumption = energy_consumption * self.installation_life_span
roi_results = []
for _, panel_config in panel_performance.iterrows():
lifetime_ac_kwh = panel_config["lifetime_ac_kwh"]
surplus = 0
generation_deficit = 0
if lifetime_ac_kwh < lifetime_energy_consumption:
# We estimate the amount of electricity generated, based on the price cap
generation_value = lifetime_ac_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
@ -329,7 +411,6 @@ class GoogleSolarApi:
surplus_value = surplus * AnnualBillSavings.ELECTRICITY_EXPORT_PAYMENT
generation_value = lifetime_energy_consumption * AnnualBillSavings.ELECTRICITY_PRICE_CAP
roi = (generation_value + surplus_value) / panel_config["total_cost"]
generation_deficit = surplus_value
# Calculate expected payback years
if generation_value > 0:
@ -381,9 +462,34 @@ class GoogleSolarApi:
panel_performance["expected_payback_years"] = np.ceil(panel_performance["expected_payback_years"]).astype(int)
if self.double_property:
# Now that we've optimise to an energy consumption that is double the original, we need to half the
# results
panel_performance["n_panels_halved"] = panel_performance["n_panels"] / 2
n_panels_required = {int(x) for x in np.floor(panel_performance["n_panels"] / 2)}
# We filter the data on this number of panels
panel_performance = panel_performance[panel_performance["n_panels_halved"].isin(n_panels_required)]
# We half the generation values
for col in [
"yearly_dc_energy",
"total_cost",
"panneled_roof_area",
"array_wattage",
"initial_ac_kwh_per_year",
"lifetime_ac_kwh",
"lifetime_dc_kwh",
"generation_value",
"generation_deficit",
"surplus"
]:
panel_performance[col] = panel_performance[col] / 2
panel_performance["n_panels"] = panel_performance["n_panels_halved"]
panel_performance = panel_performance.drop(columns=["n_panels_halved"])
self.panel_performance = panel_performance
def exclude_north_facing_segments(self):
def exclude_north_facing_segments(self, property_instance):
"""
Filter out any north-facing roof segments from the roof_segments attribute.
@ -394,9 +500,86 @@ class GoogleSolarApi:
for segment_index, segment in enumerate(self.roof_segments):
segment["segmentIndex"] = segment_index
# Check if the segment is north-facing
if self.NORTH_FACING_AZIMUTH_RANGE[0] <= segment['azimuthDegrees'] <= self.NORTH_FACING_AZIMUTH_RANGE[1]:
if (
self.NORTH_FACING_AZIMUTH_RANGE[0] <= segment['azimuthDegrees'] <= self.NORTH_FACING_AZIMUTH_RANGE[1]
) and not property_instance.roof["is_flat"]:
continue
filtered_segments.append(segment)
self.roof_segments = filtered_segments
@staticmethod
def haversine(lat1, lon1, lat2, lon2):
"""
Calculate the great-circle distance between two points on the Earth
given their latitude and longitude in decimal degrees. Using haversine formula.
"""
R = 6373.0 # approximate radius of earth in km
lat1 = radians(lat1)
lon1 = radians(lon1)
lat2 = radians(lat2)
lon2 = radians(lon2)
dlon = lon2 - lon1
dlat = lat2 - lat1
a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
c = 2 * atan2(sqrt(a), sqrt(1 - a))
distance = R * c
return distance
def exclude_likely_duplicate_surfaces(self):
    """
    Detect whether the roof segments look like they cover two attached units, and flag the property if so.

    Segments with a similar azimuth are treated as likely duplicates of each other; of each such group only
    the segment closest to the property center is kept. If this de-duplication leaves exactly half of the
    original segments, ``self.double_property`` is set to True. ``self.roof_segments`` itself is not
    modified: because the duplicated segments are not necessarily split perfectly in half, the solar
    analysis is run on all segments and the results are halved afterwards.

    :return: None
    """

    def is_similar(segment1, segment2, azimuth_tol=20):
        # Azimuths are angles on a circle, so the difference has to wrap around: 355 and 5 degrees are
        # 10 degrees apart, not 350
        azimuth_diff = abs(segment1['azimuthDegrees'] - segment2['azimuthDegrees']) % 360
        return min(azimuth_diff, 360 - azimuth_diff) <= azimuth_tol

    property_center = self.insights_data["center"]

    def distance_to_center(segment):
        return self.haversine(
            property_center['latitude'], property_center['longitude'],
            segment['center']['latitude'], segment['center']['longitude']
        )

    deduped_segments = []
    for segment in self.roof_segments:
        similar_segments = [s for s in deduped_segments if is_similar(segment, s)]
        if not similar_segments:
            deduped_segments.append(segment)
            continue

        # Compare distances to the property center and keep the closer segment
        current_dist = distance_to_center(segment)
        farther_segments = [s for s in similar_segments if current_dist < distance_to_center(s)]
        if farther_segments:
            for farther_segment in farther_segments:
                deduped_segments.remove(farther_segment)
            # Add the segment once, however many farther segments it replaces, so that the length check
            # below counts distinct surfaces
            deduped_segments.append(segment)

    # If we have a semi-detached property that has duplicated segments, we should expect to halve the number
    # of segments
    if len(deduped_segments) < len(self.roof_segments):
        if len(deduped_segments) != len(self.roof_segments) / 2:
            # We don't perform any dropping in this case
            return

        # Because the segments are duplicated, but the sizes aren't necessarily split perfectly in half, what
        # we need to do is perform the solar analysis and then halve the results. We set an indicator which
        # implies we should do this
        self.double_property = True

View file

@ -0,0 +1,44 @@
# Assumed air source heat pump (ASHP) efficiencies, expressed as percentages (they are divided by 100 wherever a
# ratio is needed). The average figure of 300% is the midpoint of the 200-400% range, which is often quoted as a
# sensible efficiency range for air source heat pumps; the pessimistic figure is the bottom of that range.
PESSIMISTIC_ASHP_EFFICIENCY = 200
AVERAGE_ASHP_EFFICIENCY = 300
# Conservative estimate of the proportion of generated solar electricity that will be consumed on site; the
# rest is assumed to be exported
SOLAR_CONSUMPTION_PROPORTION = 0.5
# Lookup from a heating / hot water system description string to the assumptions made about that system:
#   - "fuel": the fuel the system is assumed to run on
#   - "cop": presumably the coefficient of performance (useful heat out per unit of fuel in) - TODO confirm
# Keys must match the description text exactly; presumably they mirror EPC description fields such as
# "secondheat-description" - verify against callers.
DESCRIPTIONS_TO_FUEL_TYPES = {
"Air source heat pump, radiators, electric": {
"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100
},
"Boiler and radiators, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
'Electric storage heaters': {"fuel": 'Electricity', "cop": 1},
"Electric immersion, off-peak": {"fuel": 'Electricity', "cop": 1},
"Electric storage heaters, radiators": {"fuel": 'Electricity', "cop": 1},
"Room heaters, electric": {"fuel": 'Electricity', "cop": 1},
"Electric immersion, standard tariff": {"fuel": 'Electricity', "cop": 1},
"Portable electric heaters assumed for most rooms": {"fuel": 'Electricity', "cop": 1},
"Boiler and radiators, LPG": {"fuel": 'LPG', "cop": 0.9},
"Room heaters, dual fuel (mineral and wood)": {"fuel": 'Wood Logs', "cop": 1},
"Room heaters, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
"Warm air, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
"Boiler, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
"Gas multipoint": {"fuel": "Natural Gas", "cop": 0.9},
"Warm air, Electricaire": {"fuel": "Electricity", "cop": 1},
"Gas boiler/circulator": {"fuel": "Natural Gas", "cop": 0.9},
"Boiler and underfloor heating, mains gas": {"fuel": "Natural Gas", "cop": 0.9},
"No system present: electric heaters assumed": {"fuel": "Electricity", "cop": 1},
"Electric instantaneous at point of use": {"fuel": "Electricity", "cop": 1},
"Boiler and radiators, oil": {"fuel": "Oil", "cop": 0.9},
"Electric storage heaters, Electric storage heaters": {"fuel": "Electricity", "cop": 1},
# NOTE(review): the other direct-electric systems in this table use a cop of 1 - confirm 0.9 is intended here
"Boiler and radiators, electric": {"fuel": "Electricity", "cop": 0.9},
"Gas boiler/circulator, no cylinder thermostat": {"fuel": "Natural Gas", "cop": 0.9},
"Boiler and radiators, dual fuel (mineral and wood)": {"fuel": "Wood Logs", "cop": 0.9},
"Electric immersion, standard tariff, plus solar": {"fuel": "Electricity + Solar Thermal", "cop": 1},
"From main system, flue gas heat recovery": {"fuel": "Natural Gas", "cop": 0.9},
"Electric underfloor heating": {"fuel": "Electricity", "cop": 1},
"No system present: electric immersion assumed": {"fuel": "Electricity", "cop": 1},
"Air source heat pump, underfloor, electric": {
"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100
},
}

View file

@ -30,6 +30,11 @@ class Settings(BaseSettings):
LIGHTING_COST_PREDICTIONS_BUCKET: str
HEATING_COST_PREDICTIONS_BUCKET: str
HOT_WATER_COST_PREDICTIONS_BUCKET: str
HEATING_KWH_PREDICTIONS_BUCKET: str
HOTWATER_KWH_PREDICTIONS_BUCKET: str
# Other S3 buckets
ENERGY_ASSESSMENTS_BUCKET: str
class Config:
env_file = "backend/.env"
@ -48,5 +53,7 @@ def get_prediction_buckets():
"carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET,
"lighting_cost_predictions": get_settings().LIGHTING_COST_PREDICTIONS_BUCKET,
"heating_cost_predictions": get_settings().HEATING_COST_PREDICTIONS_BUCKET,
"hot_water_cost_predictions": get_settings().HOT_WATER_COST_PREDICTIONS_BUCKET
"hot_water_cost_predictions": get_settings().HOT_WATER_COST_PREDICTIONS_BUCKET,
"heating_kwh_predictions": get_settings().HEATING_KWH_PREDICTIONS_BUCKET,
"hotwater_kwh_predictions": get_settings().HOTWATER_KWH_PREDICTIONS_BUCKET,
}

View file

@ -0,0 +1,158 @@
from backend.app.db.models.energy_assessments import (
EnergyAssessment, EnergyAssessmentScenarios, EnergyAssessmentDocuments, DocumentTypeEnum
)
from sqlalchemy.orm import Session
from sqlalchemy.exc import IntegrityError
from typing import Optional, List, Dict
from sqlalchemy import desc
from utils.logger import setup_logger
logger = setup_logger()
def bulk_insert_energy_assessments(session: Session, data_list: List[dict]) -> Dict[int, int]:
    """
    This function inserts or updates multiple energy assessment records into the database and returns a mapping of
    uprn to energy_assessment_id.

    A record is matched on (uprn, inspection_date): if one exists its fields are overwritten with the supplied
    data, otherwise a new record is inserted. All changes are committed together in one transaction.

    :param session: The SQLAlchemy session.
    :param data_list: A list of dictionaries containing energy assessment data.
    :return: A dictionary mapping each uprn to its corresponding energy_assessment_id. If the transaction fails
        with an IntegrityError, everything is rolled back and an empty dictionary is returned.
    """
    uprn_to_assessment_id = {}

    try:
        for data in data_list:
            uprn = data.get('uprn')
            inspection_date = data.get('inspection_date')

            # Check if a record with the same uprn and inspection_date exists
            existing_record = session.query(EnergyAssessment).filter_by(
                uprn=uprn,
                inspection_date=inspection_date
            ).first()

            if existing_record:
                # Update the existing record with new data
                for key, value in data.items():
                    setattr(existing_record, key, value)
                session.add(existing_record)

                # Map the uprn to the existing record's ID
                uprn_to_assessment_id[uprn] = existing_record.id
            else:
                # Insert a new record
                new_assessment = EnergyAssessment(**data)
                session.add(new_assessment)

                # Flush the session to get the newly created ID before commit
                session.flush()

                # Map the uprn to the new record's ID
                uprn_to_assessment_id[uprn] = new_assessment.id

        # Commit the transaction
        session.commit()
        logger.info("All records inserted or updated successfully.")

    except IntegrityError as e:
        # Rollback the session in case of error
        session.rollback()
        logger.error(f"Error occurred: {e}")
        # Nothing was committed: ids collected for rows flushed in this transaction no longer refer to stored
        # records, so we must not hand them back to the caller
        return {}

    return uprn_to_assessment_id
def get_latest_assessment_by_uprn(session: Session, uprn: int) -> Optional[dict]:
    """
    Retrieve the latest energy assessment for a given UPRN based on the inspection date.

    :param session: The database session
    :param uprn: The unique property reference number
    :return: The latest assessment serialised with ``to_dict()``; ``EnergyAssessment.empty_response()`` when the
        UPRN has no assessment; or None if the query fails. (Both non-None results are presumably dictionaries -
        confirm against the model.)
    """
    try:
        # Query the EnergyAssessment model, filter by uprn, order by inspection_date in descending order
        latest_assessment = session.query(EnergyAssessment).filter_by(uprn=uprn).order_by(
            desc(EnergyAssessment.inspection_date)).first()

        return latest_assessment.to_dict() if latest_assessment else EnergyAssessment.empty_response()

    except Exception as e:
        # Deliberately best-effort: callers receive None rather than an exception, but the failure is logged
        # at error level so it is not lost among routine messages
        logger.error(f"An error occurred: {e}")
        return None
def create_scenarios_for_documents(session: Session, document_list: List[dict], uprn_to_assessment_id: dict):
    """
    Ensure a scenario record exists for every document that names one, linked to the document's assessment.

    On entry each document's ``scenario_id`` holds a scenario name. For every document with a non-empty name
    the matching scenario (same name and energy assessment) is looked up, created if missing, and the
    document's ``scenario_id`` is overwritten in place with the scenario's database id.

    :param session: The SQLAlchemy session.
    :param document_list: A list of dictionaries containing document data; modified in place.
    :param uprn_to_assessment_id: A dictionary mapping UPRN to energy_assessment_id.
    """
    try:
        for doc in document_list:
            name = doc.get('scenario_id')
            if not name:
                # Documents without a scenario are left untouched
                continue

            # Get the associated energy_assessment_id for the UPRN
            assessment_id = uprn_to_assessment_id.get(doc.get('uprn'))

            # Reuse the scenario if it already exists, otherwise create it
            scenario = session.query(EnergyAssessmentScenarios).filter_by(
                scenario_name=name,
                energy_assessment_id=assessment_id
            ).first()

            if not scenario:
                scenario = EnergyAssessmentScenarios(
                    scenario_name=name,
                    energy_assessment_id=assessment_id
                )
                session.add(scenario)
                session.flush()  # Get the new scenario ID

            # Swap the scenario name on the document for the scenario's ID
            doc['scenario_id'] = scenario.id

        # Commit the scenarios
        session.commit()
        logger.info("Scenarios created successfully.")

    except IntegrityError as e:
        session.rollback()
        logger.info(f"Error occurred: {e}")
def create_documents(session: Session, document_list: List[dict]):
    """
    Insert documents into the energy_assessment_documents table, linking them to scenarios and assessments.

    All documents are committed together; an IntegrityError rolls the whole batch back and is logged.

    :param session: The SQLAlchemy session.
    :param document_list: A list of dictionaries containing document data.
    """
    try:
        for doc in document_list:
            fields = {
                "uprn": doc['uprn'],
                # Validate the document type against the Enum and store its value
                "document_type": DocumentTypeEnum(doc['document_type']).value,
                "document_location": doc['document_location'],
                "energy_assessment_id": doc['energy_assessment_id'],
                # Might be None if no scenario
                "scenario_id": doc.get('scenario_id'),
            }
            session.add(EnergyAssessmentDocuments(**fields))

        # Commit all document insertions
        session.commit()
        logger.info("Documents created successfully.")

    except IntegrityError as e:
        session.rollback()
        logger.info(f"Error occurred: {e}")

View file

@ -1,10 +1,14 @@
from sqlalchemy import func
from backend.app.db.models.recommendations import Plan, PlanRecommendations, Recommendation
from backend.app.db.models.portfolio import Portfolio
from backend.app.db.models.recommendations import Plan, PlanRecommendations, Recommendation, Scenario
def aggregate_portfolio_recommendations(
session, portfolio_id: int, total_valuation_increase: float, labour_days: float, aggregated_data: dict
session,
portfolio_id: int,
scenario_id: int,
total_valuation_increase: float,
labour_days: float,
aggregated_data: dict
):
# Aggregate multiple fields
aggregates = (
@ -17,7 +21,11 @@ def aggregate_portfolio_recommendations(
)
.join(PlanRecommendations, PlanRecommendations.recommendation_id == Recommendation.id)
.join(Plan, Plan.id == PlanRecommendations.plan_id)
.filter(Plan.portfolio_id == portfolio_id, Plan.is_default == True, Recommendation.default == True)
.filter(
Plan.portfolio_id == portfolio_id,
Plan.scenario_id == scenario_id,
Recommendation.default == True
)
.one()
)
@ -30,16 +38,17 @@ def aggregate_portfolio_recommendations(
**aggregated_data
}
# Get the portfolio and update the fields
portfolio = session.query(Portfolio).filter_by(id=portfolio_id).one()
# Get the scenario and update the fields. This data needs to be stored against the scenario, not the portfolio
portfolio_scenario = session.query(Scenario).filter_by(id=scenario_id).one()
# Update the data
for key, value in aggregates_dict.items():
setattr(portfolio, key, value)
setattr(portfolio_scenario, key, value)
# Insert total valuation increase and labour days
portfolio.property_valuation_increase = total_valuation_increase
portfolio.labour_days = labour_days
portfolio_scenario.property_valuation_increase = total_valuation_increase
portfolio_scenario.labour_days = labour_days
# Merge the updated portfolio back into the session
session.merge(portfolio)
# Merge the updated portfolio plan back into the session
session.merge(portfolio_scenario)
session.flush()

View file

@ -1,8 +1,12 @@
from sqlalchemy import insert, delete
from sqlalchemy.orm import Session
from backend.app.db.models.recommendations import Plan, Recommendation, RecommendationMaterials, PlanRecommendations
from backend.app.db.models.portfolio import PropertyModel, PropertyTargetsModel, PropertyDetailsMeter, \
PropertyDetailsEpcModel
from sqlalchemy.exc import SQLAlchemyError
from backend.app.db.models.recommendations import (
Plan, Recommendation, RecommendationMaterials, PlanRecommendations, Scenario
)
from backend.app.db.models.portfolio import (
PropertyModel, PropertyTargetsModel, PropertyDetailsMeter, PropertyDetailsEpcModel
)
def create_plan(session: Session, plan):
@ -11,12 +15,38 @@ def create_plan(session: Session, plan):
:param session: The database session
:param plan: dictionary of data representing a plan to be created
"""
try:
new_plan = Plan(**plan)
session.add(new_plan)
session.flush()
session.commit()
return new_plan.id
except SQLAlchemyError as e:
session.rollback()
raise e
new_plan = Plan(**plan)
session.add(new_plan)
session.flush()
return new_plan.id
def create_scenario(session: Session, scenario):
    """
    Create a scenario record in the database and return it.

    The first scenario created for a portfolio becomes that portfolio's default scenario; any later scenario
    is created as non-default. The ``is_default`` key of the supplied dictionary is set accordingly.

    :param session: The database session
    :param scenario: dictionary of data representing a scenario to be created
    :return: the newly created Scenario
    :raises SQLAlchemyError: if the insert fails; the session is rolled back before re-raising
    """
    try:
        # Only the first scenario of a portfolio is the default, so look for any scenario already stored
        # against this portfolio id
        portfolio_scenario = session.query(Scenario).filter_by(portfolio_id=scenario["portfolio_id"]).first()
        scenario["is_default"] = not portfolio_scenario

        created = Scenario(**scenario)
        session.add(created)
        session.flush()
        session.commit()
        return created
    except SQLAlchemyError:
        session.rollback()
        raise
def create_recommendation(session: Session, recommendation):
@ -25,12 +55,15 @@ def create_recommendation(session: Session, recommendation):
:param session: The database session
:param recommendation: dictionary of data representing a recommendation to be created
"""
new_recommendation = Recommendation(**recommendation)
session.add(new_recommendation)
session.flush()
return new_recommendation.id
try:
new_recommendation = Recommendation(**recommendation)
session.add(new_recommendation)
session.flush()
session.commit()
return new_recommendation.id
except SQLAlchemyError as e:
session.rollback()
raise e
def create_recommendation_material(session: Session, recommendation_id, material_id, depth):
@ -68,62 +101,68 @@ def create_plan_recommendations(session: Session, plan_id, recommendation_ids):
session.execute(insert(PlanRecommendations).values(data))
def upload_recommendations(session: Session, recommendations_to_upload, property_id):
# Prepare data for bulk insert for Recommendation
recommendations_data = [
{
"property_id": property_id,
"type": rec["type"],
"description": rec["description"],
"estimated_cost": rec["total"],
"default": rec["default"],
"starting_u_value": rec.get("starting_u_value"),
"new_u_value": rec.get("new_u_value"),
"sap_points": rec["sap_points"],
"energy_savings": rec["heat_demand"],
"kwh_savings": rec["kwh_savings"],
"co2_equivalent_savings": rec["co2_equivalent_savings"],
"total_work_hours": rec["labour_hours"],
"energy_cost_savings": rec["energy_cost_savings"],
"labour_days": rec["labour_days"],
"already_installed": rec["already_installed"],
}
for rec in recommendations_to_upload
]
def upload_recommendations(session: Session, recommendations_to_upload, property_id, new_plan_id):
try:
# Prepare data for bulk insert for Recommendation
recommendations_data = [
{
"property_id": property_id,
"type": rec["type"],
"description": rec["description"],
"estimated_cost": rec["total"],
"default": rec["default"],
"starting_u_value": rec.get("starting_u_value"),
"new_u_value": rec.get("new_u_value"),
"sap_points": rec["sap_points"],
"energy_savings": rec["heat_demand"],
"kwh_savings": rec["kwh_savings"],
"co2_equivalent_savings": rec["co2_equivalent_savings"],
"total_work_hours": rec["labour_hours"],
"energy_cost_savings": rec["energy_cost_savings"],
"labour_days": rec["labour_days"],
"already_installed": rec["already_installed"],
}
for rec in recommendations_to_upload
]
session.bulk_insert_mappings(Recommendation, recommendations_data)
# Insert the recommendations, get back the IDs
stmt = insert(Recommendation).returning(Recommendation.id).values(recommendations_data)
result = session.execute(stmt)
uploaded_recommendation_ids = [row[0] for row in result]
# To get the IDs of the newly inserted recommendations, we need to flush the session
session.flush()
# Prepare data for bulk insert for RecommendationMaterials
recommendation_materials_data = [
{
"recommendation_id": recommendation_id,
"material_id": part["id"],
"depth": int(part["depth"]) if part["depth"] else None,
"quantity": part["quantity"],
"quantity_unit": part["quantity_unit"],
"estimated_cost": part["total"],
}
for rec, recommendation_id in zip(recommendations_to_upload, uploaded_recommendation_ids)
for part in rec["parts"]
]
# Map the uploaded_recommendation_ids with the original data for reference
uploaded_recommendation_ids = [rec.id for rec in session.query(Recommendation).filter(
Recommendation.property_id == property_id,
Recommendation.description.in_([rec["description"] for rec in recommendations_to_upload])
)]
session.bulk_insert_mappings(RecommendationMaterials, recommendation_materials_data)
# Prepare data for bulk insert for RecommendationMaterials
# We can have multiple materials per recommendation. The aggregation of the materials will total the
# recommendation figures
recommendation_materials_data = [
{
"recommendation_id": recommendation_id,
"material_id": part["id"],
"depth": int(part["depth"]) if part["depth"] else None,
"quantity": part["quantity"],
"quantity_unit": part["quantity_unit"],
"estimated_cost": part["total"],
}
for rec, recommendation_id in zip(recommendations_to_upload, uploaded_recommendation_ids)
for part in rec["parts"]
]
# flush the changes to get the newly created IDs
session.flush()
session.bulk_insert_mappings(RecommendationMaterials, recommendation_materials_data)
create_plan_recommendations(
session, plan_id=new_plan_id, recommendation_ids=uploaded_recommendation_ids
)
# flush the changes to get the newly created IDs
session.flush()
# Commit the transaction
session.commit()
return uploaded_recommendation_ids
return True
except SQLAlchemyError as e:
# Rollback the transaction in case of an error
session.rollback()
print(f"An error occurred: {e}")
return False
def clear_portfolio(session: Session, portfolio_id: int):
@ -148,6 +187,9 @@ def clear_portfolio(session: Session, portfolio_id: int):
# Delete all Plans associated with the portfolio
session.execute(delete(Plan).where(Plan.portfolio_id == portfolio_id))
# Delete all Scenarios associated with the portfolio
session.execute(delete(Scenario).where(Scenario.portfolio_id == portfolio_id))
# Delete all Recommendations associated with the properties
session.execute(delete(Recommendation).where(Recommendation.property_id.in_(property_ids)))

View file

@ -0,0 +1,207 @@
import enum
from datetime import datetime, timezone

from sqlalchemy import Column, Integer, BigInteger, Text, Float, DateTime, Boolean, Date, ForeignKey
from sqlalchemy.dialects.postgresql import ENUM as PgEnum
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class EnergyAssessment(Base):
__tablename__ = 'energy_assessments'
id = Column(BigInteger, primary_key=True, autoincrement=True)
uprn = Column(BigInteger, nullable=False)
uprn_source = Column(Text, nullable=False)
property_type = Column(Text, nullable=False)
building_reference_number = Column(Text)
current_energy_efficiency = Column(Text, nullable=False)
current_energy_rating = Column(Text, nullable=False)
address1 = Column(Text, nullable=False)
address2 = Column(Text, nullable=False)
address3 = Column(Text)
posttown = Column(Text, nullable=False)
postcode = Column(Text, nullable=False)
address = Column(Text, nullable=False)
county = Column(Text)
constituency = Column(Text)
constituency_label = Column(Text)
low_energy_fixed_light_count = Column(Text, nullable=False)
construction_age_band = Column(Text, nullable=False)
mainheat_energy_eff = Column(Text, nullable=False)
windows_env_eff = Column(Text, nullable=False)
lighting_energy_eff = Column(Text, nullable=False)
environment_impact_potential = Column(Text, nullable=False)
mainheatcont_description = Column(Text, nullable=False)
sheating_energy_eff = Column(Text, nullable=False)
local_authority = Column(Text, nullable=False)
local_authority_label = Column(Text, nullable=False)
fixed_lighting_outlets_count = Column(Text, nullable=False)
energy_tariff = Column(Text, nullable=False)
mechanical_ventilation = Column(Text, nullable=False)
solar_water_heating_flag = Column(Text, nullable=False)
co2_emissions_potential = Column(Text, nullable=False)
number_heated_rooms = Column(Text, nullable=False)
floor_description = Column(Text, nullable=False)
energy_consumption_potential = Column(Text, nullable=False)
built_form = Column(Text, nullable=False)
number_open_fireplaces = Column(Text, nullable=False)
windows_description = Column(Text, nullable=False)
glazed_area = Column(Text, nullable=False)
inspection_date = Column(DateTime(timezone=True), nullable=False)
mains_gas_flag = Column(Text, nullable=False)
co2_emiss_curr_per_floor_area = Column(Text, nullable=False)
heat_loss_corridor = Column(Text, nullable=False)
unheated_corridor_length = Column(Text)
flat_storey_count = Column(Text)
roof_energy_eff = Column(Text, nullable=False)
total_floor_area = Column(Text, nullable=False)
environment_impact_current = Column(Text, nullable=False)
roof_description = Column(Text, nullable=False)
floor_energy_eff = Column(Text, nullable=False)
number_habitable_rooms = Column(Text, nullable=False)
hot_water_env_eff = Column(Text, nullable=False)
mainheatc_energy_eff = Column(Text, nullable=False)
main_fuel = Column(Text, nullable=False)
lighting_env_eff = Column(Text, nullable=False)
windows_energy_eff = Column(Text, nullable=False)
floor_env_eff = Column(Text, nullable=False)
sheating_env_eff = Column(Text, nullable=False)
lighting_description = Column(Text, nullable=False)
roof_env_eff = Column(Text, nullable=False)
walls_energy_eff = Column(Text, nullable=False)
photo_supply = Column(Text, nullable=False)
lighting_cost_potential = Column(Text, nullable=False)
mainheat_env_eff = Column(Text, nullable=False)
multi_glaze_proportion = Column(Text, nullable=False)
main_heating_controls = Column(Text, nullable=False)
flat_top_storey = Column(Text)
secondheat_description = Column(Text, nullable=False)
walls_env_eff = Column(Text, nullable=False)
transaction_type = Column(Text, nullable=False)
extension_count = Column(Text, nullable=False)
mainheatc_env_eff = Column(Text, nullable=False)
lmk_key = Column(Text)
wind_turbine_count = Column(Text, nullable=False)
tenure = Column(Text, nullable=False)
floor_level = Column(Text, nullable=False)
potential_energy_efficiency = Column(Text, nullable=False)
potential_energy_rating = Column(Text, nullable=False)
hot_water_energy_eff = Column(Text, nullable=False)
low_energy_lighting = Column(Text, nullable=False)
walls_description = Column(Text, nullable=False)
hotwater_description = Column(Text, nullable=False)
co2_emissions_current = Column(Text, nullable=False)
heating_cost_current = Column(Text, nullable=False)
heating_cost_potential = Column(Text, nullable=False)
hot_water_cost_current = Column(Text, nullable=False)
hot_water_cost_potential = Column(Text, nullable=False)
lighting_cost_current = Column(Text, nullable=False)
energy_consumption_current = Column(Text, nullable=False)
lodgement_date = Column(Date, nullable=False)
lodgement_datetime = Column(DateTime(timezone=False), nullable=False)
mainheat_description = Column(Text, nullable=False)
floor_height = Column(Float, nullable=False)
glazed_type = Column(Text, nullable=False)
file_location = Column(Text, nullable=False)
surveyor_name = Column(Text, nullable=False)
surveyor_company = Column(Text, nullable=False)
space_heating_kwh = Column(Text, nullable=False)
water_heating_kwh = Column(Text, nullable=False)
number_of_doors = Column(Integer, nullable=False)
number_of_insulated_doors = Column(Integer, nullable=False)
number_of_floors = Column(Integer, nullable=False)
insulation_wall_area = Column(Float, nullable=False)
heat_loss_perimeter = Column(Float, nullable=False)
party_wall_length = Column(Float, nullable=False)
perimeter = Column(Float, nullable=False)
rooms_with_bath_and_or_shower = Column(Integer)
rooms_with_mixer_shower_no_bath = Column(Integer)
room_with_bath_and_mixer_shower = Column(Integer)
percent_draftproofed = Column(Integer)
has_hot_water_cylinder = Column(Boolean)
cylinder_insulation_type = Column(Text)
cylinder_insulation_thickness = Column(Integer)
cylinder_thermostat = Column(Boolean)
main_dwelling_ground_floor_area = Column(Float)
number_of_windows = Column(Integer)
windows_area = Column(Float)
EPC_KEYS = [
'low_energy_fixed_light_count', 'address', 'uprn_source', 'floor_height', 'heating_cost_potential',
'unheated_corridor_length', 'hot_water_cost_potential', 'construction_age_band', 'potential_energy_rating',
'mainheat_energy_eff', 'windows_env_eff', 'lighting_energy_eff', 'environment_impact_potential', 'glazed_type',
'heating_cost_current', 'address3', 'mainheatcont_description', 'sheating_energy_eff', 'property_type',
'local_authority_label', 'fixed_lighting_outlets_count', 'energy_tariff', 'mechanical_ventilation',
'hot_water_cost_current', 'county', 'postcode', 'solar_water_heating_flag', 'constituency',
'co2_emissions_potential', 'number_heated_rooms', 'floor_description', 'energy_consumption_potential',
'local_authority', 'built_form', 'number_open_fireplaces', 'windows_description', 'glazed_area',
'inspection_date', 'mains_gas_flag', 'co2_emiss_curr_per_floor_area', 'address1', 'heat_loss_corridor',
'flat_storey_count', 'constituency_label', 'roof_energy_eff', 'total_floor_area', 'building_reference_number',
'environment_impact_current', 'co2_emissions_current', 'roof_description', 'floor_energy_eff',
'number_habitable_rooms', 'address2', 'hot_water_env_eff', 'posttown', 'mainheatc_energy_eff', 'main_fuel',
'lighting_env_eff', 'windows_energy_eff', 'floor_env_eff', 'sheating_env_eff', 'lighting_description',
'roof_env_eff', 'walls_energy_eff', 'photo_supply', 'lighting_cost_potential', 'mainheat_env_eff',
'multi_glaze_proportion', 'main_heating_controls', 'lodgement_datetime', 'flat_top_storey',
'current_energy_rating', 'secondheat_description', 'walls_env_eff', 'transaction_type', 'uprn',
'current_energy_efficiency', 'energy_consumption_current', 'mainheat_description', 'lighting_cost_current',
'lodgement_date', 'extension_count', 'mainheatc_env_eff', 'lmk_key', 'wind_turbine_count', 'tenure',
'floor_level', 'potential_energy_efficiency', 'hot_water_energy_eff', 'low_energy_lighting',
'walls_description', 'hotwater_description'
]
def to_dict(self):
    """
    Convert the SQLAlchemy object to a dictionary.

    The result is split into two sections:
    - "epc": the attributes named in EPC_KEYS, with the underscores in each key replaced by hyphens
    - "condition": every remaining table column, keyed by its column name

    :return: dictionary of the form {"epc": {...}, "condition": {...}}
    """
    # Build the lookup once so the column filter below is a constant-time membership test rather than a
    # scan of the EPC_KEYS list for every column
    epc_keys = frozenset(self.EPC_KEYS)
    epc = {key.replace("_", "-"): getattr(self, key) for key in self.EPC_KEYS}
    # Get everything else
    condition = {
        column.name: getattr(self, column.name)
        for column in self.__table__.columns if column.name not in epc_keys
    }
    return {"epc": epc, "condition": condition}
@staticmethod
def empty_response():
    """
    Return a placeholder with the same top-level keys that to_dict produces ("epc" and "condition"),
    both set to empty dictionaries.
    """
    return {"epc": {}, "condition": {}}
class EnergyAssessmentScenarios(Base):
    """
    A named scenario attached to a single energy assessment (table "energy_assessment_scenarios").
    """
    __tablename__ = 'energy_assessment_scenarios'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    scenario_name = Column(Text, nullable=False)
    # Every scenario must belong to a row in energy_assessments
    energy_assessment_id = Column(BigInteger, ForeignKey('energy_assessments.id'), nullable=False)
class DocumentTypeEnum(enum.Enum):
    """
    The kinds of document that can be stored against an energy assessment.

    The member values are the human-readable labels; this enum backs the document_type column of
    EnergyAssessmentDocuments.
    """
    EPR = "EPR"
    ConditionReport = "Condition Report"
    EvidenceReport = "Evidence Report"
    SummaryInformation = "Summary Information"
    FloorPlan = "Floor Plan"
    # The two scenario types below are the only ones expected to carry a scenario_id - TODO confirm
    ScenarioDraftEPC = "Scenario Draft EPC"
    ScenarioSiteNotes = "Scenario Site Notes"
class EnergyAssessmentDocuments(Base):
    """
    A document (EPR, floor plan, scenario draft EPC, ...) collected during an energy assessment, stored as a
    location string alongside the UPRN and the energy assessment it belongs to.
    """
    __tablename__ = 'energy_assessment_documents'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    uprn = Column(BigInteger, nullable=False)
    energy_assessment_id = Column(BigInteger, ForeignKey('energy_assessments.id'), nullable=False)
    # create_type=False: the "document_type" enum type is not created by this model
    document_type = Column(PgEnum(DocumentTypeEnum, name="document_type", create_type=False), nullable=False)
    document_location = Column(Text, nullable=False)
    # The column is timezone-aware, so the default must be an aware datetime. datetime.utcnow() returns a naive
    # value (which the database would interpret in the session time zone) and is deprecated since Python 3.12.
    # A callable is passed so the timestamp is evaluated for each insert.
    uploaded_at = Column(DateTime(timezone=True), nullable=False, default=lambda: datetime.now(timezone.utc))
    # Only set for documents that belong to a scenario
    scenario_id = Column(BigInteger, ForeignKey('energy_assessment_scenarios.id'), nullable=True)

    @staticmethod
    def empty_response():
        """
        Return a placeholder document dictionary with every field set to None.
        """
        return {
            "id": None,
            "uprn": None,
            "document_type": None,
            "document_location": None,
            "uploaded_at": None,
            "scenario_id": None
        }

View file

@ -3,6 +3,7 @@ import pytz
import datetime
from sqlalchemy import Column, Integer, Text, Boolean, Float, DateTime, Enum, ForeignKey, CheckConstraint
from sqlalchemy.ext.declarative import declarative_base
from backend.app.db.models.users import UserModel # noqa
Base = declarative_base()
@ -168,7 +169,8 @@ class PropertyDetailsEpcModel(Base):
energy_tariff = Column(Text)
primary_energy_consumption = Column(Float)
co2_emissions = Column(Float)
adjusted_energy_consumption = Column(Float)
current_energy_demand = Column(Float)
current_energy_demand_heating_hotwater = Column(Float)
estimated = Column(Boolean, default=False)
@ -204,3 +206,13 @@ class PropertyTargetsModel(Base):
created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
epc = Column(Enum(Epc))
heat_demand = Column(Text)
class PortfolioUsers(Base):
    """
    Links a user to a portfolio and records the role the user holds on that portfolio.
    """
    __tablename__ = "portfolioUsers"

    id = Column(Integer, primary_key=True, autoincrement=True)
    user_id = Column(Integer, ForeignKey('user.id'), nullable=False)
    portfolioId = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
    role = Column(Text, nullable=False)
    # The defaults must be callables: a bare datetime.datetime.now(pytz.utc) call is evaluated once, when the
    # module is imported, so every row would be stamped with the process start time instead of its insert time
    created_at = Column(DateTime, nullable=False, default=lambda: datetime.datetime.now(pytz.utc))
    updated_at = Column(DateTime, nullable=False, default=lambda: datetime.datetime.now(pytz.utc))

View file

@ -50,8 +50,10 @@ class Plan(Base):
__tablename__ = 'plan'
id = Column(BigInteger, primary_key=True, autoincrement=True)
name = Column(String, nullable=True, default="")
portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False)
property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False)
scenario_id = Column(BigInteger, ForeignKey('scenario.id')) # Doesn't have to be linked to a scenario
created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
is_default = Column(Boolean, nullable=False)
valuation_increase_lower_bound = Column(Float)
@ -65,3 +67,46 @@ class PlanRecommendations(Base):
id = Column(BigInteger, primary_key=True, autoincrement=True)
plan_id = Column(BigInteger, ForeignKey('plan.id'), nullable=False)
recommendation_id = Column(BigInteger, ForeignKey('recommendation.id'), nullable=False)
class Scenario(Base):
    """
    A retrofit scenario for a portfolio (table "scenario").

    Holds the inputs the scenario was created from (goal, housing type, budget and the input file paths)
    together with the aggregated results for the scenario.
    """
    __tablename__ = 'scenario'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    name = Column(String, nullable=False)
    # Populated by the database at insert time
    created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
    budget = Column(Float)
    portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False)
    housing_type = Column(String, nullable=False)
    goal = Column(String, nullable=False)
    # Locations of the input files the scenario was built from; only the trigger file is mandatory
    trigger_file_path = Column(String, nullable=False)
    already_installed_file_path = Column(String)
    patches_file_path = Column(String)
    non_invasive_recommendations_file_path = Column(String)
    exclusions = Column(String)
    multi_plan = Column(Boolean, default=False)
    is_default = Column(Boolean, default=False, nullable=False)

    # Add in the fields we need, which were previously sitting at the portfolio level
    cost = Column(Float)
    total_work_hours = Column(Float)
    energy_savings = Column(Float)
    co2_equivalent_savings = Column(Float)
    energy_cost_savings = Column(Float)
    # NOTE(review): the String-typed aggregates below presumably hold serialised values - confirm the format
    # against the code that writes them
    epc_breakdown_pre_retrofit = Column(String)
    epc_breakdown_post_retrofit = Column(String)
    number_of_properties = Column(BigInteger)
    n_units_to_retrofit = Column(BigInteger)
    co2_per_unit_pre_retrofit = Column(String)
    co2_per_unit_post_retrofit = Column(String)
    energy_bill_per_unit_pre_retrofit = Column(String)
    energy_bill_per_unit_post_retrofit = Column(String)
    energy_consumption_per_unit_pre_retrofit = Column(String)
    energy_consumption_per_unit_post_retrofit = Column(String)
    valuation_improvement_per_unit = Column(String)
    cost_per_unit = Column(String)
    cost_per_co2_saved = Column(String)
    cost_per_sap_point = Column(String)
    valuation_return_on_investment = Column(String)
    property_valuation_increase = Column(Float)
    labour_days = Column(Float)

View file

@ -0,0 +1,273 @@
import os
from io import BytesIO
from typing import List
from fastapi import APIRouter, Depends
from starlette.responses import Response
from backend.app.config import get_settings
from backend.app.dependencies import validate_token
from backend.app.energy_assessments.schemas import EnergyAssessmentUploadPayload
from sqlalchemy.orm import sessionmaker
from sqlalchemy.exc import IntegrityError, OperationalError
from backend.app.db.connection import db_engine
from backend.app.db.functions.energy_assessment_functions import (
bulk_insert_energy_assessments, create_scenarios_for_documents, create_documents
)
from etl.xml_survey_extraction.XmlParser import XmlParser
from utils.s3 import (
read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder, save_csv_to_s3,
list_files_in_s3_folder
)
from utils.logger import setup_logger
logger = setup_logger()
def insert_energy_assessment_documents(document_list: List[dict], uprn_to_assessment_id: dict):
    """
    Stamps each document dictionary with the energy_assessment_id that corresponds to its UPRN.

    The dictionaries are modified in place and nothing is written to the database here. A document whose UPRN
    has no entry in the mapping is logged and left without an energy_assessment_id.

    :param document_list: A list of dictionaries containing document data. Each must have a 'uprn' key.
    :param uprn_to_assessment_id: A dictionary mapping UPRN to energy_assessment_id.
    """
    for doc in document_list:
        uprn = doc['uprn']
        assessment_id = uprn_to_assessment_id.get(uprn)

        if assessment_id:
            # We know which assessment this document belongs to, so record it on the document
            doc['energy_assessment_id'] = assessment_id
        else:
            logger.info(f"No energy_assessment_id found for UPRN: {uprn}. Skipping document.")

    logger.info("Energy Assessment IDs assigned to documents.")
router = APIRouter(
prefix="/energy-assessments",
tags=["energy-assessments"],
dependencies=[Depends(validate_token)],
responses={404: {"description": "Not found"}}
)
def _normalised_file_name(path: str) -> str:
    """Return the final component of an S3 key, lower-cased and with all spaces removed."""
    return path.split("/")[-1].replace(" ", "").lower()


def _collect_assessment_files(bucket: str, assessment: str) -> dict:
    """List every file uploaded under one energy assessment folder and group the keys by document type."""
    docs_folder = os.path.join(assessment, "docs & plans")
    uploaded_xmls = list_xmls_in_s3_folder(bucket_name=bucket, folder_name=docs_folder)
    energy_assessment_files = list_files_in_s3_folder(bucket_name=bucket, folder_name=docs_folder)
    # Remove xmls from the list of files
    energy_assessment_files = [file for file in energy_assessment_files if file not in uploaded_xmls]

    def files_matching(marker: str) -> List[str]:
        # Document types are recognised by a marker in the normalised file name
        return [file for file in energy_assessment_files if marker in _normalised_file_name(file)]

    # We now retrieve scenarios - these are the sub folders whose name contains the word scenario
    scenario_folders = [
        folder
        for folder in list_files_and_subfolders_in_s3_folder(bucket_name=bucket, folder_name=assessment)
        if "scenario" in folder.rstrip("/").split("/")[-1].lower()
    ]

    scenario_documents = []
    for sf in scenario_folders:
        scenario_files = list_files_in_s3_folder(bucket_name=bucket, folder_name=sf)
        notes = [file for file in scenario_files if "sitenotes" in _normalised_file_name(file)]
        # Anything in the scenario folder that is not site notes is treated as the draft EPC
        draft_epc = [file for file in scenario_files if file not in notes]
        scenario_documents.append(
            {
                "identifier": sf.rstrip("/").split("/")[-1],
                "Scenario Site Notes": notes,
                "Scenario Draft EPC": draft_epc
            }
        )

    return {
        "xmls": uploaded_xmls,
        "EPR": files_matching("epr.pdf"),
        "Condition Report": files_matching("cr.pdf"),
        "Evidence Report": files_matching("evidence.pdf"),
        "Summary Information": files_matching("sn.pdf"),
        # Floor plans - these are just the jpgs
        "Floor Plan": [file for file in energy_assessment_files if file.endswith(".jpg")],
        "scenario_documents": scenario_documents
    }


def _build_document_rows(uprn: int, files: dict) -> List[dict]:
    """Turn the grouped file keys of one property into energy assessment document rows."""
    rows = []
    for doc_type, doc_files in files.items():
        if doc_type == "xmls":
            # The xmls are parsed for their data; they are not stored as documents
            continue

        if doc_type == "scenario_documents":
            for doc in doc_files:
                # The folder name is used as a placeholder scenario id, as a means of associating the
                # scenario documents with the correct scenario later on
                scenario_id = doc["identifier"]
                for scenario_doc_type in ("Scenario Site Notes", "Scenario Draft EPC"):
                    for location in doc[scenario_doc_type]:
                        rows.append(
                            {
                                "uprn": uprn,
                                "document_type": scenario_doc_type,
                                "document_location": location,
                                "scenario_id": scenario_id
                            }
                        )
            continue

        for doc in doc_files:
            rows.append(
                {
                    "uprn": uprn,
                    "document_type": doc_type,
                    "document_location": doc,
                    "scenario_id": None
                }
            )
    return rows


def _extract_xml_data(bucket: str, xmls: List[str], uprn: int, surveyor: str) -> dict:
    """Parse the uploaded xmls of one property and merge the extracted fields into a single dictionary."""
    extracted_data = {}
    for xml in xmls:
        xml_data = read_from_s3(bucket_name=bucket, s3_file_name=xml)
        xml_parser = XmlParser(
            file=BytesIO(xml_data),
            filekey=os.path.join(f"s3://{bucket}", xml),
            uprn=uprn,
            surveyor_company=surveyor,
        )
        xml_parser.run()

        # NOTE(review): only xmls the parser flags with is_lig contribute data - confirm this is the intended
        # extent of the condition
        if xml_parser.is_lig:
            logger.info(f"Extracted data from {xml}")
            data_to_update = {**xml_parser.epc, **xml_parser.additional_data}
            # We need to update the keys to match the database schema - i.e. we should replace all hyphens
            # with underscores
            extracted_data.update({k.replace("-", "_"): v for k, v in data_to_update.items()})
    return extracted_data


@router.post("/upload")
async def upload(body: EnergyAssessmentUploadPayload):
    """
    Given a location in S3, this service will retrieve the data in S3 and perform the following:
    1) Extract the data and store it to the database
    2) Extract the links to other artefacts collected during the energy assessment, such as EPRs, floor plans and
    condition reports

    This will allow us to do the following:
    1) Present the findings of the energy assessment to the client
    2) Allow the end user to download the artefacts collected during the energy assessment

    Eventually, we will want this service to collect the key documents from the service where they're uploaded
    (e.g. Onedrive) and store them to S3, but for the moment, this is sufficient

    :param body: The request body, identifying the surveyor and project code the data was uploaded against
    :return: A 200 response on success, 400 for malformed data and 500 for database or unexpected errors
    """
    logger.info("Connecting to db")
    session = sessionmaker(bind=db_engine)()

    try:
        bucket = get_settings().ENERGY_ASSESSMENTS_BUCKET

        logger.info("Extracting energy assessment data")
        energy_assessments = list_files_and_subfolders_in_s3_folder(
            bucket_name=bucket,
            folder_name=f"{body.surveyor}/{body.project_code}/"
        )
        logger.info(
            f"Found {len(energy_assessments)} energy assessments for {body.surveyor} and {body.project_code}"
        )

        assessments_map = {}
        for assessment in energy_assessments:
            files = _collect_assessment_files(bucket, assessment)
            # Each assessment folder is named after the UPRN of the property; a non-numeric folder name
            # raises a ValueError, which is reported as a bad request below
            uprn = int(assessment.rstrip("/").split("/")[-1])
            assessments_map[uprn] = files

        logger.info("Extracted energy assessment data and storing file locations to database")
        xml_data_to_store = []
        energy_assessment_documents = []
        for uprn, files in assessments_map.items():
            energy_assessment_documents.extend(_build_document_rows(uprn, files))
            xml_data_to_store.append(_extract_xml_data(bucket, files["xmls"], uprn, body.surveyor))

        logger.info("Storing energy assessment xml data to database")
        uprn_to_assessment_id = bulk_insert_energy_assessments(session, xml_data_to_store)

        # Insert energy assessment id into the documents data
        insert_energy_assessment_documents(energy_assessment_documents, uprn_to_assessment_id)
        create_scenarios_for_documents(session, energy_assessment_documents, uprn_to_assessment_id)
        create_documents(session, energy_assessment_documents)
    except IntegrityError:
        logger.error("Database integrity error occurred", exc_info=True)
        session.rollback()
        return Response(status_code=500, content="Database integrity error.")
    except OperationalError:
        logger.error("Database operational error occurred", exc_info=True)
        session.rollback()
        return Response(status_code=500, content="Database operational error.")
    except ValueError:
        logger.error("Value error - possibly due to malformed data", exc_info=True)
        session.rollback()
        return Response(status_code=400, content="Bad request: malformed data.")
    except Exception as e:  # General exception handling
        # Include the traceback, as the other handlers do, so unexpected failures can be diagnosed
        logger.error(f"An error occurred: {e}", exc_info=True)
        session.rollback()
        return Response(status_code=500, content="An unexpected error occurred.")
    finally:
        # The session is closed here on every path, so no explicit close is needed inside the try block
        session.close()

    return Response(status_code=200)

View file

@ -0,0 +1,10 @@
from pydantic import BaseModel
class EnergyAssessmentUploadPayload(BaseModel):
    """
    Request payload identifying which uploaded energy assessment data should be processed.
    """
    portfolio_id: int
    # This is the energy assessment company/individual that conducted the energy assessment, and whose name the
    # data is uploaded against
    surveyor: str
    # This is a code, like VEC001, which is used to identify the project and which the data is also uploaded
    # against
    project_code: str

View file

@ -10,6 +10,7 @@ from sqlalchemy.exc import IntegrityError, OperationalError
from sqlalchemy.orm import sessionmaker
from starlette.responses import Response
import backend.app.assumptions as assumptions
from backend.app.config import get_settings, get_prediction_buckets
from backend.app.db.connection import db_engine
from backend.app.db.functions.materials_functions import get_materials
@ -19,8 +20,9 @@ from backend.app.db.functions.property_functions import (
update_or_create_property_spatial_details
)
from backend.app.db.functions.recommendations_functions import (
create_plan, create_plan_recommendations, upload_recommendations
create_plan, upload_recommendations, create_scenario
)
from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn
from backend.app.db.models.portfolio import rating_lookup
from backend.app.dependencies import validate_token
from backend.app.plan.schemas import PlanTriggerRequest, MdsRequest
@ -28,9 +30,9 @@ from backend.app.plan.utils import get_cleaned
from backend.app.utils import epc_to_sap_lower_bound, sap_to_epc
from backend.ml_models.api import ModelApi
from backend.ml_models.AnnualBillSavings import AnnualBillSavings
from backend.Property import Property
from backend.apis.GoogleSolarApi import GoogleSolarApi
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from recommendations.optimiser.CostOptimiser import CostOptimiser
from recommendations.optimiser.GainOptimiser import GainOptimiser
@ -40,7 +42,11 @@ from recommendations.Mds import Mds
from utils.logger import setup_logger
from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3
from backend.ml_models.Valuation import PropertyValuation
from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel
from etl.bill_savings.KwhData import KwhData
from etl.spatial.OpenUprnClient import OpenUprnClient
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
logger = setup_logger()
@ -127,8 +133,8 @@ def extract_portfolio_aggregation_data(
[r["energy_cost_savings"] for r in default_recommendations]
)
pre_retrofit_energy_consumption = p.current_adjusted_energy
post_retrofit_energy_consumption = p.current_adjusted_energy - sum(
pre_retrofit_energy_consumption = p.current_energy_consumption
post_retrofit_energy_consumption = p.current_energy_consumption - sum(
[r["kwh_savings"] for r in default_recommendations]
)
@ -219,6 +225,142 @@ def extract_portfolio_aggregation_data(
return aggregation_data
def create_epc_records(epc_searcher: SearchEpc, energy_assessment: dict):
    """
    Build the epc_records dictionary (newest EPC, full SAP EPC and older EPCs), taking into account an energy
    assessment that we may have performed for the client.

    :param epc_searcher: An instance of the SearchEpc class
    :param energy_assessment: The energy assessment we have performed. If we have not performed an energy
                              assessment, this should be the empty response defined by the model's
                              EnergyAssessment.empty_response() method
    :return: A tuple of (epc_records, energy_assessment_is_newer), where epc_records has the keys
             "original_epc", "full_sap_epc" and "old_data"
    """
    assessment_epc = energy_assessment["epc"]

    # No energy assessment: the public EPC data is used untouched
    if not assessment_epc:
        records = {
            "original_epc": epc_searcher.newest_epc.copy(),
            "full_sap_epc": epc_searcher.full_sap_epc.copy(),
            "old_data": epc_searcher.older_epcs.copy(),
        }
        return records, False

    assessment_date = assessment_epc["inspection-date"].strftime("%Y-%m-%d")

    # The location fields are not something we pull out of the energy assessment right now, so they are
    # taken from the newest public EPC
    location_columns = ("county", "constituency", "constituency-label", "local-authority", "local-authority-label")
    for column in location_columns:
        assessment_epc[column] = epc_searcher.newest_epc[column]

    newest_public_date = pd.to_datetime(epc_searcher.newest_epc["inspection-date"])
    if pd.to_datetime(assessment_date) > newest_public_date:
        # Our energy assessment is newer than every EPC available for this property, so it becomes the
        # original EPC and the newest public one joins the old data
        records = {
            "original_epc": assessment_epc,
            "full_sap_epc": epc_searcher.full_sap_epc.copy(),
            "old_data": epc_searcher.older_epcs.copy() + [epc_searcher.newest_epc.copy()],
        }
        return records, True

    # Our assessment is not newer. Check whether it is already among the EPCs done for the property, which
    # we decide based on inspection date and SAP score
    matching_public_epcs = [
        public_epc
        for public_epc in epc_searcher.older_epcs + [epc_searcher.newest_epc]
        if public_epc["inspection-date"] == assessment_date
        and public_epc["current-energy-efficiency"] == assessment_epc["current-energy-efficiency"]
    ]

    if matching_public_epcs:
        # The EPC we produced is already in the set of EPCs, so there is nothing to add
        old_data = epc_searcher.older_epcs.copy()
    else:
        # Our EPC is older than the newest publicly available one but is not in the historicals, so it
        # can't have been lodged - include it in the old data
        old_data = epc_searcher.older_epcs.copy() + [assessment_epc]

    records = {
        "original_epc": epc_searcher.newest_epc.copy(),
        "full_sap_epc": epc_searcher.full_sap_epc.copy(),
        "old_data": old_data,
    }
    return records, False
def get_on_site_data(body: PlanTriggerRequest):
    """
    Read the on-site data files referenced by the request from the plan trigger S3 bucket.

    Any file path that is not set on the request yields an empty list for that item.

    :param body: The request body
    :return: A tuple of (patches, already_installed, non_invasive_recommendations)
    """
    def load(filepath):
        # Nothing to read when the request does not reference a file
        if not filepath:
            return []
        return read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=filepath)

    patches = load(body.patches_file_path)
    already_installed = load(body.already_installed_file_path)
    non_invasive_recommendations = load(body.non_invasive_recommendations_file_path)

    return patches, already_installed, non_invasive_recommendations
def extract_property_on_site_recommendations(config, patches, already_installed, non_invasive_recommendations, uprn):
    """
    Pick out the on-site records that belong to a single property.

    :param config: The property's row from the trigger file; ``address`` and ``postcode`` are read from
        it, and ``uprn`` is read when the patches carry a uprn
    :param patches: Patch records for the whole portfolio
    :param already_installed: Already-installed measure records for the whole portfolio
    :param non_invasive_recommendations: Non-invasive recommendation records for the whole portfolio
    :param uprn: The property's UPRN, used to match non-invasive recommendations that carry a uprn
    :return: A tuple of (patch, property_already_installed, property_non_invasive_recommendations).
        Each element is the first matching record, or an empty dict when nothing matches
    """

    def _same_address(row):
        return (row["address"] == config["address"]) and (row["postcode"] == config["postcode"])

    # Patches are matched on UPRN when the first row carries one (an empty list is treated as if it
    # did, and simply yields no match); otherwise we fall back to address + postcode
    if not patches or "uprn" in patches[0]:
        patch = next((row for row in patches if str(row["uprn"]) == str(config["uprn"])), {})
    else:
        patch = next((row for row in patches if _same_address(row)), {})

    # Already installed measures are only ever matched on address + postcode
    property_already_installed = next((row for row in already_installed if _same_address(row)), {})

    # Some non-invasive recommendation files match on address and postcode rather than UPRN, so the
    # same existence check on the first row decides which key to use
    if not non_invasive_recommendations or "uprn" in non_invasive_recommendations[0]:
        property_non_invasive_recommendations = next(
            (row for row in non_invasive_recommendations if str(row["uprn"]) == str(uprn)), {}
        )
    else:
        property_non_invasive_recommendations = next(
            (row for row in non_invasive_recommendations if _same_address(row)), {}
        )

    # Recommendations stored as a string literal may list bare measure names, e.g.
    # "['cavity_extract_and_refill']". Each bare name is wrapped as {"type": name} and the result is
    # written back to the matched record as a string again.
    if isinstance(property_non_invasive_recommendations.get("recommendations"), str):
        import ast
        property_non_invasive_recommendations["recommendations"] = ast.literal_eval(
            property_non_invasive_recommendations["recommendations"]
        )
        normalised = [
            {"type": rec, } if isinstance(rec, str) else rec
            for rec in property_non_invasive_recommendations["recommendations"]
        ]
        property_non_invasive_recommendations["recommendations"] = str(normalised)

    return patch, property_already_installed, property_non_invasive_recommendations
router = APIRouter(
prefix="/plan",
tags=["plan"],
@ -233,9 +375,6 @@ async def trigger_plan(body: PlanTriggerRequest):
session = sessionmaker(bind=db_engine)()
created_at = datetime.now().isoformat()
# TODO: We should store the trigger file path in the database with the plan so we can track the file that
# triggered the plan
# TODO: if the measure is already installed, it should actually be the very first phase
try:
@ -243,21 +382,7 @@ async def trigger_plan(body: PlanTriggerRequest):
logger.info("Getting the inputs")
plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
# If we have patches or overrides, we should read them in here
patches = []
if body.patches_file_path:
patches = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.patches_file_path)
already_installed = []
if body.already_installed_file_path:
already_installed = read_csv_from_s3(
bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.already_installed_file_path
)
non_invasive_recommendations = []
if body.non_invasive_recommendations_file_path:
non_invasive_recommendations = read_csv_from_s3(
bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.non_invasive_recommendations_file_path
)
patches, already_installed, non_invasive_recommendations = get_on_site_data(body)
cleaning_data = read_dataframe_from_s3_parquet(
bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
@ -281,30 +406,40 @@ async def trigger_plan(body: PlanTriggerRequest):
epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None)
# For the moment, our OS API access is unavailable, so we skip and interpolate
epc_searcher.find_property(skip_os=True)
# We check for an energy assessment we have performed on this property:
energy_assessment = get_latest_assessment_by_uprn(session, uprn if uprn is not None else epc_searcher.uprn)
# Create a record in db
property_id, is_new = create_property(
session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
)
if not is_new:
if not is_new and not body.multi_plan:
continue
create_property_targets(
session,
property_id=property_id,
portfolio_id=body.portfolio_id,
epc_target=body.goal_value,
heat_demand_target=None
if is_new:
create_property_targets(
session,
property_id=property_id,
portfolio_id=body.portfolio_id,
epc_target=body.goal_value,
heat_demand_target=None
)
# If we have an energy assessment in place, that is newer than all of the previous EPCs, we use that.
# Otherwise, we use the newest EPC
# energy_assessment_is_newer will tell us if the energy assessment is newer than the newest EPC that
# has been publicly lodged
epc_records, energy_assessment["energy_assessment_is_newer"] = create_epc_records(
epc_searcher, energy_assessment
)
epc_records = {
'original_epc': epc_searcher.newest_epc.copy(),
'full_sap_epc': epc_searcher.full_sap_epc.copy(),
'old_data': epc_searcher.older_epcs.copy(),
}
patch, property_already_installed, property_non_invasive_recommendations = (
extract_property_on_site_recommendations(
config, patches, already_installed, non_invasive_recommendations, uprn
)
)
patch = next((
x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
), {})
epc_records = patch_epc(patch, epc_records)
prepared_epc = EPCRecord(
@ -313,25 +448,17 @@ async def trigger_plan(body: PlanTriggerRequest):
cleaning_data=cleaning_data
)
property_already_installed = next((
x for x in already_installed if
(x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
), {})
property_non_invasive_recommendations = next((
x for x in non_invasive_recommendations if
(x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
), {})
input_properties.append(
Property(
id=property_id,
is_new=is_new,
address=epc_searcher.address_clean,
postcode=epc_searcher.postcode_clean,
epc_record=prepared_epc,
already_installed=property_already_installed,
non_invasive_recommendations=property_non_invasive_recommendations,
**Property.extract_kwargs(config)
energy_assessment=energy_assessment,
**Property.extract_kwargs(config), # TODO: Depraecate this
)
)
@ -345,12 +472,6 @@ async def trigger_plan(body: PlanTriggerRequest):
materials = get_materials(session)
cleaned = get_cleaned()
uprn_filenames = read_dataframe_from_s3_parquet(
bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
)
photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket=get_settings().DATA_BUCKET)
solar_api_client = GoogleSolarApi(api_key=get_settings().GOOGLE_SOLAR_API_KEY)
dataset_version = "2024-07-08"
energy_consumption_client = EnergyConsumptionModel(
model_paths={
@ -363,27 +484,75 @@ async def trigger_plan(body: PlanTriggerRequest):
environment=get_settings().ENVIRONMENT
)
logger.info("Getting spatial data")
for p in input_properties:
p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds, energy_consumption_client)
p.get_spatial_data(uprn_filenames)
kwh_client = KwhData(bucket=get_settings().DATA_BUCKET, read_consumption_data=True)
model_api = ModelApi(
portfolio_id=body.portfolio_id,
timestamp=created_at,
prediction_buckets=get_prediction_buckets()
)
epcs_for_scoring = kwh_client.transform(data=kwh_client.prepare_epc(input_properties), cleaned=cleaned)
kwh_preds = model_api.paginated_predictions(
data=epcs_for_scoring,
bucket=get_settings().DATA_BUCKET,
model_prefixes=["heating_kwh_predictions", "hotwater_kwh_predictions"],
extract_ids=False,
batch_size=SCORING_BATCH_SIZE
)
# Insert the spatial data
logger.info("Getting spatial data")
input_properties = OpenUprnClient.set_spatial_data(input_properties, bucket_name=get_settings().DATA_BUCKET)
[p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=kwh_preds) for p in input_properties]
logger.info("Performing solar analysis")
# TODO: Tidy this up
# TODO: If a property is semi-detached, we might get roof surfaces for the main building + the neighbour
# TODO: If we can't get high image quality, should we use the solar API? Maybe just for semi-detached units with
# extensions, since it doesn't seem to do a great job
# TODO: For simple properties, we should do a comparison/check between the solar API's roof area and the
# basic estimate of roof area
# TODO: Handle the case of modelling some units as buildings and some as properties individually
building_ids = [
{
"building_id": p.building_id,
"longitude": p.spatial["longitude"],
"latitude": p.spatial["latitude"],
# Energy consumption is adjusted for the property's expected post retrofit state
# We set the target rating to EPC C, which is the typical EPC rating we would expect the
# property to achieve post retrofit of just the fabric
"energy_consumption": energy_consumption_client.estimate_new_consumption(
current_rating=p.data["current-energy-rating"],
target_rating=body.goal_value,
current_consumption=p.current_adjusted_energy
current_energy_efficiency=p.data["current-energy-efficiency"],
target_efficiency="69",
current_consumption=p.estimate_electrical_consumption(
assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions
)
),
"property_id": p.id,
"uprn": p.uprn
} for p in input_properties if p.building_id is not None
]
individual_units = [
{
"longitude": p.spatial["longitude"],
"latitude": p.spatial["latitude"],
# Energy consumption is adjusted for the property's expected post retrofit state
# We set the target rating to EPC C, which is the typical EPC rating we would expect the
# property to achieve post retrofit of just the fabric
"energy_consumption": energy_consumption_client.estimate_new_consumption(
current_energy_efficiency=p.data["current-energy-efficiency"],
target_efficiency="69",
current_consumption=p.estimate_electrical_consumption(
assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions
),
),
"property_id": p.id,
"uprn": p.uprn
} for p in input_properties if p.building_id is None
]
if building_ids:
# Find the unique longitude and latitude pairs for each building id
unique_coordinates = {}
@ -417,6 +586,7 @@ async def trigger_plan(body: PlanTriggerRequest):
energy_consumption = sum(
[entry['energy_consumption'] for entry in building_ids if entry['building_id'] == building_id]
)
solar_api_client = GoogleSolarApi(api_key=get_settings().GOOGLE_SOLAR_API_KEY)
solar_api_client.get(
longitude=coordinates["longitude"],
latitude=coordinates["latitude"],
@ -431,7 +601,8 @@ async def trigger_plan(body: PlanTriggerRequest):
}
# Store the data in the database
# TODO: Rather than just doing a straight insert, we should overwrite what's already there if it exists
# TODO: Rather than just doing a straight insert, we should overwrite what's already there if it
# exists
solar_api_client.save_to_db(
session=session, uprns_to_location=building_uprns[building_id], scenario_type="building"
)
@ -446,23 +617,67 @@ async def trigger_plan(body: PlanTriggerRequest):
energy_consumption
)
p.set_solar_panel_configuration(unit_solar_panel_configuration)
if individual_units:
# Model the solar potential at the property level
for unit in tqdm(individual_units):
else:
# # Model the solar potential at the property level
# for p in input_properties:
# # TODO: Complete me! - we probably won't do this for individual flats
# solar_performance = solar_api_client.get(
# longitude=p.spatial["longitude"], latitude=p.spatial["latitude"]
# )
print("Implement me")
# TODO: Tidy up this code
# We don't need to do this if we have global inclusions that don't include solar
if body.inclusions:
if "solar_pv" not in body.inclusions:
continue
logger.info("Getting components and epc recommendations")
property_instance = [p for p in input_properties if p.id == unit["property_id"]][0]
# At this level, we check if the property is suitable for solar and if not, skip
if not property_instance.is_solar_pv_valid():
continue
# We check if we have a solar non-invasive recommendation
if [r for r in property_instance.non_invasive_recommendations if r["type"] == "solar_pv"]:
continue
solar_api_client = GoogleSolarApi(api_key=get_settings().GOOGLE_SOLAR_API_KEY)
solar_api_client.get(
longitude=unit["longitude"],
latitude=unit["latitude"],
energy_consumption=unit["energy_consumption"],
is_building=False,
session=session,
uprn=unit["uprn"],
property_instance=property_instance
)
# Store the data in the database
# TODO: Rather than just doing a straight insert, we should overwrite what's already there if it
# exists
solar_api_client.save_to_db(
session=session,
uprns_to_location=[
{
"uprn": property_instance.uprn,
"longitude": property_instance.spatial["longitude"],
"latitude": property_instance.spatial["latitude"]
}
],
scenario_type="unit"
)
property_instance.set_solar_panel_configuration(
solar_panel_configuration={
"insights_data": solar_api_client.insights_data,
"panel_performance": solar_api_client.panel_performance,
"unit_share_of_energy": 1
},
roof_area=solar_api_client.roof_area
)
logger.info("Identifying property recommendations")
recommendations = {}
recommendations_scoring_data = []
representative_recommendations = {}
for p in tqdm(input_properties):
recommender = Recommendations(property_instance=p, materials=materials, exclusions=body.exclusions)
recommender = Recommendations(
property_instance=p, materials=materials, exclusions=body.exclusions, inclusions=body.inclusions
)
property_recommendations, property_representative_recommendations = recommender.recommend()
if not property_recommendations:
@ -479,7 +694,6 @@ async def trigger_plan(body: PlanTriggerRequest):
recommendations_scoring_data.extend(p.recommendations_scoring_data)
# TODO: Make sure that number_habitable_rooms has been dropped
logger.info("Preparing data for scoring in sap change api")
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
@ -488,54 +702,69 @@ async def trigger_plan(body: PlanTriggerRequest):
"carbon_ending"]
)
model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
all_predictions = model_api.paginated_predictions(
data=recommendations_scoring_data,
bucket=get_settings().DATA_BUCKET,
batch_size=SCORING_BATCH_SIZE
)
all_predictions = model_api.predictions_template()
to_loop_over = range(0, recommendations_scoring_data.shape[0], SCORING_BATCH_SIZE)
for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
predictions_dict = model_api.predict_all(
df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE],
bucket=get_settings().DATA_BUCKET,
prediction_buckets=get_prediction_buckets()
# Insert the predictions into the recommendations, and get the impact summary
scoring_epcs = [] # For scoring the kwh models
for property_id in recommendations.keys():
property_instance = [p for p in input_properties if p.id == property_id][0]
recommendations_with_impact, impact_summary = (
Recommendations.calculate_recommendation_impact(
property_instance=property_instance,
all_predictions=all_predictions,
recommendations=recommendations,
)
)
# Append the predictions to the predictions dictionary
for key, scored in predictions_dict.items():
all_predictions[key] = pd.concat([all_predictions[key], scored])
# We use the impact_summary to update the simulation_epcs with the new SAP, heat demand, carbon, cost etc
# at each phase
property_instance.update_simulation_epcs(impact_summary)
scoring_epcs.extend(property_instance.updated_simulation_epcs)
recommendations[property_id] = recommendations_with_impact
# We call the API with the scoring epcs
scoring_epcs = pd.DataFrame(scoring_epcs)
scoring_epcs = kwh_client.transform(data=scoring_epcs, cleaned=cleaned)
kwh_simulation_predictions = model_api.paginated_predictions(
data=scoring_epcs,
bucket=get_settings().DATA_BUCKET,
model_prefixes=["heating_kwh_predictions", "hotwater_kwh_predictions"],
batch_size=SCORING_BATCH_SIZE
)
# We now insert kwh estimates and costs into the recommendations
# TODO: We should join the methodology which maps the heating and hot water descriptions to the fuel types in
# Recommendations, but also the Property class
logger.info("Calculating tenant savings - kwh and bills")
for property_id in tqdm([p.id for p in input_properties]):
property_recommendations = recommendations.get(property_id, [])
property_instance = [p for p in input_properties if p.id == property_id][0]
property_current_energy_bill = Recommendations.calculate_recommendation_tenant_savings(
property_instance=property_instance,
kwh_simulation_predictions=kwh_simulation_predictions,
property_recommendations=property_recommendations
)
property_instance.current_energy_bill = property_current_energy_bill
# Insert the predictions into the recommendations and run the optimiser
# TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a
# possibility with heating system
# TODO: After optimising, if there are any cheap, quick win measures (e.g. insulate water tank with hot water
# cylinder jacket), we should add these to the recommendations as default
logger.info("Optimising recommendations")
for property_id in recommendations.keys():
property_instance = [p for p in input_properties if p.id == property_id][0]
for p in input_properties:
if not recommendations.get(p.id):
continue
input_measures = prepare_input_measures(recommendations[p.id], body.goal)
(
recommendations_with_impact,
expected_adjusted_energy,
expected_energy_bill
) = (
Recommendations.calculate_recommendation_impact(
property_instance=property_instance,
all_predictions=all_predictions,
recommendations=recommendations,
representative_recommendations=representative_recommendations,
energy_consumption_client=energy_consumption_client
)
)
# Store the resulting adjusted energy in the property instance
property_instance.set_adjusted_energy(
expected_adjusted_energy=expected_adjusted_energy,
expected_energy_bill=expected_energy_bill
)
input_measures = prepare_input_measures(recommendations_with_impact, body.goal)
current_sap_points = int(property_instance.data["current-energy-efficiency"])
current_sap_points = int(p.data["current-energy-efficiency"])
target_sap_points = epc_to_sap_lower_bound(body.goal_value)
sap_gain = CostOptimiser.calculate_sap_gain_with_slack(target_sap_points - current_sap_points)
@ -562,7 +791,7 @@ async def trigger_plan(body: PlanTriggerRequest):
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"
]):
ventilation_rec = next(
(r[0] for r in recommendations_with_impact if r[0]["type"] == "mechanical_ventilation"),
(r[0] for r in recommendations[p.id] if r[0]["type"] == "mechanical_ventilation"),
None
)
@ -576,20 +805,35 @@ async def trigger_plan(body: PlanTriggerRequest):
{**rec, "default": True if rec["recommendation_id"] in selected_recommendations else False}
for rec in recommendations_by_type
]
for recommendations_by_type in recommendations_with_impact
for recommendations_by_type in recommendations[p.id]
]
# We'll also unlist the recommendations so they're a bit easier to handle from here onwards
final_recommendations = [
rec for recommendations_by_type in final_recommendations for rec in recommendations_by_type
]
recommendations[property_id] = final_recommendations
# 1) the property data
# 2) the property details (epc)
# 3) the recommendations
recommendations[p.id] = final_recommendations
logger.info("Uploading recommendations to the database")
# If we have any work to do, we create a new scenario
engine_scenario = create_scenario(
session=session,
scenario={
"name": body.scenario_name,
"created_at": created_at,
"budget": body.budget,
"portfolio_id": body.portfolio_id,
"housing_type": body.housing_type,
"goal": body.goal,
"trigger_file_path": body.trigger_file_path,
"already_installed_file_path": body.already_installed_file_path,
"patches_file_path": body.patches_file_path,
"non_invasive_recommendations_file_path": body.non_invasive_recommendations_file_path,
"exclusions": body.exclusions,
"multi_plan": body.multi_plan
}
)
property_valuation_increases = []
session.commit()
new_epc_bands = {}
@ -610,18 +854,18 @@ async def trigger_plan(body: PlanTriggerRequest):
valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc)
property_value_increase_ranges[p.id] = valuations
# Your existing operations
property_details_epc = p.get_property_details_epc(
portfolio_id=body.portfolio_id, rating_lookup=rating_lookup,
)
create_property_details_epc(session, property_details_epc)
if p.is_new:
property_details_epc = p.get_property_details_epc(
portfolio_id=body.portfolio_id, rating_lookup=rating_lookup,
)
create_property_details_epc(session, property_details_epc)
update_or_create_property_spatial_details(session, p.uprn, p.spatial)
update_or_create_property_spatial_details(session, p.uprn, p.spatial)
property_data = p.get_full_property_data(current_valuation=valuations["current_value"])
update_property_data(
session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data
)
property_data = p.get_full_property_data(current_valuation=valuations["current_value"])
update_property_data(
session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data
)
if not recommendations_to_upload:
continue
@ -629,7 +873,9 @@ async def trigger_plan(body: PlanTriggerRequest):
new_plan_id = create_plan(session, {
"portfolio_id": body.portfolio_id,
"property_id": p.id,
"is_default": True,
"scenario_id": engine_scenario.id,
"is_default": True if p.is_new else False,
"name": body.scenario_name,
"valuation_increase_lower_bound": (
valuations["lower_bound_increased_value"] - valuations["current_value"]
),
@ -641,10 +887,8 @@ async def trigger_plan(body: PlanTriggerRequest):
),
})
uploaded_recommendation_ids = upload_recommendations(session, recommendations_to_upload, p.id)
create_plan_recommendations(
session, plan_id=new_plan_id, recommendation_ids=uploaded_recommendation_ids
upload_recommendations(
session, recommendations_to_upload, p.id, new_plan_id
)
property_valuation_increases.append(
@ -683,6 +927,7 @@ async def trigger_plan(body: PlanTriggerRequest):
aggregate_portfolio_recommendations(
session,
portfolio_id=body.portfolio_id,
scenario_id=engine_scenario.id,
total_valuation_increase=total_valuation_increase,
labour_days=labour_days,
aggregated_data=aggregated_data
@ -817,6 +1062,7 @@ async def build_mds(body: MdsRequest):
# already_installed=property_already_installed,
# non_invasive_recommendations=property_non_invasive_recommendations,
measures=measures,
is_new=is_new,
**Property.extract_kwargs(config)
)
)
@ -840,7 +1086,7 @@ async def build_mds(body: MdsRequest):
recommendations = {}
for p in tqdm(input_properties):
p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
p.set_features(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
mds = Mds(property_instance=p, materials=materials, optimise_measures=optimise_measures)
mds_recommendations, property_representative_recommendations, errors = mds.build()
@ -889,7 +1135,9 @@ async def build_mds(body: MdsRequest):
"carbon_ending"]
)
model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
model_api = ModelApi(
portfolio_id=body.portfolio_id, timestamp=created_at, prediction_buckets=get_prediction_buckets()
)
all_predictions = {
"sap_change_predictions": pd.DataFrame(),
@ -900,12 +1148,6 @@ async def build_mds(body: MdsRequest):
for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
predictions_dict = model_api.predict_all(
df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE],
bucket=get_settings().DATA_BUCKET,
prediction_buckets={
"sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
"heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
"carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET
}
)
# Append the predictions to the predictions dictionary

View file

@ -1,6 +1,56 @@
from pydantic import BaseModel, conlist, validator
from typing import Optional
# High level measure categories. The request validators accept any of these (or one of the
# specific measures) as an exclusion or inclusion.
TYPICAL_MEASURE_TYPES = [
    # Fabric
    "wall_insulation",
    "roof_insulation",
    "ventilation",
    "floor_insulation",
    "windows",
    "fireplace",
    # Services
    "heating",
    "hot_water",
    "low_energy_lighting",
    "secondary_heating",
    # Renewables
    "solar_pv",
]
# Specific measures that can be named individually in exclusions/inclusions.
# NOTE: every entry must be followed by a comma. Adjacent string literals are implicitly concatenated
# by Python, so a missing comma silently merges two measures into one invalid name.
SPECIFIC_MEASURES = [
    # Walls
    "internal_wall_insulation",
    "external_wall_insulation",
    "cavity_wall_insulation",
    # Roof
    "loft_insulation",
    "flat_roof_insulation",
    "room_roof_insulation",
    # Floor
    "suspended_floor_insulation",
    "solid_floor_insulation",
    # Heating
    "boiler_upgrade",
    "high_heat_retention_storage_heater",
    "air_source_heat_pump",
    # Specific measures that will typically come from an energy assessment
    "trickle_vents",
    "draught_proofing",
    "mixed_glazing",  # This covers partial double glazing and secondary glazing
    "cavity_extract_and_refill",
]
# Maps a high level measure category (e.g. "wall_insulation") to the specific measures it covers
# (e.g. "external_wall_insulation", "internal_wall_insulation", "cavity_wall_insulation"), so that a
# category can be expanded into its individual measures.
MEASURE_MAP = {
    "wall_insulation": [
        "internal_wall_insulation",
        "external_wall_insulation",
        "cavity_wall_insulation",
        "cavity_extract_and_refill",
    ],
    "roof_insulation": [
        "loft_insulation",
        "flat_roof_insulation",
        "room_roof_insulation",
    ],
    "floor_insulation": [
        "suspended_floor_insulation",
        "solid_floor_insulation",
    ],
    "heating": [
        "boiler_upgrade",
        "high_heat_retention_storage_heater",
        "air_source_heat_pump",
    ],
}
class PlanTriggerRequest(BaseModel):
budget: Optional[float] = None
@ -13,35 +63,30 @@ class PlanTriggerRequest(BaseModel):
patches_file_path: Optional[str] = None
non_invasive_recommendations_file_path: Optional[str] = None
exclusions: Optional[conlist(str, min_items=1)] = None
inclusions: Optional[conlist(str, min_items=1)] = None
# Pre-defined list of possibilities for exclusions
_allowed_exclusions = {
# Measure classes
"wall_insulation",
"ventilation",
"roof_insulation",
"floor_insulation",
"windows",
"fireplace",
"heating",
"hot_water",
"lighting",
"solar_pv",
# Specific measures
"air_source_heat_pump",
}
scenario_name: Optional[str] = ""
# If true, will allow us to create multiple plans for the same portfolio, whereas if this is false, if this property
# exists in the portfolio, it will be ignored
multi_plan: Optional[bool] = False
_allowed_goals = {"Increase EPC"}
_allowed_goals = {"Increasing EPC"}
_allowed_housing_types = {"Social", "Private"}
# Validator to ensure exclusions are within the pre-defined possibilities
@validator('exclusions', each_item=True)
def check_exclusions(cls, v):
if v not in cls._allowed_exclusions:
if v not in TYPICAL_MEASURE_TYPES + SPECIFIC_MEASURES:
raise ValueError(f"{v} is not an allowed exclusion")
return v
@validator('inclusions', each_item=True)
def check_inclusions(cls, v):
if v not in TYPICAL_MEASURE_TYPES + SPECIFIC_MEASURES:
raise ValueError(f"{v} is not an allowed inclusion")
return v
# Validator to ensure that the goal is within the pre-defined possibilities
@validator('goal')
def check_goal(cls, v):

View file

@ -1,4 +1,6 @@
import numpy as np
import pandas as pd
import backend.app.assumptions as assumptions
QUARTERLY_ENERGY_PRICES = [
# 2024 Q1
@ -40,6 +42,53 @@ class AnnualBillSavings:
DAILY_STANDARD_CHARGE_GAS = 0.3143
DAILY_STANDARD_CHARGE_ELECTRICITY = 0.601
# Fuel price comparison table, taken from
# https://www.nottenergy.com/advice-and-tools/project-energy-cost-comparison for July 2024.
# The quotes are for the East Midlands region.
# NOTE(review): the original comment trailed off after "so we" - confirm whether a regional
# adjustment was intended.
# One row per fuel; prices are in pence.
FUEL_DATA = pd.DataFrame(
    [
        ("Electricity Standard", 28.58, "kWh", 100, 1.00, 28.58, 0.275),
        ("Mains Gas Standard", 6.31, "kWh", 90, 1.00, 7.01, 0.213),
        ("Kerosene", 62.49, "Litre", 90, 9.79, 7.09, 0.298),
        ("Gas oil", 94.50, "Litre", 90, 9.96, 10.54, 0.316),
        ("LPG", 55.00, "Litre", 90, 6.78, 9.01, 0.240),
        ("Butane", 216.58, "Litre", 90, 6.64, 36.24, 0.248),
        ("Propane", 157.67, "Litre", 90, 7.22, 24.25, 0.239),
        ("Kiln Dried (logs)", 36.52, "kg", 85, 4.09, 10.51, 0.024),
        ("Pellets (Bagged)", 39.62, "kg", 90, 4.80, 9.17, 0.049),
        ("Pellets (Blown bulk)", 33.92, "kg", 90, 4.80, 7.85, 0.049),
        ("Smokeless fuel", 67.26, "kg", 75, 6.70, 13.38, 0.404),
        ("Coal", 48.50, "kg", 75, 7.95, 8.13, 0.404),
        ("GSHP", 28.58, "kWh", 350, 1.00, 8.17, 0.079),
        ("ASHP", 28.58, "kWh", 294, 1.00, 9.72, 0.094),
    ],
    columns=[
        "Fuel",
        "Price (p)",
        "Unit",
        "Boiler Efficiency (%)",
        "Energy Content, Net Calorific value (kWh/unit)",
        "Price per kWh (p) (inc boiler efficiency)",
        "CO2eq emission factor kgCO2eq/kWh (Gross CV)",
    ],
)
EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"]
@classmethod
@ -199,3 +248,75 @@ class AnnualBillSavings:
return current_epc_rating
return cls.EPC_BANDS[expected_index - 1]
@staticmethod
def cost_per_kwh(price_per_unit, energy_content_per_unit):
"""
Calculate the cost of fuel per kWh given the price per unit in GBP and the energy content per unit in kWh.
"""
cost_per_kwh = price_per_unit / energy_content_per_unit
# Tgis data is returned in pennies so we convert to pounds
return cost_per_kwh / 100
@classmethod
def calculate_recommendation_fuel_cost(cls, kwh, fuel, cop):
    """
    Price a quantity of energy for the given fuel.

    Electricity and natural gas are priced with the class price caps. Every other supported fuel is priced from
    the matching row of FUEL_DATA (price per unit divided by energy content per unit). The "+ Solar Thermal"
    variants are additionally scaled by assumptions.SOLAR_CONSUMPTION_PROPORTION.

    :param kwh: Energy quantity to price, in kWh
    :param fuel: Name of the fuel, e.g. "Electricity", "Natural Gas", "LPG", "Oil", "Coal"
    :param cop: Value that kwh is divided by before pricing (presumably the coefficient of performance /
        efficiency of the heating system - confirm with callers)
    :return: Cost of the energy for the given fuel
    :raises Exception: If the fuel is not one of the recognised names
    """
    # Fuels priced directly from the price caps
    if fuel in ["Natural Gas", "Natural Gas (Community Scheme)"]:
        return (kwh / cop) * cls.GAS_PRICE_CAP
    if fuel == "Electricity":
        return (kwh / cop) * cls.ELECTRICITY_PRICE_CAP
    if fuel == "Natural Gas + Solar Thermal":
        return (kwh / cop) * assumptions.SOLAR_CONSUMPTION_PROPORTION * cls.GAS_PRICE_CAP
    if fuel == "Electricity + Solar Thermal":
        return (kwh / cop) * assumptions.SOLAR_CONSUMPTION_PROPORTION * cls.ELECTRICITY_PRICE_CAP

    # Remaining fuels are priced from FUEL_DATA: (accepted fuel names, name of the FUEL_DATA row to use)
    fuel_data_rows = (
        (("LPG", "LPG + Solar Thermal"), "LPG"),
        (("Wood Logs", "Wood Pellets"), "Pellets (Bagged)"),
        (("Oil",), "Kerosene"),
        (("Smokeless Fuel", "Anthracite"), "Smokeless fuel"),
        (("Coal",), "Coal"),
    )
    for accepted_names, row_name in fuel_data_rows:
        if fuel not in accepted_names:
            continue
        price_row = cls.FUEL_DATA[cls.FUEL_DATA["Fuel"] == row_name].squeeze()
        rate = cls.cost_per_kwh(
            price_row["Price (p)"], price_row["Energy Content, Net Calorific value (kWh/unit)"]
        )
        if fuel == "LPG + Solar Thermal":
            return (kwh / cop) * rate * assumptions.SOLAR_CONSUMPTION_PROPORTION
        return (kwh / cop) * rate

    raise Exception("Fuel not recognised")

View file

@ -100,6 +100,9 @@ class PropertyValuation:
200140647: 481_000,
200140648: 373_000,
200140649: 373_000,
# Vander Elliot Intrusive surveys
12103116: 1_537_000,
12103117: 1_404_000,
}
# We base our valuation uplifts on a number of sources

View file

@ -1,4 +1,5 @@
import pandas as pd
from tqdm import tqdm
import requests
from requests.exceptions import RequestException
from utils.logger import setup_logger
@ -12,24 +13,27 @@ class ModelApi:
"sap_change_predictions",
"heat_demand_predictions",
"carbon_change_predictions",
"lighting_cost_predictions",
"heating_cost_predictions",
"hot_water_cost_predictions",
# "lighting_cost_predictions",
# "heating_cost_predictions",
# "hot_water_cost_predictions",
]
MODEL_URLS = {
"sap_change_predictions": "sapmodel",
"heat_demand_predictions": "heatmodel",
"carbon_change_predictions": "carbonmodel",
"lighting_cost_predictions": "lightingmodel",
"heating_cost_predictions": "heatingmodel",
"hot_water_cost_predictions": "hotwatermodel",
"hotwater_kwh_predictions": "hotwaterkwhmodel",
"heating_kwh_predictions": "heatingkwhmodel",
# "lighting_cost_predictions": "lightingmodel",
# "heating_cost_predictions": "heatingmodel",
# "hot_water_cost_predictions": "hotwatermodel",
}
def __init__(
self,
portfolio_id,
timestamp,
prediction_buckets,
base_url="https://api.dev.hestia.homes",
):
"""
@ -44,6 +48,7 @@ class ModelApi:
self.base_url = base_url
self.portfolio_id = portfolio_id
self.timestamp = timestamp
self.prediction_buckets = prediction_buckets
@staticmethod
def predictions_template():
@ -51,9 +56,8 @@ class ModelApi:
"sap_change_predictions": pd.DataFrame(),
"heat_demand_predictions": pd.DataFrame(),
"carbon_change_predictions": pd.DataFrame(),
"lighting_cost_predictions": pd.DataFrame(),
"heating_cost_predictions": pd.DataFrame(),
"hot_water_cost_predictions": pd.DataFrame(),
"hotwater_kwh_predictions": pd.DataFrame(),
"heating_kwh_predictions": pd.DataFrame(),
}
def upload_scoring_data(self, df: pd.DataFrame, bucket: str, model_prefix: str) -> str:
@ -68,8 +72,8 @@ class ModelApi:
:return:
"""
if model_prefix not in self.MODEL_PREFIXES:
raise ValueError(f"Model prefix specified is not in {self.MODEL_PREFIXES}")
# if model_prefix not in self.MODEL_PREFIXES:
# raise ValueError(f"Model prefix specified is not in {self.MODEL_PREFIXES}")
# Store parquet file in s3 for scoring
file_location = f"{model_prefix}/{self.portfolio_id}/{self.timestamp}.parquet"
@ -123,7 +127,7 @@ class ModelApi:
else:
return None
def predict_all(self, df, bucket, prediction_buckets) -> dict:
def predict_all(self, df, bucket, model_prefixes=None, extract_ids=True) -> dict:
"""
For each model prefix, this method will upload the scoring data to s3 and then make a request to the
@ -132,19 +136,24 @@ class ModelApi:
a dictionary of pandas dataframes
:param df: Pandas dataframe with scoring data to be uploaded to s3
:param bucket: Name of the bucket in s3 to upload to
:param prediction_buckets: Dictionary containing the prediction buckets for each model prefix
:param model_prefixes: List of model prefixes to generate predictions for. If None, all model prefixes will be
used
:param extract_ids: Boolean to determine if the property_id and recommendation_id should be extracted from the
id column
:return:
"""
model_prefixes = self.MODEL_PREFIXES if model_prefixes is None else model_prefixes
predictions = {}
for model_prefix in self.MODEL_PREFIXES:
for model_prefix in model_prefixes:
logger.info(f"Scoring for model prefix: {model_prefix}")
file_location = self.upload_scoring_data(df, bucket, model_prefix)
response = self.predict(
"s3://{DATA_BUCKET}/".format(DATA_BUCKET=bucket) + file_location, model_prefix
)
predictions_bucket = prediction_buckets[model_prefix]
predictions_bucket = self.prediction_buckets[model_prefix]
# Retrieve the predictions
predictions_df = pd.DataFrame(
@ -155,16 +164,35 @@ class ModelApi:
)
predictions_df['predictions'] = predictions_df["predictions"].astype(float).round(1)
predictions_df[['property_id', 'recommendation_id']] = predictions_df['id'].str.split('+', expand=True)
# To grab the phase, we pull the integer after "phase=" in the recommendation_id. We can do this with a
# string split on phase= and then grab the second element of the resulting list. We could also use a
# regular expression to do this but we use the string split method here, for safety.
# We may not always have a phase to split on, so we need to handle this case. We can do this by using the
# str[1] method to grab the second element of the resulting list. We then grab the first character of this
# string to get the phase. We then convert this to an integer.
# Convert back to int
predictions_df['phase'] = predictions_df['recommendation_id'].apply(self.extract_phase)
if extract_ids:
predictions_df[['property_id', 'recommendation_id']] = predictions_df['id'].str.split('+', expand=True)
# To grab the phase, we pull the integer after "phase=" in the recommendation_id. We can do this with a
# string split on phase= and then grab the second element of the resulting list. We could also use a
# regular expression to do this but we use the string split method here, for safety.
# We may not always have a phase to split on, so we need to handle this case. We can do this by using
# the str[1] method to grab the second element of the resulting list. We then grab the first
# character of this
# string to get the phase. We then convert this to an integer.
# Convert back to int
predictions_df['phase'] = predictions_df['recommendation_id'].apply(self.extract_phase)
predictions[model_prefix] = predictions_df
return predictions
def paginated_predictions(self, data, bucket, batch_size, model_prefixes=None, extract_ids=True):
    """
    Score the data in batches of batch_size rows and stitch the per-batch predictions back together.

    :param data: Pandas dataframe with the scoring data
    :param bucket: Name of the bucket in s3 that each batch is uploaded to
    :param batch_size: Number of rows of data to score per call to predict_all
    :param model_prefixes: List of model prefixes to generate predictions for, passed through to predict_all
    :param extract_ids: Passed through to predict_all
    :return: Dictionary of pandas dataframes keyed by model prefix. Prefixes from predictions_template that were
        not scored map to an empty dataframe.
    """
    # Seed every known prefix with its (empty) template frame so unscored models still come back as empty frames
    collected = {key: [frame] for key, frame in self.predictions_template().items()}

    batch_starts = range(0, data.shape[0], batch_size)
    for start in tqdm(batch_starts, total=len(batch_starts)):
        batch_predictions = self.predict_all(
            df=data.iloc[start:start + batch_size],
            bucket=bucket,
            model_prefixes=model_prefixes,
            extract_ids=extract_ids
        )
        for key, scored in batch_predictions.items():
            # setdefault tolerates prefixes that are not part of the template instead of raising a KeyError
            collected.setdefault(key, []).append(scored)

    # Concatenate once per prefix rather than re-copying the accumulated frame on every batch
    return {key: pd.concat(frames) for key, frames in collected.items()}

View file

@ -6,6 +6,7 @@ from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percenta
from sklearn.feature_selection import RFECV
from utils.s3 import save_pickle_to_s3, read_pickle_from_s3, read_dataframe_from_s3_parquet, read_csv_from_s3
from utils.logger import setup_logger
from backend.Property import Property
logger = setup_logger()
@ -102,6 +103,7 @@ class EnergyConsumptionModel:
# We also retrieve the newest retail price comparison data which comes from Ofgem:
# https://www.ofgem.gov.uk/energy-data-and-research/data-portal/retail-market-indicators
# We use the detail price comparison by company and tariff type data
print("Reading retail price comparison - make sure this is up-to-date")
self.read_retail_price_comparison()
def read_retail_price_comparison(self):
@ -124,37 +126,6 @@ class EnergyConsumptionModel:
self.retail_price_comparison = pd.DataFrame(data_rows, columns=header)
self.retail_price_comparison['Date'] = pd.to_datetime(self.retail_price_comparison['Date'], errors='coerce')
def convert_cost_to_today(self, original_cost, lodgement_date):
    """
    Rescale an EPC energy cost to the most recent tariff level in the retail price comparison data.

    The cost is multiplied by the ratio of the last tariff in the data to the tariff on the date closest to
    the lodgement date.

    :param original_cost: The original energy cost
    :param lodgement_date: The date the EPC was lodged
    :return: The energy cost scaled to the latest available tariff
    """
    prices = self.retail_price_comparison
    tariff_column = 'Average standard variable tariff (Large legacy suppliers)'

    # Row position of the date with the smallest absolute distance to the lodgement date
    nearest_position = (prices['Date'] - lodgement_date).abs().argsort()[:1]
    closest_date = pd.Timestamp(prices.iloc[nearest_position]['Date'].values[0])

    # Tariff in force on the closest date, and the most recent tariff (last row of the data)
    tariff_at_lodgement = prices.loc[prices['Date'] == closest_date, tariff_column].values[0]
    latest_tariff = prices[tariff_column].iloc[-1]

    ratio = float(latest_tariff) / float(tariff_at_lodgement)
    return original_cost * ratio
def read_dataset(self, file_path):
"""Reads the dataset from the specified file path."""
logger.info(f"Reading dataset from {file_path}")
@ -506,31 +477,36 @@ class EnergyConsumptionModel:
return prediction
@staticmethod
def calculate_percentage_decrease(start_rating, end_rating, consumption_averages):
def calculate_percentage_decrease(start_efficiency, end_efficiency, consumption_averages):
start_consumption = consumption_averages.loc[
consumption_averages["current-energy-rating"] == start_rating, "total_consumption"
consumption_averages["current-energy-efficiency"].astype(str) == str(start_efficiency), "total_consumption"
].values[0]
end_consumption = consumption_averages.loc[
consumption_averages["current-energy-rating"] == end_rating, "total_consumption"
consumption_averages["current-energy-efficiency"].astype(str) == str(end_efficiency), "total_consumption"
].values[0]
percentage_decrease = ((start_consumption - end_consumption) / start_consumption) * 100
# percentage_decrease cannot be negative
if percentage_decrease < 0:
percentage_decrease = 0
return percentage_decrease
def estimate_new_consumption(self, current_rating, target_rating, current_consumption):
def estimate_new_consumption(self, current_energy_efficiency, target_efficiency, current_consumption):
"""
Given then consumption_averages dataset, which is produced as a result of the data_combining.py script,
Given then consumption_averages dataset, which is produced as a result of the training_data.py script,
for the energy kwh models, this function will estimate the new consumption based on the current consumption,
based on the expected reduction in consumption from the current rating to the target rating.
:param current_rating:
:param target_rating:
:param current_energy_efficiency:
:param target_efficiency:
:param current_consumption:
:param df:
:return:
"""
percentage_decrease = self.calculate_percentage_decrease(
current_rating, target_rating, self.consumption_averages
start_efficiency=current_energy_efficiency,
end_efficiency=target_efficiency,
consumption_averages=self.consumption_averages
)
new_consumption = current_consumption * (1 - percentage_decrease / 100)
return new_consumption

363
etl/bill_savings/KwhData.py Normal file
View file

@ -0,0 +1,363 @@
import re
import pandas as pd
import numpy as np
from datetime import datetime
from tqdm import tqdm
from utils.logger import setup_logger
from utils.s3 import (
list_files_in_s3_folder, read_pickle_from_s3, save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet,
read_csv_from_s3
)
from backend.Property import Property
logger = setup_logger()
class KwhData:
COLS_TO_STRINGIFY = ["main-heating-controls", "floor-level"]
CATEGORICAL_COLUMNS = [
"lodgement-year", "lodgement-month", "main-fuel", "mainheat-description", "number-heated-rooms",
"number-habitable-rooms", "mainheat-energy-eff", "mainheatcont-description", "property-type",
"built-form",
"construction-age-band", "secondheat-description", "hotwater-description", "hot-water-energy-eff",
"walls-description", "walls-energy-eff", "roof-description", "roof-energy-eff", "floor-description",
"county",
"windows-description", "windows-energy-eff", "flat-top-storey",
"flat-storey-count", "unheated-corridor-length", "solar-water-heating-flag", "mechanical-ventilation",
"low-energy-lighting", "environment-impact-current", "energy-tariff", "current-energy-rating",
"floor-level"
]
NUMERICAL_COLUMNS = [
'heating-cost-current', 'total-floor-area', 'co2-emissions-current', 'energy-consumption-current',
'heating-cost-potential', 'hot-water-cost-current', 'current-energy-efficiency'
]
def __init__(self, bucket=None, read_consumption_data=False):
self.run_date = datetime.now().strftime("%Y-%m-%d")
self.bucket = bucket
self.data = None
self.consumption_data_filepath = None
self.consumption_averages_filepath = None
self.model_training_data_filepath = None
self.consumption_averages = None
self.retail_price_comparison = None
if read_consumption_data:
self.get_consumption_data()
self.read_retail_price_comparison()
def get_consumption_data(self):
    """
    Read the most recent consumption averages dataset from s3 into self.consumption_averages.

    Every key under "energy_consumption/" that contains "consumption_averages.parquet" is a candidate. The run
    date is parsed from the second "/"-separated part of the key and the candidate with the latest run date is
    read.

    :raises ValueError: If no consumption averages file is found
    """
    keys = list_files_in_s3_folder(bucket_name=self.bucket, folder_name="energy_consumption/")

    candidates = []
    for key in keys:
        if "consumption_averages.parquet" not in key:
            continue
        candidates.append({"run_date": pd.to_datetime(key.split("/")[1]), "filepath": key})

    if not candidates:
        raise ValueError("No consumption averages data found, something went wrong")

    # Sorted ascending by run date, so the last entry is the newest file
    newest = sorted(candidates, key=lambda candidate: candidate["run_date"])[-1]
    self.consumption_averages = read_dataframe_from_s3_parquet(
        bucket_name=self.bucket,
        file_key=newest["filepath"]
    )
def read_retail_price_comparison(self):
    """
    Read the retail price comparison csv from s3 into self.retail_price_comparison.

    The result is a dataframe with a parsed Date column (unparseable dates become NaT) followed by the tariff
    columns listed below.
    """
    raw_rows = read_csv_from_s3(
        bucket_name=self.bucket,
        filepath="energy_consumption/retail-price-comparison.csv"
    )

    columns = [
        'Date',
        'Average standard variable tariff (Large legacy suppliers)',
        'Average standard variable tariff (Other suppliers)',
        'Average fixed tariff',
        'Cheapest tariff (Large legacy suppliers)',
        'Cheapest tariff (All suppliers)',
        'Cheapest tariff (Basket)',
        'Default tariff cap level',
    ]

    # The first row is skipped. In every other row the date is stored under the '\ufeff"' key and the
    # remaining values are stored as a list under the None key.
    records = [[row['\ufeff"']] + row[None] for row in raw_rows[1:]]

    frame = pd.DataFrame(records, columns=columns)
    self.retail_price_comparison = frame
    frame['Date'] = pd.to_datetime(frame['Date'], errors='coerce')
@staticmethod
def extract_kwh_value(text: str):
    """
    Extract the numerical kWh value from a given string.

    :param text: The input string containing the kWh value, e.g. "12,345 kWh per year"
    :return: The extracted kWh value as an integer, or None when text is not a string or does not contain a
        "<number> kWh per year" figure
    """
    # Missing text is treated the same as text without a figure
    if not isinstance(text, str):
        return None

    # The figure must start with a digit: a bare "," in front of "kWh per year" would otherwise match and
    # fail the integer conversion below
    match = re.search(r'(\d[\d,]*) kWh per year', text)
    if match is None:
        return None

    # Remove the thousands separators before converting to an integer
    return int(match.group(1).replace(',', ''))
def combine(self):
    """
    Collate the collected heating and hot water kwh data into a single dataset and store it in s3.

    Every pickle under "energy_consumption_data" in the datalake bucket is read. Records whose extracted SAP
    scores do not match the EPC are dropped, and the remainder is deduplicated to the newest record per uprn.
    The combined dataset, and the mean total (heating + hot water) consumption per current-energy-efficiency
    value, are saved as parquet files in self.bucket. The combined dataset is also kept on self.data.

    :raises Exception: If no bucket has been set on the instance
    :return:
    """
    # Fail before doing any work rather than after all of the data has been read
    if self.bucket is None:
        raise Exception("bucket not set, cannot save data")

    datalake_bucket = "retrofit-datalake-dev"

    # Firstly, list all of the saved files in s3
    data_files = list_files_in_s3_folder(bucket_name=datalake_bucket, folder_name="energy_consumption_data")

    complete_data = []
    for file_key in tqdm(data_files):
        # The file name, without its extension, is the date the data was collected
        dataset_run_date = pd.Timestamp(file_key.split("/")[-1].split(".")[0])

        # Load the data from the file
        records = read_pickle_from_s3(bucket_name=datalake_bucket, s3_file_name=file_key)

        for record in records:
            epc_data = record["epc"]
            epc_sap = epc_data["current-energy-efficiency"]
            epc_potential_sap = epc_data["potential-energy-efficiency"]

            # We only keep records where the retrieved SAP scores match the EPC data
            if int(epc_sap) != int(record["current_epc_efficiency"]) or int(epc_potential_sap) != int(
                record["potential_epc_efficiency"]
            ):
                continue

            complete_data.append(
                {
                    **epc_data,
                    "heating_kwh": self.extract_kwh_value(record["heating_text"]),
                    "hot_water_kwh": self.extract_kwh_value(record["hot_water_text"]),
                    "dataset_run_date": dataset_run_date
                }
            )

    df = pd.DataFrame(complete_data)

    # Because we collate multiple runs into a single data source, it's possible that we have duplicated data at
    # the uprn level, so we dedupe based on the newest dataset_run_date
    df = df.sort_values("dataset_run_date", ascending=False).drop_duplicates(subset="uprn", keep="first")
    df = df.drop(columns=["dataset_run_date"])

    for col in self.COLS_TO_STRINGIFY:
        df[col] = df[col].astype(str)

    # Save the data back to s3, but this time as a parquet file
    self.consumption_data_filepath = f"energy_consumption/{self.run_date}/energy_consumption_dataset.parquet"
    logger.info(f"Storing energy consumption dataset in s3 at {self.consumption_data_filepath}")
    save_dataframe_to_s3_parquet(
        bucket_name=self.bucket,
        file_key=self.consumption_data_filepath,
        df=df
    )

    # We also estimate the average total consumption from this data, per current-energy-efficiency value
    df["total_consumption"] = df["heating_kwh"] + df["hot_water_kwh"]
    consumption_averages = df.groupby("current-energy-efficiency")["total_consumption"].mean().reset_index()
    df = df.drop(columns=["total_consumption"])

    self.consumption_averages_filepath = f"energy_consumption/{self.run_date}/consumption_averages.parquet"
    logger.info(f"Storing consumption averages in s3 at {self.consumption_averages_filepath}")
    # Save the consumption averages to the same bucket as the dataset: get_consumption_data reads the averages
    # back from self.bucket, so writing them to a fixed bucket would leave them unreadable for other buckets
    save_dataframe_to_s3_parquet(
        bucket_name=self.bucket,
        file_key=self.consumption_averages_filepath,
        df=consumption_averages
    )

    self.data = df
def transform(
    self, data: pd.DataFrame, cleaned, new=False, save=False
):
    """
    Given the input EPCs, this method will transform the data into a format that can be used by the model.
    This method can be used to transform the training data, or new epcs within the backend engine.

    The lodgement-date, lodgement-year and lodgement-month columns are written onto the dataframe that is
    passed in.

    :param data: Dataframe of EPCs
    :param cleaned: Mapping of feature name to cleaned description records; each record needs the
        "original_description" and "thermal_transmittance" fields
    :param new: Currently unused, see the TODO below
    :param save: When True, the transformed data is stored in s3 under self.bucket and nothing is returned
    :raises Exception: If save is requested and no bucket has been set
    :return: The transformed dataframe, or None when save is True
    """
    if save and self.bucket is None:
        raise Exception("bucket not set, cannot save data")

    # TODO: New is a temporary parameter, which will transform the epc descriptions to their transformed features
    #  in anticipation of the new model

    data["lodgement-date"] = pd.to_datetime(data["lodgement-date"])
    data["lodgement-year"] = data["lodgement-date"].dt.year
    data["lodgement-month"] = data["lodgement-date"].dt.month

    # For walls, roof, floor description where we have average thermal transmittance, to avoid too many
    # categories we group them
    ranges = {
        "lessthan 0.1": (0, 0.1),
        "0.1 - 0.3": (0.1, 0.3),
        "0.3 - 0.5": (0.3, 0.5),
        "morethan 0.5": (0.5, 2.5),
    }

    # Generate the lookup table: one row for every value from 0.01 to 2.50, in steps of 0.01
    thermal_transmittance_lookup_table = []
    for i in range(1, 251):
        value = i / 100
        for label, (low, high) in ranges.items():
            if low < value <= high:
                thermal_transmittance_lookup_table.append({"from": value, "to": label})
                break

    thermal_transmittance_lookup_table = pd.DataFrame(thermal_transmittance_lookup_table)
    # The join below is done on the string form of the value
    thermal_transmittance_lookup_table["from"] = thermal_transmittance_lookup_table["from"].astype(str)

    # Apply the lookup table to the data
    for feature in ["walls-description", "roof-description", "floor-description"]:
        cleaned_df = pd.DataFrame(cleaned[feature])[["original_description", "thermal_transmittance"]]
        # Round to 2 decimal places and convert to string
        cleaned_df["thermal_transmittance"] = cleaned_df["thermal_transmittance"].round(2).astype(str)

        data = data.merge(
            cleaned_df,
            how="left",
            left_on=feature,
            right_on="original_description",
        )
        # We now have the thermal transmittance in the data, which we can use to group with the lookup table
        data = data.merge(
            thermal_transmittance_lookup_table,
            how="left",
            left_on="thermal_transmittance",
            right_on="from",
        )
        # Where "to" is populated, replace feature with to
        data[feature] = np.where(
            ~pd.isnull(data["to"]),
            data["to"],
            data[feature]
        )
        data = data.drop(columns=["original_description", "thermal_transmittance", "from", "to"])

    data[self.NUMERICAL_COLUMNS] = data[self.NUMERICAL_COLUMNS].apply(pd.to_numeric)
    data[self.CATEGORICAL_COLUMNS] = data[self.CATEGORICAL_COLUMNS].astype(str)

    # Create new features:
    data['estimate_annual_kwh'] = data['energy-consumption-current'] * data['total-floor-area']

    if save:
        self.model_training_data_filepath = f"energy_consumption/{self.run_date}/training_data.parquet"
        # Log the path that is actually written (the training data), not the consumption dataset path
        logger.info(f"Storing energy consumption dataset in s3 at {self.model_training_data_filepath}")
        save_dataframe_to_s3_parquet(
            bucket_name=self.bucket,
            file_key=self.model_training_data_filepath,
            df=data
        )
        return

    return data
@staticmethod
def _prepare_epc(p: Property):
    """
    Build a copy of a property's EPC data in the format required for scoring with the kwh models.

    Numeric fields are cast to float (None is left as it is), the flag fields are mapped onto "Y"/"N" and
    missing floor fields are filled with their no-data placeholders. The data held on the property is not
    modified.

    :param p: Instance of the property class, whose data attribute holds the EPC
    :return: The prepared copy of the EPC
    """
    epc = p.data.copy()

    numeric_cols = (
        'current-energy-efficiency', 'potential-energy-efficiency',
        'environment-impact-current', 'environment-impact-potential',
        'energy-consumption-current', 'energy-consumption-potential',
        'co2-emissions-current', 'co2-emiss-curr-per-floor-area', 'co2-emissions-potential',
        'lighting-cost-current', 'lighting-cost-potential',
        'heating-cost-current', 'heating-cost-potential',
        'hot-water-cost-current', 'hot-water-cost-potential',
        'total-floor-area', 'multi-glaze-proportion',
        'extension-count', 'number-habitable-rooms', 'number-heated-rooms',
        'low-energy-lighting', 'number-open-fireplaces',
        'wind-turbine-count', 'unheated-corridor-length',
        'floor-height', 'photo-supply', 'fixed-lighting-outlets-count',
        'low-energy-fixed-light-count',
    )
    for col in numeric_cols:
        value = epc[col]
        if value is None:
            continue
        epc[col] = float(value)

    # Flags can arrive as booleans, None or already as "Y"/"N"; None is treated as "N"
    yes_no = {True: "Y", False: "N", None: "N", "Y": "Y", "N": "N"}
    for flag in ('mains-gas-flag', 'flat-top-storey'):
        epc[flag] = yes_no[epc[flag]]

    # Placeholders used when these fields are missing
    for col, placeholder in (("floor-level", "NODATA!"), ("floor-energy-eff", "NO DATA!")):
        if pd.isnull(epc[col]):
            epc[col] = placeholder

    return epc
def prepare_epc(self, input_properties: list[Property]):
    """
    Build the scoring dataframe for a list of properties.

    Each property's EPC is prepared with _prepare_epc, then the lodgement year and month are derived from
    the lodgement date and the uprn is copied into the id column.

    :param input_properties: List of instances of the property class
    :return: Dataframe with one row per property
    """
    prepared_rows = [self._prepare_epc(prop) for prop in input_properties]
    scoring_data = pd.DataFrame(prepared_rows)

    lodgement_dates = pd.to_datetime(scoring_data["lodgement-date"])
    scoring_data["lodgement-year"] = lodgement_dates.dt.year
    scoring_data["lodgement-month"] = lodgement_dates.dt.month

    scoring_data["id"] = scoring_data["uprn"].copy()
    return scoring_data
def convert_cost_to_today(self, original_cost, lodgement_date):
    """
    Rescale an EPC energy cost to the most recent tariff level in the retail price comparison data.

    The cost is multiplied by the ratio of the last tariff in the data to the tariff on the date closest to
    the lodgement date.

    :param original_cost: The original energy cost
    :param lodgement_date: The date the EPC was lodged
    :return: The energy cost scaled to the latest available tariff
    """
    prices = self.retail_price_comparison
    tariff_column = 'Average standard variable tariff (Large legacy suppliers)'

    # Row position of the date with the smallest absolute distance to the lodgement date
    nearest_position = (prices['Date'] - lodgement_date).abs().argsort()[:1]
    closest_date = pd.Timestamp(prices.iloc[nearest_position]['Date'].values[0])

    # Tariff in force on the closest date, and the most recent tariff (last row of the data)
    tariff_at_lodgement = prices.loc[prices['Date'] == closest_date, tariff_column].values[0]
    latest_tariff = prices[tariff_column].iloc[-1]

    ratio = float(latest_tariff) / float(tariff_at_lodgement)
    return original_cost * ratio

View file

@ -132,49 +132,56 @@ def app():
energy_consumption_data = []
for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
# Skip the first 50
if i < 250:
continue
data = pd.read_csv(directory / "certificates.csv", low_memory=False)
# Rename the columns to the same format as the api returns
data.columns = [c.replace("_", "-").lower() for c in data.columns]
# Take just date before the date threshold
data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE]
data = data[~pd.isnull(data["uprn"])]
# Take just the newest EPC per uprn, based on lodgement-date
data = data.sort_values("lodgement-date", ascending=False).drop_duplicates("uprn")
data = data.sample(sample_size)
# We use the addreess data to find the related information
collected_data = []
for _, property_data in data.iterrows():
time.sleep(np.random.uniform(0.3, 2))
uprn = int(property_data["uprn"])
address = property_data["address1"]
postcode = property_data["postcode"]
expected_expiry_date = calculate_expiry_date(property_data["lodgement-date"])
response = retrieve_find_my_epc_data(
uprn=uprn,
postcode=postcode,
address=address,
expected_expiry_date=expected_expiry_date
)
if response is None:
try:
# Skip the first 50
if i < 256:
continue
collected_data.append(
{
**response,
"epc": property_data.to_dict(),
"epc_directory": str(directory)
}
)
energy_consumption_data.extend(collected_data)
data = pd.read_csv(directory / "certificates.csv", low_memory=False)
# Rename the columns to the same format as the api returns
data.columns = [c.replace("_", "-").lower() for c in data.columns]
# Take just date before the date threshold
data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE]
data = data[~pd.isnull(data["uprn"])]
# Take just the newest EPC per uprn, based on lodgement-date
data = data.sort_values("lodgement-date", ascending=False).drop_duplicates("uprn")
data = data.sample(sample_size, replace=False)
# We use the addreess data to find the related information
collected_data = []
for _, property_data in data.iterrows():
time.sleep(np.random.uniform(0.2, 1.5))
uprn = int(property_data["uprn"])
address = property_data["address1"]
postcode = property_data["postcode"]
expected_expiry_date = calculate_expiry_date(property_data["lodgement-date"])
response = retrieve_find_my_epc_data(
uprn=uprn,
postcode=postcode,
address=address,
expected_expiry_date=expected_expiry_date
)
if response is None:
continue
collected_data.append(
{
**response,
"epc": property_data.to_dict(),
"epc_directory": str(directory)
}
)
energy_consumption_data.extend(collected_data)
except Exception as e:
print(f"Error for directory {directory}: {e}")
# If we have an error, then we wait for a bit since it's likely due to timeout
time.sleep(300)
continue
# Store the pickle in s3
save_time = datetime.now()

View file

@ -1,104 +0,0 @@
import re
from datetime import datetime
from tqdm import tqdm
import pandas as pd
from utils.s3 import list_files_in_s3_folder, read_pickle_from_s3, save_dataframe_to_s3_parquet
# These columns we co-erce to strings before saving
PROBLEMATIC_COLUMNS = ["main-heating-controls", "floor-level"]
def extract_kwh_value(text):
    """
    Extract the numerical kWh value from a given string.

    :param text: The input string containing the kWh value, e.g. "12,345 kWh per year"
    :return: The extracted kWh value as an integer, or None when text is not a string or does not contain a
        "<number> kWh per year" figure
    """
    # Missing text is treated the same as text without a figure
    if not isinstance(text, str):
        return None

    # The figure must start with a digit: a bare "," in front of "kWh per year" would otherwise match and
    # fail the integer conversion below
    match = re.search(r'(\d[\d,]*) kWh per year', text)
    if match is None:
        return None

    # Remove the thousands separators before converting to an integer
    return int(match.group(1).replace(',', ''))
def app():
    """
    Given the files written in our datalake in s3, this application will collate the data into a single file
    and store it back in s3 for analysis.

    Records whose extracted SAP scores do not match the EPC are dropped and the remainder is deduplicated to
    the newest record per uprn. The mean total (heating + hot water) consumption per current-energy-rating is
    stored alongside the combined dataset.

    :return:
    """
    datalake_bucket = "retrofit-datalake-dev"

    # Firstly, list all of the saved files in s3
    data_files = list_files_in_s3_folder(bucket_name=datalake_bucket, folder_name="energy_consumption_data")

    run_date = datetime.now().strftime("%Y-%m-%d")

    complete_data = []
    for file_key in tqdm(data_files):
        # The file name, without its extension, is the date the data was collected
        dataset_run_date = pd.Timestamp(file_key.split("/")[-1].split(".")[0])

        # Load the data from the file
        records = read_pickle_from_s3(bucket_name=datalake_bucket, s3_file_name=file_key)

        for record in records:
            epc_data = record["epc"]
            epc_sap = epc_data["current-energy-efficiency"]
            epc_potential_sap = epc_data["potential-energy-efficiency"]

            # We only keep records where the retrieved SAP scores match the EPC data
            if int(epc_sap) != int(record["current_epc_efficiency"]) or int(epc_potential_sap) != int(
                record["potential_epc_efficiency"]
            ):
                continue

            complete_data.append(
                {
                    **epc_data,
                    "heating_kwh": extract_kwh_value(record["heating_text"]),
                    "hot_water_kwh": extract_kwh_value(record["hot_water_text"]),
                    "dataset_run_date": dataset_run_date
                }
            )

    df = pd.DataFrame(complete_data)

    # Because we collate multiple runs into a single data source, it's possible that we have duplicated data at
    # the uprn level, so we dedupe based on the newest dataset_run_date
    df = df.sort_values("dataset_run_date", ascending=False).drop_duplicates(subset="uprn", keep="first")
    df = df.drop(columns=["dataset_run_date"])

    for col in PROBLEMATIC_COLUMNS:
        df[col] = df[col].astype(str)

    # Save the data back to s3, but this time as a parquet file
    save_dataframe_to_s3_parquet(
        bucket_name="retrofit-data-dev",
        file_key=f"energy_consumption/{run_date}/energy_consumption_dataset.parquet",
        df=df
    )

    # We also estimate the energy consumption reduction from this data, by band
    df["total_consumption"] = df["heating_kwh"] + df["hot_water_kwh"]
    # mean(), not meam(): the misspelt method name raised an AttributeError before anything was saved
    consumption_averages = df.groupby("current-energy-rating")["total_consumption"].mean().reset_index()

    # Save the consumption averages back to s3
    save_dataframe_to_s3_parquet(
        bucket_name="retrofit-data-dev",
        file_key=f"energy_consumption/{run_date}/consumption_averages.parquet",
        df=consumption_averages
    )

View file

@ -1,57 +0,0 @@
from pprint import pprint
import msgpack
from utils.s3 import read_from_s3
from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel
def handler():
    """
    Train one energy consumption model per target and save each of them.

    The cleaned EPC lookup is read from s3 and unpacked with msgpack, the versioned energy consumption
    dataset is loaded and feature-engineered once, and then the same split / fit / re-train / evaluate /
    save sequence is run for `heating_kwh` followed by `hot_water_kwh`.
    :return: None
    """
    dataset_version = "2024-07-08"

    packed_lookup = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name="retrofit-data-dev"
    )
    cleaned = msgpack.unpackb(packed_lookup, raw=False)

    model = EnergyConsumptionModel(cleaned=cleaned, n_jobs=2)
    model.read_dataset(f'energy_consumption/{dataset_version}/energy_consumption_dataset.parquet')
    model.feature_engineering()
    model.save_dummy_schema(dataset_version=dataset_version)

    # The order matters only for the printed output: heating is trained first, hot water second.
    # NOTE(review): earlier revisions kept commented-out snippets here that sorted
    # model.testing_predictions / model.training_predictions by "residual" and merged them onto
    # model.input_data for ad-hoc inspection of the heating model - confirm those attributes still exist
    # before relying on them.
    for target in ('heating_kwh', 'hot_water_kwh'):
        model.split_dataset(target=target)
        model.fit_model(target=target)
        model.re_train_final_model(target=target)

        evaluation_results = model.evaluate_model(target=target)
        for split_name in ("train", "test"):
            pprint(evaluation_results[split_name])

        model.save_model(target=target, dataset_version=dataset_version)

View file

@ -0,0 +1,24 @@
import msgpack
from etl.bill_savings.KwhData import KwhData
from utils.s3 import read_from_s3
def app():
    """
    Collate the energy consumption files written to the s3 datalake into a single dataset and store the
    result back in s3 for analysis.
    :return: None
    """
    cleaned = msgpack.unpackb(
        read_from_s3(
            s3_file_name="cleaned_epc_data/cleaned.bson",
            bucket_name="retrofit-data-dev"
        ),
        raw=False
    )

    # If any of the source data turns out to be problematic, a likely candidate is:
    # s3://retrofit-datalake-dev/energy_consumption_data/2024-08-10 18:48:06.866647.pkl
    collator = KwhData(bucket="retrofit-datalake-dev")
    collator.combine()
    collator.transform(data=collator.data, cleaned=cleaned, save=True)

View file

@ -0,0 +1,211 @@
"""
This script prepares some data for the Birmingham City Council tender
"""
import pandas as pd
import numpy as np
# Load the domestic EPC certificates export for Birmingham (E08000025) from the local data directory
epc_data = pd.read_csv("local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv")
# Broad assumptions
# Around 67% of homes in the UK have an EPC; to be conservative with our estimates, we round up to 70%:
# https://www.ons.gov.uk/peoplepopulationandcommunity/housing/articles/energyefficiencyofhousinginenglandandwales/2023
# However, we have 322,128 homes in Birmingham with an EPC, which is 76% of the total number of homes in
# Birmingham based on the 2021 census, which put this figure at 423,500 homes
PROPORTION_OF_HOMES_WITH_AN_EPC = 0.761
N_HOUSEHOLDS_IN_BIRMINGHAM = 423_500
N_HOMES_WITH_AN_EPC = 322_128
# Derived from the two figures above rather than repeating the literals, so they cannot drift apart
N_HOMES_WITHOUT_AN_EPC = N_HOUSEHOLDS_IN_BIRMINGHAM - N_HOMES_WITH_AN_EPC

# 55% of households are recipients of benefits in the West Midlands
# (2021/2022 - https://www.statista.com/statistics/382858/uk-state-benefits-by-region/)
PROPORTION_OF_HOMES_ON_BENEFITS = 0.55

# https://www.justgroupplc.co.uk/~/media/Files/J/Just-Retirement-Corp/news-doc/2023/six-in-10-homeowners-eligible-for
# -benefits-failing-to-claim-just-group-annual-insight-report.pdf
PROPORTION_OF_HOMEOWNERS_CLAIMING_FOR_BENEFITS = 0.106

# Breakdown of properties in council tax bands in the UK, to give us an estimate of the number of properties in A-D
band_a_proportion = 0.239
band_b_proportion = 0.195
band_c_proportion = 0.219
band_d_proportion = 0.156
COUNCIL_TAX_BAND_A_TO_D_PROPORTION = band_a_proportion + band_b_proportion + band_c_proportion + band_d_proportion
# Keep only the most recently lodged certificate for each UPRN. Unparseable lodgement timestamps are
# coerced to NaT rather than raising.
epc_data["LODGEMENT_DATETIME"] = pd.to_datetime(epc_data["LODGEMENT_DATETIME"], errors="coerce")
epc_data = epc_data.sort_values(["LODGEMENT_DATETIME"], ascending=False)
epc_data = epc_data.drop_duplicates("UPRN")

# We want to figure out the number of properties that are eligible for ECO/GBIS funding.
# The TENURE column carries two spellings for each tenure, so both are listed.
social_tenures = ["Rented (social)", "rental (social)"]
owner_occupied_tenures = ["Owner-occupied", "owner-occupied"]
prs_tenures = ["Rented (private)", "rental (private)"]

# Each rule is (tenures, EPC ratings, label). Rules are applied in order and a property keeps the first
# label it receives, because every rule only touches rows whose eligibility_type is still empty.
#
# Eligibility 1: ECO4 help to heat group, owner occupied - EPC rating D-G
# Eligibility 2: ECO4 help to heat group, private rental - EPC rating E-G
# Eligibility 3: ECO4 social housing - social rented and EPC rating D-G
# Eligibility 4: GBIS general eligibility, owner occupied - EPC rating D-G. This is a subset of
#   Eligibility 1, so there is no separate rule for it: the owner-occupied count is scaled later, and a
#   share of the properties that fall out of Eligibility 1 is treated as eligible via council tax bands A-D.
# Eligibility 5: GBIS general eligibility, private rental - EPC rating D-G. Because Eligibility 2 has
#   already claimed the E-G private rentals, this rule only picks up what is left (the D rated ones).
#   Units that fall out of Eligibility 2 are handled later by the same mechanism as Eligibility 4.
eligibility_rules = [
    (owner_occupied_tenures, ["D", "E", "F", "G"], "eco4_oo_hthg_needs_scaling_on_benefits"),
    (prs_tenures, ["E", "F", "G"], "eco4_prs_hthg_needs_scaling_on_benefits"),
    (social_tenures, ["D", "E", "F", "G"], "eco4_social_housing"),
    (prs_tenures, ["D", "E", "F", "G"], "gbis_prs_ge_needs_scaling_on_council_tax_band"),
]

epc_data["eligibility_type"] = None
for tenures, ratings, label in eligibility_rules:
    is_match = (
        epc_data["TENURE"].isin(tenures) &
        epc_data["CURRENT_ENERGY_RATING"].isin(ratings) &
        epc_data["eligibility_type"].isnull()
    )
    epc_data["eligibility_type"] = np.where(is_match, label, epc_data["eligibility_type"])
# EPCs to analyse: every property that was given an eligibility type above
analysis_epcs = epc_data[~pd.isnull(epc_data["eligibility_type"])].copy()

# Keep just the columns we need
analysis_epcs = analysis_epcs[
    [
        "UPRN", "TENURE", "CURRENT_ENERGY_RATING", "WALLS_DESCRIPTION", "ROOF_DESCRIPTION",
        "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA", "PROPERTY_TYPE", "BUILT_FORM", "MAINHEAT_DESCRIPTION",
        "eligibility_type",
    ]
]

# Everything that is not rated D is grouped as E-G (only D-G properties can carry an eligibility type)
analysis_epcs["grouped_epc_band"] = np.where(
    analysis_epcs["CURRENT_ENERGY_RATING"].isin(["D"]),
    "EPC D",
    "EPC E-G"
)

# All outputs of this analysis are written to the same local folder
OUTPUT_DIR = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/bcc tender"
analysis_epcs.to_csv(f"{OUTPUT_DIR}/analysis_epcs.csv", index=False)

# Create one aggregation per column and store each as its own CSV. Previously this list was assigned
# twice in a row, so the first five columns (including grouped_epc_band) were silently never aggregated.
agg_cols = [
    "CURRENT_ENERGY_RATING", "CONSTRUCTION_AGE_BAND", "PROPERTY_TYPE", "BUILT_FORM", "grouped_epc_band",
    "WALLS_DESCRIPTION", "ROOF_DESCRIPTION", "MAINHEAT_DESCRIPTION",
]
for col in agg_cols:
    agg_df = analysis_epcs.groupby([col]).size().reset_index(name="Number of Properties")
    agg_df["Percentage of Properties"] = 100 * agg_df["Number of Properties"] / agg_df["Number of Properties"].sum()
    agg_df.to_csv(f"{OUTPUT_DIR}/{col}.csv", index=False)
# Eligibility 6: GBIS general eligibility, social - tenure is social rented and EPC rating D-G, but the
# property should also be rented out below market rate.
# This is a subset of Eligibility 3 - we likely don't need to do any scaling.

# Number of properties carrying each eligibility label (rows without a label are not counted)
eligibility_counts = epc_data["eligibility_type"].value_counts()
n_eco4_oo_hthg_needs_scaling_on_benefits = int(
    eligibility_counts.get("eco4_oo_hthg_needs_scaling_on_benefits", 0)
)
n_eco4_prs_hthg_needs_scaling_on_benefits = int(
    eligibility_counts.get("eco4_prs_hthg_needs_scaling_on_benefits", 0)
)
n_eco4_social = int(eligibility_counts.get("eco4_social_housing", 0))
n_gbis_prs_ge_needs_scaling_on_council_tax_band = int(
    eligibility_counts.get("gbis_prs_ge_needs_scaling_on_council_tax_band", 0)
)

# We make the broad assumption that all homeowners claiming benefits live in homes in council tax bands
# A-D. Therefore there are no additional homes in eligibility 4 and 5, and eligibility 1 and 2 are scaled
# by the council tax band A-D proportion. (An earlier version scaled eligibility 1 by
# PROPORTION_OF_HOMEOWNERS_CLAIMING_FOR_BENEFITS and eligibility 2 by PROPORTION_OF_HOMES_ON_BENEFITS, and
# then derived an eligibility 4 count from the owner-occupied homes left over.)
n_eligibility_1 = np.floor(n_eco4_oo_hthg_needs_scaling_on_benefits * COUNCIL_TAX_BAND_A_TO_D_PROPORTION)
n_eligibility_2 = np.floor(n_eco4_prs_hthg_needs_scaling_on_benefits * COUNCIL_TAX_BAND_A_TO_D_PROPORTION)
n_eligiblity_3 = n_eco4_social
# Homes that fall out of eligibility 2 are no longer added on here
n_eligibiltiy_5 = np.floor(n_gbis_prs_ge_needs_scaling_on_council_tax_band * COUNCIL_TAX_BAND_A_TO_D_PROPORTION)

# Owner occupied: work out the eligible share among homes with an EPC (around 68% when this was written)
# and apply the same share to the homes in Birmingham that do not have an EPC
n_owner_occupied = int(epc_data["TENURE"].isin(owner_occupied_tenures).sum())
proportion_of_oo_eligible = n_eligibility_1 / n_owner_occupied
oo_eligible_without_an_epc = np.floor(N_HOMES_WITHOUT_AN_EPC * proportion_of_oo_eligible)
oo_eligibility = n_eligibility_1 + oo_eligible_without_an_epc

# All private rentals require an EPC, so no scaling up is applied
prs_eligibility = n_eligibility_2 + n_eligibiltiy_5

# Most social housing properties will have an EPC, so we don't scale this up either
social_eligibility = n_eligiblity_3

# Express the total as a share of every household in Birmingham, not just those with an EPC
total_eligible = oo_eligibility + prs_eligibility + social_eligibility
proportion_of_homes_eligibile = total_eligible / N_HOUSEHOLDS_IN_BIRMINGHAM
# Approx 53% of homes in Birmingham are eligible for ECO/GBIS funding
# Approximately 53% of Homes are eligible for some form of ECO4 or GBIS funding, 227k homes
# This is broken down as follows:
# - 155k owner occupiers
# - 33k private rentals
# - 39k social housing
# We can't seem to identify the properties owned by the council in the company ownership data, because what is the
# entity that owns the property? Is it the council, or is it a company that is owned by the council? We can't be sure
# and so since BCC owns 54,000 social housing properties (5k) supported housing
# [https://www.birmingham.gov.uk/info/50094/housing_options/2686/apply_for_social_housing#:~:text=We%20manage
# %20around%2054%2C000%20social,a%20member%20of%20your%20household.]
# and there are 78,410 social housing properties in Birmingham, we can assume that the council owns 54,000 of these
# and so 69% of the social housing is owned by the Council
# Since we saw that 38,779 of 78,410 social housing looked to be able to benefit from ECO/GBIS funding, we can assume
# that 69% of these are owned by the council, which is 26,757 properties
# So, with these assumptions in mind:
# We can commit to [x] per annum based on your 54k council-owned, of which approximately 27k are likely to be eligible
# for some form of ECO/GBIS funding. We will work directly with Housing associations to address the remaining 12k
# social properties that may be eligible for funding through ECO/GBIS.
# We will market directly to the 33k private rentals and 155k owner occupiers that are eligible for funding,
# and assuming a 5% conversion, will aim to complete work on

View file

@ -11,7 +11,10 @@ from utils.s3 import read_dataframe_from_s3_parquet
# The mode EPC rating is D, so we associate the £238k valuation with an EPC D property
# Therefore value_of_F * 1.15 = value_of_D * 1.03
# Therefore value_of_F = value_of_D * 1.03/1.15 = 238k * (1.03/1.15) = 213165
PROPERTY_VALUE_ESTIMATE = 213_165
PROPERTY_VALUE_ESTIMATE = 200_000
# UPRNs of properties we need
MANUAL_EXCLUSIONS = []
def aggregate_matches(matching_lookup, company_ownership, properties):
@ -72,10 +75,15 @@ def find_f_g_properties(paths):
epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
epc_data["UPRN"] = epc_data["UPRN"].astype(int).astype(str)
# Get the newest EPC for each UPRN. We use LODGEMENT_DATE as a proxy for this
epc_data["LODGEMENT_DATETIME"] = pd.to_datetime(epc_data["LODGEMENT_DATETIME"], format='mixed')
if pd.isnull(pd.to_datetime(epc_data["LODGEMENT_DATETIME"], errors="coerce")).sum():
raise Exception("wtf")
epc_data = epc_data.sort_values("LODGEMENT_DATETIME", ascending=False).drop_duplicates("UPRN")
# Get the newest EPC for each UPRN. We use LODGEMENT_DATE as a proxy for this
epc_data["LODGEMENT_DATETIME"] = pd.to_datetime(epc_data["LODGEMENT_DATETIME"], errors="coerce")
epc_data = epc_data.sort_values(
["LODGEMENT_DATE", "LODGEMENT_DATETIME"], ascending=False
).drop_duplicates("UPRN")
# Get G & F properties
epc_data = epc_data[epc_data["CURRENT_ENERGY_RATING"].isin(["G", "F"])]
@ -84,7 +92,7 @@ def find_f_g_properties(paths):
data = pd.concat(data)
# Save as an excel
data.to_excel("EPC F & G Properties.xlsx", index=False)
data.to_excel("EPC F & G Properties - V2.xlsx", index=False)
def remove_text_in_brackets(address: str) -> str:
@ -196,7 +204,7 @@ def remove_duplicate_matches(matching_lookup, properties, company_ownership):
matches_to_drop[["UPRN", "Title Number"]].copy()
)
to_drop = pd.concat(to_drop)
to_drop = pd.concat(to_drop) if to_drop else pd.DataFrame()
if not to_drop.empty:
merged = pd.merge(matching_lookup, to_drop, on=['UPRN', 'Title Number'], how='left', indicator=True)
@ -245,6 +253,74 @@ def remove_duplicate_uprn_matches(matching_lookup, properties, company_ownership
return matching_lookup
def filter_land_registry(
    properties,
    input_path="/Users/khalimconn-kowlessar/Downloads/pp-complete.csv",
    output_path="/Users/khalimconn-kowlessar/Downloads/land_registry_prices_paid_filtered.csv",
    min_transfer_date="2019-01-01",
):
    """
    Cut the full Land Registry price paid file down to the postcodes and period we care about, and store
    the result as a CSV.

    :param properties: DataFrame with a POSTCODE column; only sales in these postcodes (compared
        case-insensitively) are kept
    :param input_path: Path to the headerless price paid CSV
    :param output_path: Path the filtered CSV is written to
    :param min_transfer_date: Earliest date_of_transfer to keep (inclusive). Rows whose date cannot be
        parsed are dropped
    :return: The filtered DataFrame that was written to output_path
    """
    column_names = [
        "transaction_id",
        "price",
        "date_of_transfer",
        "postcode",
        "property_type",
        "old_new",
        "duration",
        "paon",
        "saon",
        "street",
        "locality",
        "town_city",
        "district",
        "county",
        "ppd_category_type",
        "record_status",
    ]

    land_registry = pd.read_csv(input_path, header=None)
    land_registry.columns = column_names

    postcodes_of_interest = properties["POSTCODE"].str.lower().unique()
    # Take a copy so that the column assignment below acts on an independent frame and not on a slice of
    # the full file
    land_registry = land_registry[
        land_registry["postcode"].str.lower().isin(postcodes_of_interest)
    ].copy()

    land_registry["date_of_transfer"] = pd.to_datetime(
        land_registry["date_of_transfer"], format="%Y-%m-%d", errors="coerce"
    )

    # Keep recent sales only; NaT never satisfies the comparison, so unparseable dates drop out here
    land_registry = land_registry[
        (land_registry["date_of_transfer"] >= min_transfer_date)
    ]

    land_registry.to_csv(output_path, index=False)
    return land_registry
def is_substring(x, match_string):
    """
    Check whether x appears inside match_string, ignoring the case of match_string.

    x is used as given, so callers are expected to pass it already lower-cased.
    :param x: The candidate substring; a null value never matches
    :param match_string: The text to search in; a null value (e.g. a missing address line) never matches
    :return: True if x is contained in the lower-cased match_string, otherwise False
    """
    if pd.isnull(x) or pd.isnull(match_string):
        return False
    return x in match_string.lower()
def house_number_match(paon, house_number):
    """
    Compare a Land Registry primary addressable object name (paon) with a house number.

    When both values can be converted to integers they are compared numerically, so "12" matches 12.
    Otherwise (e.g. "12a", a building name, or a missing value) they are compared as they are.
    :param paon: The paon value from the land registry data
    :param house_number: The house number extracted from the EPC address
    :return: The result of the comparison
    """
    try:
        paon_numeric = int(paon)
        house_number_numeric = int(house_number)
    except (TypeError, ValueError, OverflowError):
        # int() raises these for None, NaN/inf and non-numeric text; fall back to a plain equality
        return paon == house_number
    return paon_numeric == house_number_numeric
def check_equalities(lr_filtered):
    """
    Report whether every row of lr_filtered shares the same paon, saon and street as its first row.

    A saon column that is null in the first row counts as equal only when it is null in every row.
    :param lr_filtered: Non-empty DataFrame of land registry rows with paon, saon and street columns
    :return: Tuple of booleans (all_paon_equal, all_saon_equal, all_street_equal)
    """
    def _all_match_first(column):
        series = lr_filtered[column]
        return bool(series.eq(series.values[0]).all())

    saon = lr_filtered["saon"]
    if pd.isnull(saon.values[0]):
        saon_uniform = bool(saon.isnull().all())
    else:
        saon_uniform = _all_match_first("saon")

    return _all_match_first("paon"), saon_uniform, _all_match_first("street")
def app():
"""
This script is for scoping property ownership for EPC F & G rated properties in Birmingam, for Goldman Sachs
@ -254,8 +330,8 @@ def app():
# https://epc.opendatacommunities.org/domestic/search?address=&postcode=&local-authority=&constituency
# =&uprn=100031179243&from-month=1&from-year=2008&to-month=12&to-year=2024
# is actually listed in two local authorities causing us to think it's an EPC F & G property, but it's
# it's actually EPC E. Need to handle this, probably by reading in all of the EPC data, concatenating together
# and performing a singular filter for most recent EPC by UPRN
# it's actually EPC E. Need to handle this, probably by reading in all of the EPC data, concatenating
# together and performing a singular filter for most recent EPC by UPRN
# paths = [
# "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
# "local_data/all-domestic-certificates/domestic-E08000031-Wolverhampton/certificates.csv",
@ -293,17 +369,19 @@ def app():
# paths = list(set(paths))
# find_f_g_properties(paths)
properties = pd.read_excel("EPC F & G Properties.xlsx")
company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_04.csv")
properties = pd.read_excel("EPC F & G Properties - V2.xlsx")
# filter_land_registry(properties)
company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_07.csv")
company_ownership["is_overseas"] = False
overseas_company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_04 2.csv")
overseas_company_ownership = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_07.csv")
overseas_company_ownership["is_overseas"] = True
company_ownership = pd.concat([company_ownership, overseas_company_ownership])
# FIlter on relevant postcodes
company_ownership = company_ownership[
company_ownership["Postcode"].str.lower().isin(properties["POSTCODE"].str.lower().unique())]
company_ownership["Postcode"].str.lower().isin(properties["POSTCODE"].str.lower().unique())
]
# Now we filter properties the other way around
properties = properties[properties["POSTCODE"].str.lower().isin(company_ownership["Postcode"].str.lower().unique())]
@ -328,6 +406,8 @@ def app():
~company_ownership["Property Address"].str.lower().str.startswith(starting_term)
]
# address = properties[properties["UPRN"] == 100030253055].squeeze()
freehold_matching_lookup = [] # 634
leasehold_matching_lookup = [] # 86
shared_leasehold_match = []
@ -414,13 +494,16 @@ def app():
freehold_matching_lookup = pd.DataFrame(freehold_matching_lookup)
leasehold_matching_lookup = pd.DataFrame(leasehold_matching_lookup)
shared_leasehold_match = pd.concat(shared_leasehold_match)
shared_freehold_match = pd.concat(shared_freehold_match)
# freehold_matching_lookup.to_excel("freehold_matching_lookup_new.xlsx")
# leasehold_matching_lookup.to_excel("leasehold_matching_lookup_new.xlsx")
# shared_leasehold_match.to_excel("shared_leasehold_match_new.xlsx")
# shared_freehold_match.to_excel("shared_freehold_match_new.xlsx")
# freehold_matching_lookup.to_excel("freehold_matching_lookup V2.xlsx")
# leasehold_matching_lookup.to_excel("leasehold_matching_lookup V2.xlsx")
# freehold_matching_lookup = pd.read_excel("freehold_matching_lookup V2.xlsx")
# leasehold_matching_lookup = pd.read_excel("leasehold_matching_lookup V2.xlsx")
# freehold_matching_lookup.shape
# (1537, 4)
# leasehold_matching_lookup.shape
# (390, 4)
# The approximate matches aren't very good
freehold_matching_lookup = freehold_matching_lookup[freehold_matching_lookup["match_type"] == "exact"]
@ -428,24 +511,313 @@ def app():
# Combine
combined_matching_lookup = pd.concat([freehold_matching_lookup, leasehold_matching_lookup])
# Remove duplicates
combined_matching_lookup = remove_duplicate_matches(combined_matching_lookup, properties, company_ownership)
combined_matching_lookup = remove_duplicate_matches(
matching_lookup=combined_matching_lookup, properties=properties, company_ownership=company_ownership
)
# We also have duplicates at a UPRN level
combined_matching_lookup = remove_duplicate_uprn_matches(combined_matching_lookup, properties, company_ownership)
# There are some cases where we have duplicates
# freehold_matching_lookup = remove_duplicate_matches(freehold_matching_lookup, properties, company_ownership)
# leasehold_matching_lookup = remove_duplicate_matches(leasehold_matching_lookup, properties, company_ownership)
matched_addresses = combined_matching_lookup.merge(
properties[["UPRN", "ADDRESS", "CURRENT_ENERGY_EFFICIENCY", "CURRENT_ENERGY_RATING"]].rename(
columns={"ADDRESS": "epc_address"}),
properties[
[
"UPRN",
"ADDRESS",
"ADDRESS1",
"CURRENT_ENERGY_EFFICIENCY",
"CURRENT_ENERGY_RATING",
"POSTCODE",
"LODGEMENT_DATE",
"TRANSACTION_TYPE"
]
].rename(
columns={
"ADDRESS": "epc_address",
"ADDRESS1": "epc_address1",
"POSTCODE": "epc_postcode"
}
),
how="left", on="UPRN"
).merge(
company_ownership[["Title Number", "Property Address", "Company Registration No. (1)", "Proprietor Name (1)"]],
company_ownership[
[
"Title Number",
"Property Address",
"Postcode",
"Company Registration No. (1)",
"Proprietor Name (1)",
"Date Proprietor Added",
]
],
how="left", on="Title Number"
)
# Let's try and get the house number
matched_addresses["house_number"] = (
matched_addresses["epc_address"]
.apply(remove_text_in_brackets)
.apply(SearchEpc.get_house_number)
.str.lower()
.str.replace(",", "")
)
# Read in land registry
land_registry = pd.read_csv(
"/Users/khalimconn-kowlessar/Downloads/land_registry_prices_paid_filtered.csv",
)
# We now perform a match between the land registry data and the matched address, in an attempt to find
# out when these properties last sold. The land registry data has been pre filtered on the postcodes in this
# data, and for sales within the last 5 years, to ensure the file isn't too large.
land_registry["postcode"] = land_registry["postcode"].str.lower().str.strip()
land_registry["street"] = land_registry["street"].str.lower().str.strip()
land_registry["paon"] = land_registry["paon"].str.lower().str.strip()
land_registry["saon"] = land_registry["saon"].str.lower().str.strip()
land_registry["date_of_transfer"] = pd.to_datetime(land_registry["date_of_transfer"])
land_registry_matches = []
for _, match in tqdm(matched_addresses.iterrows(), total=len(matched_addresses)):
# Filter land registry on the postcode
lr_filtered = land_registry[
(land_registry["postcode"] == match["epc_postcode"].lower().strip())
]
# Filter further, when the street is in in the address
# street should be contained in epc_address
lr_filtered = lr_filtered[
lr_filtered["street"].apply(lambda x: is_substring(x, match["epc_address"].lower())) |
lr_filtered["street"].apply(lambda x: is_substring(x, match["Property Address"].lower()))
]
if lr_filtered.empty:
continue
# We now check if paon is in address 1
lr_filtered["paon_match"] = lr_filtered["paon"].apply(lambda x: house_number_match(x, match["house_number"]))
# We also try the secondary match
lr_filtered["saon_match"] = (
lr_filtered["saon"].apply(
lambda x: False if pd.isnull(x) else is_substring(x, match["epc_address1"])
)
)
# We fileter where we have a primary or secondary match
lr_filtered = lr_filtered[
lr_filtered["paon_match"] | lr_filtered["saon_match"]
]
if lr_filtered.empty:
continue
elif lr_filtered.shape[0] == 1:
land_registry_matches.append(
{
"uprn": match["UPRN"],
"transaction_id": lr_filtered['transaction_id'].values[0],
"price": lr_filtered["price"].values[0],
"date_of_transfer": lr_filtered["date_of_transfer"].values[0],
}
)
continue
elif lr_filtered.shape[0] > 1:
# We make sure all records are the same and take the newest
all_paon_equal, all_saon_equal, all_street_equal = check_equalities(lr_filtered)
has_paon_match = any(lr_filtered["paon_match"])
if all_paon_equal and all_street_equal and all_saon_equal:
# Take the newest record, append and continue
lr_filtered = lr_filtered.sort_values("date_of_transfer", ascending=False)
lr_filtered = lr_filtered.head(1)
land_registry_matches.append(
{
"uprn": match["UPRN"],
"transaction_id": lr_filtered['transaction_id'].values[0],
"price": lr_filtered["price"].values[0],
"date_of_transfer": lr_filtered["date_of_transfer"].values[0],
}
)
continue
elif has_paon_match and all_street_equal:
# Peform filter on paon
lr_filtered = lr_filtered[lr_filtered["paon_match"]]
# Do an addtiioanl equality check
all_paon_equal, all_saon_equal, all_street_equal = check_equalities(lr_filtered)
if all_paon_equal and all_street_equal and all_saon_equal:
lr_filtered = lr_filtered.sort_values("date_of_transfer", ascending=False)
lr_filtered = lr_filtered.head(1)
land_registry_matches.append(
{
"uprn": match["UPRN"],
"transaction_id": lr_filtered['transaction_id'].values[0],
"price": lr_filtered["price"].values[0],
"date_of_transfer": lr_filtered["date_of_transfer"].values[0],
}
)
else:
# We do a match on saon
lr_filtered["saon_match2"] = lr_filtered["saon"].apply(
lambda x: False if pd.isnull(x) else is_substring(x, match["epc_address"])
)
lr_filtered = lr_filtered[lr_filtered["saon_match2"]]
if lr_filtered.empty:
continue
elif lr_filtered.shape[0] == 1:
land_registry_matches.append(
{
"uprn": match["UPRN"],
"transaction_id": lr_filtered['transaction_id'].values[0],
"price": lr_filtered["price"].values[0],
"date_of_transfer": lr_filtered["date_of_transfer"].values[0],
}
)
continue
else:
raise NotImplementedError("wtf")
else:
# We have a final check, based on an observed case
lr_address_1 = " ".join([x.lower().strip() for x in match["Property Address"].split(",")[0:2]])
lr_filtered["paon_match2"] = lr_filtered["paon"].apply(
lambda x: False if pd.isnull(x) else is_substring(x, lr_address_1)
)
lr_filtered = lr_filtered[lr_filtered["paon_match2"]]
if lr_filtered.empty:
continue
elif lr_filtered.shape[0] == 1:
land_registry_matches.append(
{
"uprn": match["UPRN"],
"transaction_id": lr_filtered['transaction_id'].values[0],
"price": lr_filtered["price"].values[0],
"date_of_transfer": lr_filtered["date_of_transfer"].values[0],
}
)
continue
else:
# Check all the same
all_paon_equal, all_saon_equal, all_street_equal = check_equalities(lr_filtered)
# Check saon is house number with exact match
lr_filtered["saon_match2"] = lr_filtered["saon"].apply(
lambda x: False if pd.isnull(x) else house_number_match(x, match["house_number"])
)
# We check if we have a flat
match_flat_number = re.match("flat (\d+)", match["epc_address1"].lower())
match_apartment_number = re.match("apartment (\d+)", match["epc_address1"].lower())
lr_filtered["saon_match3"] = False
if match_flat_number is not None:
# Get out the match
match_flat_number = "flat " + match_flat_number.group(1)
lr_filtered["saon_match3"] = lr_filtered["saon"].apply(
lambda x: False if pd.isnull(x) else x == match_flat_number
)
if match_apartment_number is not None:
# Get out the match
match_apartment_number = "apartment " + match_apartment_number.group(1)
lr_filtered["saon_match3"] = lr_filtered["saon"].apply(
lambda x: False if pd.isnull(x) else x == match_apartment_number
)
if all_paon_equal and all_saon_equal and all_street_equal:
# Take the newest record
lr_filtered = lr_filtered.sort_values("date_of_transfer", ascending=False)
lr_filtered = lr_filtered.head(1)
land_registry_matches.append(
{
"uprn": match["UPRN"],
"transaction_id": lr_filtered['transaction_id'].values[0],
"price": lr_filtered["price"].values[0],
"date_of_transfer": lr_filtered["date_of_transfer"].values[0],
}
)
continue
elif any(lr_filtered["saon_match2"]):
lr_filtered = lr_filtered[lr_filtered["saon_match2"]]
all_saon_equal, all_paon_equal, all_street_equal = check_equalities(lr_filtered)
if all_paon_equal and all_saon_equal and all_street_equal:
# Filter on the newest record
lr_filtered = lr_filtered.sort_values("date_of_transfer", ascending=False)
lr_filtered = lr_filtered.head(1)
if lr_filtered.shape[0] == 1:
land_registry_matches.append(
{
"uprn": match["UPRN"],
"transaction_id": lr_filtered['transaction_id'].values[0],
"price": lr_filtered["price"].values[0],
"date_of_transfer": lr_filtered["date_of_transfer"].values[0],
}
)
continue
elif any(lr_filtered["saon_match3"]):
lr_filtered = lr_filtered[lr_filtered["saon_match3"]]
if lr_filtered.shape[0] == 1:
land_registry_matches.append(
{
"uprn": match["UPRN"],
"transaction_id": lr_filtered['transaction_id'].values[0],
"price": lr_filtered["price"].values[0],
"date_of_transfer": lr_filtered["date_of_transfer"].values[0],
}
)
continue
raise NotImplementedError("wtf")
else:
raise NotImplementedError("What happened here?")
land_registry_matches = pd.DataFrame(land_registry_matches)
# land_registry_matches.to_excel("land_registry_matches.xlsx")
# Check the matches against the addresses
# lr_to_addresses = matched_addresses[
# ["UPRN", "epc_address", "epc_postcode", "Property Address", "Postcode"]
# ].merge(
# land_registry_matches,
# how="inner",
# left_on="UPRN",
# right_on="uprn"
# ).drop(columns=["uprn"]).merge(
# land_registry[["transaction_id", "paon", "saon", "street", "postcode"]],
# how="left", on="transaction_id"
# )
# Merge onto matched addresses
matched_addresses = matched_addresses.merge(
land_registry_matches,
how="left",
left_on="UPRN",
right_on="uprn"
).drop(columns=["uprn"])
# Flag anything that sold in the last year
matched_addresses["sold_recently"] = (
matched_addresses["date_of_transfer"] >= pd.Timestamp.now() - pd.DateOffset(years=1)
)
matched_addresses["sale_lodged_recently"] = (
(pd.to_datetime(matched_addresses["LODGEMENT_DATE"]) >= pd.Timestamp.now() - pd.DateOffset(months=12)) &
(matched_addresses["TRANSACTION_TYPE"].isin(["marketed sale", "non marketed sale"]))
)
# Save this
# matched_addresses.to_excel("combined_aggregate - pre filter 28th July.xlsx", index=False)
# Drop rows on the booleans
matched_addresses = matched_addresses[
~matched_addresses["sold_recently"] &
~matched_addresses["sale_lodged_recently"]
]
# Filter combined_matching_lookup accordingly
combined_matching_lookup = combined_matching_lookup[
combined_matching_lookup["UPRN"].isin(matched_addresses["UPRN"])
]
# shared_freehold_match = pd.DataFrame(shared_freehold_match)
# Strore these files
# freehold_matching_lookup.to_excel("freehold_matching_lookup.xlsx")
@ -457,33 +829,28 @@ def app():
# leasehold_matching_lookup = pd.read_excel("leasehold_matching_lookup.xlsx")
# shared_leasehold_match = pd.read_excel("shared_leasehold_match.xlsx")
freehold_aggregate = aggregate_matches(freehold_matching_lookup, company_ownership, properties)
leasehold_aggregate = aggregate_matches(leasehold_matching_lookup, company_ownership, properties)
# freehold_aggregate = aggregate_matches(freehold_matching_lookup, company_ownership, properties)
# leasehold_aggregate = aggregate_matches(leasehold_matching_lookup, company_ownership, properties)
combined_aggregate = aggregate_matches(
combined_matching_lookup, company_ownership, properties
matching_lookup=combined_matching_lookup,
company_ownership=company_ownership,
properties=properties
)
investment_20m = combined_aggregate[combined_aggregate["cumulative_value"] <= 20_500_000]
investment_50m = combined_aggregate[combined_aggregate["cumulative_value"] <= 51_000_000]
investment_20m_properties = matched_addresses[
matched_addresses["Company Registration No. (1)"].isin(investment_20m["Company Registration No. (1)"])
]
investment_50m_properties = matched_addresses[
matched_addresses["Company Registration No. (1)"].isin(investment_50m["Company Registration No. (1)"])
]
portfolio_epc_data_50m = properties[properties["UPRN"].isin(investment_50m_properties["UPRN"])]
portfolio_epc_data_20m = properties[properties["UPRN"].isin(investment_20m_properties["UPRN"])]
investment_20m_properties.to_excel("investment_20m_properties 28th May.xlsx", index=False)
investment_50m_properties.to_excel("investment_50m_properties 28th May.xlsx", index=False)
# Storing data
# investment_50m_properties.to_excel("investment_50m_properties 28th July.xlsx", index=False)
# Store the EPC data
portfolio_epc_data_50m.to_excel("portfolio_epc_data_50m 28th May.xlsx", index=False)
portfolio_epc_data_20m.to_excel("portfolio_epc_data_20m 28th May.xlsx", index=False)
# portfolio_epc_data_50m.to_excel("portfolio_epc_data_50m 28th July.xlsx", index=False)
# We check if any of these properties are in a conservation area
valuations = pd.read_excel("property value.xlsx")
@ -529,6 +896,48 @@ def company_aggregation():
aggregation.to_excel("Company ownership aggregation.xlsx")
def extract_price_info(text):
    """
    Pull the Zoopla estimated price band out of scraped page text.

    :param text: page text; the estimate is expected as an "Estimated price" heading followed by the low,
        central and high figures, each quoted in thousands of pounds (e.g. "Low£100k")
    :return: dict with the central, lower and upper valuations in pounds, or None if the section is not found
    """
    pattern = r'Estimated price\n\nLow£([\d,]+)k\n\n£([\d,]+)k\n\nHigh£([\d,]+)k'
    found = re.search(pattern, text)
    if found is None:
        return None

    # Figures are quoted in thousands and may carry comma separators
    lower, central, upper = (int(figure.replace(',', '')) * 1000 for figure in found.groups())
    return {
        'Zoopla Valuation': central,
        'Zoopla Lower Bound': lower,
        'Zoopla Upper Bound': upper
    }
def get_valuations(portfolio_epc_data_50m):
    """
    Scrape a Zoopla valuation for every property in the portfolio.

    Each property page is requested via the r.jina.ai URL prefix and parsed with extract_price_info. This gets
    blocked pretty quickly by Zoopla, so there is a two second pause between requests.

    :param portfolio_epc_data_50m: DataFrame with a "UPRN" column, one row per property
    :return: list of dicts, one per property, holding the "UPRN" plus the Zoopla valuation fields whenever the
        page contained a price estimate
    """
    # Imported locally as they are only needed for this scrape
    import requests
    import time
    from tqdm import tqdm

    valuation_data = []
    for _, property_data in tqdm(portfolio_epc_data_50m.iterrows(), total=len(portfolio_epc_data_50m)):
        uprn = property_data["UPRN"]
        response = requests.get(
            f"https://r.jina.ai/https://www.zoopla.co.uk/property/uprn/{uprn}/"
        )
        # extract_price_info returns None when the page has no estimate; unpacking None with ** raises a
        # TypeError, so fall back to recording just the UPRN for that property
        pricing = extract_price_info(response.text) or {}
        valuation_data.append(
            {
                "UPRN": uprn,
                **pricing
            }
        )
        time.sleep(2)

    # Previously the collected data was built up and then discarded
    return valuation_data
def prepare_anonymised_data():
investment_50m_properties = pd.read_excel("investment_50m_properties 28th May.xlsx", header=0)
investment_epc_data = pd.read_excel("portfolio_epc_data_50m 28th May.xlsx", header=0)
@ -603,3 +1012,230 @@ def prepare_anonymised_data():
)
df.to_excel("Property List - 50% redacted.xlsx", index=False)
def adhoc_change_of_portfolio_analysis_july_2024():
    """
    This is just some adhoc analysis, which answers some questions which arose upon refreshing the SFR portfolio
    in late July 2024.

    All inputs are Excel workbooks read by relative path, so they must be present in the working directory.
    Findings are printed or left in local variables for inspection; nothing is written out.

    :return: None
    """

    # Question 1: Which properties in the previous portfolio were in conservation areas or had listed/heritage status?
    def answer_q1():
        # Data was just stored here:
        geospatial_data = pd.read_excel("geospatial_data.xlsx")

        special_buildings = geospatial_data[
            (geospatial_data["conservation_status"] == 1) |
            geospatial_data["is_listed_building"] |
            geospatial_data["is_heritage_building"]
        ]
        print(
            f"There were {special_buildings.shape[0]} properties in the previous portfolio which were in conservation "
            f"areas or had listed/heritage status"
        )
        print(f"{(special_buildings['conservation_status'] == 1).sum()} were in a conservation area")
        print(f"{special_buildings['is_listed_building'].sum()} were listed buildings")
        print(f"{special_buildings['is_heritage_building'].sum()} were heritage buildings")

    answer_q1()

    # Question 2: For each property in the old portfolio, why was it lost?
    # NOTE(review): unlike answer_q1, no call to answer_q2 is visible in this function, so as written this
    # analysis only runs when invoked by hand - confirm whether that is intentional
    def answer_q2():
        # We read in the previous 50m portfolio
        previous_portfolio = pd.read_excel("investment_50m_properties 28th May.xlsx")  # 39 owners
        new_matched_addresses = pd.read_excel("combined_aggregate - pre filter 28th July.xlsx")
        new_portfolio = pd.read_excel("investment_50m_properties 28th July.xlsx")  # 69 owners

        # dropped units
        dropped_units = previous_portfolio[
            ~previous_portfolio["UPRN"].isin(new_portfolio["UPRN"].values)
        ]

        # Lots of properties are missed out - why
        # 1) What was dropped, but was in the matched addresses and therefore was maybe filtered out
        dropped_units_matched = dropped_units[
            dropped_units["UPRN"].isin(new_matched_addresses["UPRN"])
        ].copy()
        dropped_units_matched = dropped_units_matched.merge(
            new_matched_addresses[
                ["UPRN", 'transaction_id', 'price', 'date_of_transfer', 'sold_recently', 'sale_lodged_recently']
            ],
            how="left", on="UPRN"
        )

        # 97 units here - how many were sold
        of_which_sold = dropped_units_matched[
            dropped_units_matched["sold_recently"]
        ]
        n_sold = of_which_sold.shape[0]
        # Percentages are expressed against the size of the whole previous portfolio
        print(f"{n_sold} sold recently ({n_sold / previous_portfolio.shape[0] * 100:.1f}%)")

        of_which_have_sale_epc_but_not_sold = dropped_units_matched[
            ~dropped_units_matched["sold_recently"] & dropped_units_matched["sale_lodged_recently"]
        ]
        n_with_sale_epc_but_not_yet_sold = of_which_have_sale_epc_but_not_sold.shape[0]
        print(
            f"{n_with_sale_epc_but_not_yet_sold} have a sale EPC but have not sold yet ("
            f"{n_with_sale_epc_but_not_yet_sold / previous_portfolio.shape[0] * 100:.1f}%)"
        )

        # What about things that haven't sold or don't look likely to sell
        not_sold = dropped_units_matched[
            ~dropped_units_matched["sold_recently"] & ~dropped_units_matched["sale_lodged_recently"]
        ]

        new_owner_sizes = new_portfolio.groupby(
            ["Company Registration No. (1)"]
        ).size().reset_index().rename(columns={0: "Number of Properties"})
        new_owner_sizes = new_owner_sizes.sort_values("Number of Properties", ascending=False)

        previous_owner_sizes = previous_portfolio.groupby(
            ["Company Registration No. (1)"]
        ).size().reset_index().rename(columns={0: "Number of Properties"})
        previous_owner_sizes = previous_owner_sizes.sort_values("Number of Properties", ascending=False)

        # Let's just confirm that we took in a bigger owner, as we see this unit was still matched
        smallest_new_owner_size = new_owner_sizes["Number of Properties"].min()
        owner_too_small = []
        owner_big_enough = []
        for _, prop in not_sold.iterrows():
            owner_reg_id = prop["Company Registration No. (1)"]
            old_portfolio_owner_size = previous_owner_sizes[
                previous_owner_sizes["Company Registration No. (1)"] == owner_reg_id
            ]
            # An owner only counts as too small when their previous holding is no larger than the smallest
            # holding in the new portfolio
            if old_portfolio_owner_size["Number of Properties"].values[0] > smallest_new_owner_size:
                owner_big_enough.append(prop.to_dict())
                continue

            owner_too_small.append(prop.to_dict())

        n_owner_too_small = len(owner_too_small)

        owner_big_enough = pd.DataFrame(owner_big_enough)
        summary = []
        for _, record in owner_big_enough.iterrows():
            # Do we have this new owner?
            new_owner = new_portfolio[
                new_portfolio["Company Registration No. (1)"] == record["Company Registration No. (1)"]
            ]
            if new_owner.empty:
                # Why don't we have this new owner
                new_owner_data = new_matched_addresses[
                    new_matched_addresses["Company Registration No. (1)"] == record["Company Registration No. (1)"]
                ]
                new_owner_data_filtered = new_owner_data[
                    ~new_owner_data["sold_recently"] & ~new_owner_data["sale_lodged_recently"]
                ]
                summary.append(
                    {
                        "Owner Name": record["Proprietor Name (1)"],
                        "Owner reg id": record["Company Registration No. (1)"],
                        "N properties in new portfolio before filtering": new_owner_data.shape[0],
                        "N properties in new portfolio after filtering": new_owner_data_filtered.shape[0],
                    }
                )
                continue

            # A dropped unit whose owner is still in the new portfolio is not expected at this point
            raise Exception("something went wrong")

        summary = pd.DataFrame(summary)

        not_accounted_for = summary[
            (
                summary["N properties in new portfolio before filtering"] <
                previous_owner_sizes["Number of Properties"].min()
            )
        ]
        # We have two owners not accounted for:
        # ALLMID LIMITED, 01959058
        # CORAL RACING LIMITED, 541600
        # What happened to these owners?
        new_epc = pd.read_excel("EPC F & G Properties - V2.xlsx")

        allmid = previous_portfolio[previous_portfolio["Company Registration No. (1)"] == "01959058"].copy()
        # Check if any of the properties are not in the new EPC data
        allmid["not_in_new_epc"] = ~allmid["UPRN"].isin(new_epc["UPRN"])
        allmid["not_in_matched_pre_filtered"] = ~allmid["UPRN"].isin(new_matched_addresses["UPRN"])
        # In the previous portfolio, Allmid had 4 properties and in the re-build, it has just 2. Why?
        # Firstly, one of their properties was re-surveyed not at an F/G
        # Secondly, one of their properties is no longer owned by them:
        # https://www.zoopla.co.uk/property/uprn/100070553074/
        # So as an owner, they fell out of the ranking

        coral_racing = previous_portfolio[previous_portfolio["Company Registration No. (1)"] == "541600"].copy()
        coral_racing["not_in_new_epc"] = ~coral_racing["UPRN"].isin(new_epc["UPRN"])
        coral_racing["not_in_matched_pre_filtered"] = ~coral_racing["UPRN"].isin(new_matched_addresses["UPRN"])
        # Coral goes down from 4 -> 1 on refresh, so what happened?
        # 1) 2 properties had new EPCs and re-scored higher
        # 2) 1 property, 85A Market Street, Church Gresley, Swadlincote, DE11 9PN is no longer matched to the
        # ownership data, which is correct

        # Why were these units lost?
        # There's just 1 owner, who is BARHAM PROPERTY LTD
        owner_too_big_ids = owner_big_enough["Company Registration No. (1)"].unique()
        owner_too_big_names = owner_big_enough["Proprietor Name (1)"].unique()

        previous_owner_size = previous_owner_sizes[
            previous_owner_sizes["Company Registration No. (1)"].isin(owner_too_big_ids)
        ]

        new_owner_size = new_matched_addresses[
            new_matched_addresses["Company Registration No. (1)"].isin(owner_too_big_ids) |
            new_matched_addresses["Proprietor Name (1)"].isin(owner_too_big_names)
        ]
        # Row count of the unsold units (this was previously the whole (rows, columns) shape tuple)
        n_unsold = new_owner_size[
            ~new_owner_size["sold_recently"] & ~new_owner_size["sale_lodged_recently"]
        ].shape[0]

        # Happy with the justification to this point
        assert (
            (n_sold + n_with_sale_epc_but_not_yet_sold + n_owner_too_small + len(owner_big_enough)) ==
            dropped_units_matched.shape[0]
        )

        # We now have a list of properties that were lost from the previous iteration to the next that were not
        # matched
        dropped_units_unmatched = dropped_units[
            ~dropped_units["UPRN"].isin(new_matched_addresses["UPRN"])
        ].copy()

        # A few possibilities: They aren't in the EPC data? (new_epc was already loaded above, so it is reused)
        unmatched_not_in_epc = dropped_units_unmatched[
            ~dropped_units_unmatched["UPRN"].isin(new_epc["UPRN"])
        ]
        # There are 17 units that have had new EPCs above a G
        # Who were the owners? - various, nothing particularly remarkable
        # The result of this expression is not stored, it is only useful when stepping through by hand
        (
            previous_portfolio[
                previous_portfolio["UPRN"].isin(unmatched_not_in_epc["UPRN"])
            ]["Proprietor Name (1)"].value_counts()
        )

        # 22 final units to be accounted for...!
        unmatched_in_epc = dropped_units_unmatched[
            dropped_units_unmatched["UPRN"].isin(new_epc["UPRN"])
        ]
        # Some of them will be due to ownership
        # TODO: Read in freehold/leasehold data and see how many of these were non-exact matches!
        leasehold_matching_lookup = pd.read_excel("leasehold_matching_lookup V2.xlsx")
        freehold_matching_lookup = pd.read_excel("freehold_matching_lookup V2.xlsx")
        combined_matching_lookup = pd.concat([leasehold_matching_lookup, freehold_matching_lookup])

        # This is 13 matches, all of them approximate
        weak_matches = unmatched_in_epc.merge(combined_matching_lookup, how="inner", on="UPRN")

        # These have been lost due to ownership updates. This has been checked manually for every unit and there
        # has been sale activity for each one, justifying the change in ownership data
        remaining_matches = unmatched_in_epc[
            ~unmatched_in_epc["UPRN"].isin(weak_matches["UPRN"])
        ]

        # Every dropped unit should now be explained by exactly one of the categories above
        assert dropped_units.shape[0] == (
            (n_sold + n_with_sale_epc_but_not_yet_sold + n_owner_too_small + len(owner_big_enough)) + len(
                weak_matches) + unmatched_not_in_epc.shape[0]
        )

View file

View file

@ -0,0 +1,378 @@
import inspect
import pandas as pd
from etl.epc.settings import EARLIEST_EPC_DATE
from pathlib import Path
import numpy as np
from utils.s3 import save_csv_to_s3
# Path of this source file, obtained by asking inspect where a throwaway lambda was defined
src_file_path = inspect.getfile(lambda: None)
# EPC certificate extracts are expected in a local_data folder that sits next to this file
EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
# NOTE(review): absolute path on one developer's machine - anyone else has to change this before running
CUSTOMER_DATA_DIRECTORY = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/Data"
# Used to build the S3 keys ("<USER_ID>/<PORTFOLIO_ID>/...") and the portfolio_id of the scenario bodies
USER_ID = 8
PORTFOLIO_ID = 90
def make_asset_list():
    """
    Set up a small asset list for the study.

    Combines the Lewes EPC certificates with the customer's data extracts (AddressBase, rooftop PV potential,
    air source heat pump potential, insulation potential and low carbon technology costs) and then:
      1) builds the final asset list,
      2) builds the non-invasive (ASHP / solar PV) recommendations for each property,
      3) builds a small set of manual patches,
    uploads all three to S3 and prints the request bodies for the scenarios to run.

    :return: None
    """
    # Read in EPC data for Lewes
    lewes_directory = EPC_DIRECTORY / "domestic-E07000063-Lewes/certificates.csv"
    epc_data = pd.read_csv(lewes_directory, low_memory=False)
    # Rename the columns to the same format as the api returns
    epc_data.columns = [c.replace("_", "-").lower() for c in epc_data.columns]
    # Keep only certificates lodged on or after the earliest date we accept
    epc_data = epc_data[epc_data["lodgement-date"] >= EARLIEST_EPC_DATE]
    epc_data = epc_data[~pd.isnull(epc_data["uprn"])]
    epc_data["uprn"] = epc_data["uprn"].astype(int).astype(str)
    # Take the newest EPC per uprn
    epc_data = epc_data.sort_values("lodgement-date").groupby("uprn").last().reset_index()

    # We read in the multiple data sources, all stored under CUSTOMER_DATA_DIRECTORY. UPRNs are normalised to
    # strings everywhere so the sources can be joined on them
    address_base = pd.read_csv(
        f"{CUSTOMER_DATA_DIRECTORY}/OS AddressBase Premium/OS AddressBase Premium.csv",
        low_memory=False,
    )
    # Filter on resi
    address_base = address_base[address_base["Primary Code Description"] == "Residential"]
    address_base["UPRN"] = address_base["UPRN"].astype(int).astype(str)

    pv_potential = pd.read_csv(
        f"{CUSTOMER_DATA_DIRECTORY}/Domestic Rooftop PV Potential/Domestic Rooftop PV Potential.csv",
        low_memory=False,
    )
    pv_potential["UPRN"] = pv_potential["UPRN"].astype(int).astype(str)

    ashp_potential = pd.read_csv(
        f"{CUSTOMER_DATA_DIRECTORY}/Air Source Heat Pump Potential/Air Source Heat Pump Potential.csv",
        low_memory=False,
    )
    ashp_potential["UPRN"] = ashp_potential["UPRN"].astype(int).astype(str)
    # Spot check of a single property; the result is not stored
    ashp_potential[ashp_potential["UPRN"] == "100060067063"].squeeze()

    insulation_potential = pd.read_csv(
        f"{CUSTOMER_DATA_DIRECTORY}/Insulation Potential/Insulation Potential.csv",
        low_memory=False,
    )
    insulation_potential["UPRN"] = insulation_potential["UPRN"].astype(int).astype(str)

    renewables_cost = pd.read_csv(
        f"{CUSTOMER_DATA_DIRECTORY}/Low Carbon Technology Costs/Low Carbon Technology Costs.csv",
        low_memory=False,
    )
    renewables_cost["UPRN"] = renewables_cost["UPRN"].astype(int).astype(str)

    # Merge the EPC data onto address base
    asset_list = address_base[
        [
            "UPRN", "Class Description", "Relative Height - Eaves",
        ]
    ].merge(
        epc_data[
            ["uprn", "current-energy-efficiency", "current-energy-rating", "address1", "postcode", "floor-height",
             "property-type", "built-form", "co2-emissions-current"]],
        how="left",
        left_on="UPRN",
        right_on="uprn"
    ).drop(
        columns=["uprn"]
    ).merge(
        insulation_potential[["UPRN", "EPC Rating", "Wall Area [m^2]", "Building Area [m^2]"]],
        how="left",
        on="UPRN"
    ).rename(
        columns={"Wall Area [m^2]": "insulation_wall_area", "Building Area [m^2]": "floor_area"}
    )

    # Exploratory summaries - none of these results feed into the outputs below
    had_an_epc = asset_list[~pd.isnull(asset_list["current-energy-efficiency"])]
    below_b = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 80].shape
    below_c = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 69].shape

    # This used to look up "energy-efficiency-rating", which is not one of the columns selected into the asset
    # list and so raised a KeyError; the rating column that was merged in is "current-energy-rating"
    had_an_epc["current-energy-rating"].value_counts()
    asset_list["current-energy-rating"].value_counts()

    asset_list["co2-emissions-current"].mean()
    # Get the underlying data of a histogram
    import matplotlib.pyplot as plt
    n, bins, patches = plt.hist(asset_list["co2-emissions-current"], bins=100, color="blue", alpha=0.7)

    # Unit-width bins running from 0 up to the largest emissions figure
    bins = np.arange(0, asset_list["co2-emissions-current"].max(), 1)

    # Calculate the frequency of data in each bin
    hist, bin_edges = np.histogram(asset_list["co2-emissions-current"], bins=bins)

    # Take properties below a B - there are 2844 units
    asset_list = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 80]
    # Drop caravans
    asset_list = asset_list[asset_list["Class Description"] != "Caravan"]
    asset_list = asset_list[~pd.isnull(asset_list["current-energy-efficiency"])]

    # Take a sample, for properties that have an EPC, with a seed
    # asset_list = asset_list.sample(frac=0.5, random_state=42)

    # Fallback floor height for properties whose EPC does not record one
    AVG_FLOOR_HEIGHT = asset_list["floor-height"].median()

    def estimate_n_floors(
            building_height, floor_height, address_base_property_description, epc_property_type,
    ):
        """
        Estimate the number of floors of a property.

        Self contained flats are decided from the EPC property type alone (flat: 1, maisonette: 2, anything
        else: None). Every other property divides the building height by its floor height, using the median
        floor height of the asset list when its own is missing.
        """
        if address_base_property_description == "Self Contained Flat (Includes Maisonette / Apartment)":
            if epc_property_type in ["Flat"]:
                return 1
            if epc_property_type == "Maisonette":
                return 2
            return None

        if pd.isnull(floor_height):
            return np.round(building_height / AVG_FLOOR_HEIGHT)

        return np.round(building_height / floor_height)

    # Estimate the number of floors
    asset_list["number_of_floors"] = asset_list.apply(
        lambda x: estimate_n_floors(
            building_height=x["Relative Height - Eaves"],
            floor_height=x["floor-height"],
            address_base_property_description=x["Class Description"],
            epc_property_type=x["property-type"],
        ),
        axis=1
    )

    # Drop any entries with null floors because that means the ordnance survey data doesn't align with the epc
    # data
    asset_list = asset_list[~pd.isnull(asset_list["number_of_floors"])]
    # Drop any entries with null insulation wall area
    asset_list = asset_list[~pd.isnull(asset_list["insulation_wall_area"])]

    # Rating distributions recorded from a previous run:
    # D    0.419929
    # C    0.391459
    # E    0.160142
    # F    0.017794
    # G    0.010676

    # Total asset list:
    # D    0.450409
    # C    0.412016
    # E    0.110203
    # F    0.020263
    # G    0.007110

    # We do the following:
    # 1) Create final asset list
    # 2) Create Non-intrusive recommendations
    # 3) Create a third party costing object

    # Sanity check of the supplied costs per m2; cost_testing is not used in the outputs
    cost_testing = renewables_cost.merge(
        insulation_potential, how="inner", on="UPRN"
    )
    cost_testing["cwi_cost_per_m2"] = cost_testing["Insulation - Cavity Wall - Total"] / cost_testing["Wall Area [m^2]"]
    # Their cavity wall insulation is £8 per m^2
    cost_testing["ewi_cost_per_m2"] = cost_testing["Insulation - External Wall - Total"] / cost_testing[
        "Wall Area [m^2]"]
    cost_testing["li_cost_per_m2"] = cost_testing["Insulation - Loft - Total"] / cost_testing["Building Area [m^2]"]
    cost_testing["underfloor_cost_per_m2"] = cost_testing["Insulation - Under Floor- Total"] / cost_testing[
        "Building Area [m^2]"]

    final_asset_list = asset_list.rename(
        columns={"UPRN": "uprn", "address1": "address", "floor_area": "insulation_floor_area"}
    )[["uprn", "address", "postcode", "insulation_wall_area", "insulation_floor_area", "number_of_floors"]]

    # Create non-invasive recommendations, which come from the solar potential and ASHP potential data sources
    non_invasive_recommendations = []
    for _, row in final_asset_list.iterrows():
        # NOTE(review): the suitability columns are used directly as boolean masks, which presumably relies on
        # them holding True/False values - confirm against the source files
        property_ashp_potential = ashp_potential[
            (ashp_potential["UPRN"] == row["uprn"]) & ashp_potential["Overall Suitability Rating"]
        ]
        property_pv_potential = pv_potential[
            (pv_potential["UPRN"] == row["uprn"]) & pv_potential["Overall Suitability"]
        ]
        property_costs = renewables_cost[renewables_cost["UPRN"] == row["uprn"]]

        property_non_invasive_recs = []
        if not property_ashp_potential.empty:
            if property_costs.empty:
                # No cost for this property, so use the average heat pump cost of the suitable properties with
                # the same recommended heat pump size
                similar_properties = ashp_potential[
                    ashp_potential["Overall Suitability Rating"] &
                    (ashp_potential["Recommended Heat Pump Size [kW]"] ==
                     property_ashp_potential["Recommended Heat Pump Size [kW]"].values[0])
                ].merge(
                    renewables_cost, how="inner", on="UPRN"
                )
                property_costs = similar_properties[["Air Source Heat Pump - Total"]].mean().to_frame().T

            property_non_invasive_recs.append(
                {
                    "type": "air_source_heat_pump",
                    "suitable": True,
                    "size": property_ashp_potential["Recommended Heat Pump Size [kW]"].values[0],
                    "cost": property_costs["Air Source Heat Pump - Total"].values[0],
                    "ashp_only_heating_recommendation": True
                }
            )
        else:
            property_non_invasive_recs.append(
                {
                    "type": "air_source_heat_pump",
                    "suitable": False
                }
            )

        if not property_pv_potential.empty:
            # NOTE(review): if the property has no row in the cost data, property_costs is either empty or the
            # ASHP-only average built above, and the "Rooftop PV - Total" lookup below will fail - confirm every
            # PV-suitable property has a cost row, or decide on a fallback
            property_non_invasive_recs.append(
                {
                    "type": "solar_pv",
                    "suitable": True,
                    "array_wattage": property_pv_potential["Recommended Array Size [kW]"].values[0] * 1000,
                    "initial_ac_kwh_per_year": property_pv_potential["Annual Generation [kWh]"].values[0],
                    "panneled_roof_area": property_pv_potential["Roof area suitable for PV [m^2]"].values[0],
                    "cost": property_costs["Rooftop PV - Total"].values[0],
                }
            )
        else:
            property_non_invasive_recs.append(
                {
                    "type": "solar_pv",
                    "suitable": False
                }
            )

        non_invasive_recommendations.append(
            {
                "uprn": row["uprn"],
                "recommendations": property_non_invasive_recs,
            }
        )

    # Save the asset list
    # Store the asset list in s3
    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
    save_csv_to_s3(
        dataframe=final_asset_list,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename
    )

    # Store non-invasive recommendations in S3
    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
    save_csv_to_s3(
        dataframe=pd.DataFrame(non_invasive_recommendations),
        bucket_name="retrofit-plan-inputs-dev",
        file_name=non_invasive_recommendations_filename
    )

    # We add a patch to some of the units because there's no data for the built form
    # We would be able to handle this automatically in the future, when using OS API
    patches = [
        {
            "uprn": "10033266220",
            "built-form": "Semi-Detached",
        },
        {'uprn': '10033266219', 'built-form': 'Semi-Detached'}
    ]
    # Store patches in s3
    # NOTE(review): the key ends in .json but the upload goes through save_csv_to_s3 - confirm which format the
    # consumer of the patches file expects
    patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json"
    save_csv_to_s3(
        dataframe=pd.DataFrame(patches),
        bucket_name="retrofit-plan-inputs-dev",
        file_name=patches_filename
    )

    # Request bodies for the scenarios; they are only printed here, not submitted
    body1 = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Private",
        "goal": "Increasing EPC",
        "goal_value": "A",
        "trigger_file_path": filename,
        "already_installed_file_path": "",
        "patches_file_path": patches_filename,
        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
        "scenario_name": "Demand Reduction - no solid wall, windows, LEDs",
        "multi_plan": True,
        "exclusions": [
            "internal_wall_insulation", "external_wall_insulation", "floor_insulation", "heating", "solar_pv",
            "lighting", "windows", "secondary_heating"
        ],
        "budget": None,
    }
    print(body1)

    body2 = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Private",
        "goal": "Increasing EPC",
        "goal_value": "A",
        "trigger_file_path": filename,
        "already_installed_file_path": "",
        "patches_file_path": patches_filename,
        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
        "scenario_name": "Demand Reduction - no solid wall, floors or heating",
        "multi_plan": True,
        "exclusions": [
            "internal_wall_insulation", "external_wall_insulation", "floor_insulation", "heating", "solar_pv",
        ],
        "budget": None,
    }
    print(body2)

    # 2.5 - full fabric, no decant
    # NOTE(review): this shares its scenario_name with body2 even though external wall insulation is no longer
    # excluded here - confirm whether the name should differ
    body2_5 = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Private",
        "goal": "Increasing EPC",
        "goal_value": "A",
        "trigger_file_path": filename,
        "already_installed_file_path": "",
        "patches_file_path": patches_filename,
        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
        "scenario_name": "Demand Reduction - no solid wall, floors or heating",
        "multi_plan": True,
        "exclusions": [
            "internal_wall_insulation", "floor_insulation", "heating", "solar_pv",
        ],
        "budget": None,
    }
    print(body2_5)

    # Scenario B
    body3 = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Private",
        "goal": "Increasing EPC",
        "goal_value": "A",
        "trigger_file_path": filename,
        "already_installed_file_path": "",
        "patches_file_path": patches_filename,
        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
        "scenario_name": "Demand Reduction, Heating Systems, Solar PV - no solid wall or floors",
        "multi_plan": True,
        "exclusions": ["internal_wall_insulation", "external_wall_insulation", "floor_insulation"],
        "budget": None,
    }
    print(body3)

    # Scenario 4 - whole house; no "exclusions" key is sent for this one
    body4 = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Private",
        "goal": "Increasing EPC",
        "goal_value": "A",
        "trigger_file_path": filename,
        "already_installed_file_path": "",
        "patches_file_path": patches_filename,
        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
        "scenario_name": "Whole House",
        "multi_plan": True,
        "budget": None,
    }
    print(body4)

View file

@ -0,0 +1,417 @@
from tqdm import tqdm
import pandas as pd
import numpy as np
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, Scenario
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
def get_data(portfolio_id, scenario_ids):
    """
    Load the properties of a portfolio plus the plans and default recommendations of the given scenarios.

    :param portfolio_id: id of the portfolio whose properties (joined to their EPC details) are fetched
    :param scenario_ids: iterable of scenario ids whose plans and recommendations are fetched
    :return: tuple (properties_data, plans_data, recommendations_data), each a list of dicts keyed by column
        name; every recommendation dict also carries a "Scenario ID" entry
    """
    session = sessionmaker(bind=db_engine)()
    # The session is closed in the finally block so a failing query does not leak it
    try:
        session.begin()

        # Get properties and their details for a specific portfolio
        properties_query = session.query(
            PropertyModel,
            PropertyDetailsEpcModel
        ).join(
            PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
        ).filter(
            PropertyModel.portfolio_id == portfolio_id  # Filter by portfolio ID
        ).all()

        # Transform properties data to include all fields dynamically. When both tables have a column of the
        # same name, the value from PropertyDetailsEpcModel is the one that is kept
        properties_data = [
            {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns},
             **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in
                PropertyDetailsEpcModel.__table__.columns}}
            for prop in properties_query
        ]

        # Get the plans of the requested scenarios (plans are selected by scenario id only, not by property)
        plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()

        # Transform plans data to include all fields dynamically
        plans_data = [
            {col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
            for plan in plans_query
        ]

        # Extract plan IDs for filtering recommendations through PlanRecommendations
        plan_ids = [plan['id'] for plan in plans_data]

        # Get recommendations through PlanRecommendations for those plans and that are default
        recommendations_query = session.query(
            Recommendation,
            Plan.scenario_id
        ).join(
            PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
        ).join(
            Plan, Plan.id == PlanRecommendations.plan_id  # Join with Plan to access scenario_id
        ).filter(
            PlanRecommendations.plan_id.in_(plan_ids),
            Recommendation.default == True  # Filtering for default recommendations
        ).all()

        # Transform recommendations data to include all fields dynamically and include scenario_id
        recommendations_data = [
            {**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation') else getattr(rec,
                                                                                                              col.name)
                for col in Recommendation.__table__.columns},
             "Scenario ID": rec.scenario_id}
            for rec in recommendations_query
        ]
    finally:
        session.close()

    return properties_data, plans_data, recommendations_data
def estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_df, scenario_ids):
    """
    Compare heating & hot water demand before and after retrofit for each scenario.

    Lighting and solar PV savings are not counted against heating & hot water demand, but they are included in
    the total kwh saved that the cost per kwh figure is based on.

    :param properties_df: one row per property, with "property_id" and "current_energy_demand_heating_hotwater"
    :param recommendations_df: recommendations with "Scenario ID", "default", "property_id", "type",
        "kwh_savings" and "estimated_cost" columns
    :param scenario_ids: iterable of scenario ids to summarise
    :return: tuple of three DataFrames, each with one row per scenario:
        - average current / post retrofit demand across the whole population
        - the same averages for just the properties that have recommendations, plus the absolute
          ("difference") and relative ("percentage_reduction") reduction
        - "cost_per_kwh_saved", which is NaN when the scenario saves no kwh at all
    """
    demand_columns = ['current_energy_demand_heating_hotwater', 'Post Retrofit Heating & Hotwater kwh']
    # Does not depend on the scenario, so only de-duplicate once
    unique_properties = properties_df.drop_duplicates()

    scenario_comparison_df = []
    scenario_comparison_df_2 = []
    cost_per_kwh_saved_table = []
    for scenario_id in scenario_ids:
        # Get the recommendations for the scenario, default
        scenario_recommendations = recommendations_df[
            (recommendations_df["Scenario ID"] == scenario_id) &
            recommendations_df["default"].eq(True)
        ].copy()

        # Split the savings into lighting, solar and everything else. 'Estimated Kwh Savings' is zero wherever
        # one of the specific kwh columns is used
        kwh_savings = scenario_recommendations["kwh_savings"]
        is_lighting = scenario_recommendations["type"] == "low_energy_lighting"
        is_solar = scenario_recommendations["type"] == "solar_pv"
        scenario_recommendations["lighting_kwh"] = kwh_savings.where(is_lighting, 0)
        scenario_recommendations["solar_kwh"] = kwh_savings.where(is_solar, 0)
        scenario_recommendations["Estimated Kwh Savings"] = kwh_savings.where(~(is_lighting | is_solar), 0)

        grouped_data = scenario_recommendations.groupby(['property_id']).agg({
            'Estimated Kwh Savings': 'sum',
            'lighting_kwh': 'sum',
            'solar_kwh': 'sum',
            "estimated_cost": "sum"
        }).reset_index()

        # Left merge, so properties without any recommendation are kept with null savings
        comparison = unique_properties.merge(
            grouped_data, on=["property_id"], how="left"
        )
        comparison["Post Retrofit Heating & Hotwater kwh"] = (
            comparison["current_energy_demand_heating_hotwater"] -
            comparison["Estimated Kwh Savings"]
        )

        avgs = comparison[demand_columns].mean()

        # We now, for properties that have a plan, do a before and after
        with_savings = comparison[~pd.isnull(comparison["Estimated Kwh Savings"])]
        avgs2 = with_savings[demand_columns].mean()
        avgs2["difference"] = avgs2["current_energy_demand_heating_hotwater"] - avgs2[
            "Post Retrofit Heating & Hotwater kwh"]
        avgs2["percentage_reduction"] = 100 * avgs2["difference"] / avgs2["current_energy_demand_heating_hotwater"]

        # We also calculate the cost per kwh saved
        total_kwh_saved = (
            with_savings["Estimated Kwh Savings"].sum() +
            with_savings["lighting_kwh"].sum() +
            with_savings["solar_kwh"].sum()
        )
        total_cost = with_savings["estimated_cost"].sum()
        # The ratio is undefined when nothing is saved, so report NaN rather than dividing by zero
        cost_per_kwh_saved = total_cost / total_kwh_saved if total_kwh_saved != 0 else np.nan

        scenario_comparison_df.append({"scenario_id": scenario_id, **avgs})
        scenario_comparison_df_2.append({"scenario_id": scenario_id, **avgs2})
        cost_per_kwh_saved_table.append({"scenario_id": scenario_id, "cost_per_kwh_saved": cost_per_kwh_saved})

    scenario_comparison_population = pd.DataFrame(scenario_comparison_df)
    scenario_comparison_retrofitted_units = pd.DataFrame(scenario_comparison_df_2)
    cost_per_kwh_saved_table = pd.DataFrame(cost_per_kwh_saved_table)

    return scenario_comparison_population, scenario_comparison_retrofitted_units, cost_per_kwh_saved_table
def slides():
# Prepares the information required for the slides
# Right now this is the second version of the nehaven portfolio
portfolio_id = 90
# Look at one scenario at a time, otherwise this is agony
scenario_ids = [47, 48, 49, 50, 51]
properties_data, plans_data, recommendations_data = get_data(portfolio_id, scenario_ids)
properties_df = pd.DataFrame(properties_data)
plans_df = pd.DataFrame(plans_data)
recommendations_df = pd.DataFrame(recommendations_data)
if properties_df.shape[0] != 2553:
raise ValueError("The number of unique properties is not 2553")
# Q1: What is the baseline heating and energy demand for the properties in the portfolio - baseline?
heating_hotwater_kwh = (
properties_df[['current_energy_demand', 'current_energy_demand_heating_hotwater']]
.mean()
)
# Q2: For each scenario, what is for what is the heating and hot water kwh after retrofit, on the entire
# popoulation (incl those without retrofit) and for just those being retrofit
# We also calculat the cost per kwh saved
scenario_comparison_population, scenario_comparison_retrofitted_units, cost_per_kwh_saved_table = (
estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_df, scenario_ids)
)
# Q3: For each scenario, we want to answer what the heating and hot water kwh looks like after retrofit
# We need to take recommndations that affect just the heating and hot water
# By property
recommendations_df["type_mapped"] = recommendations_df["type"].copy().replace(
{
"loft_insulation": "roof_insulation",
"room_roof_insulation": "roof_insulation",
"flat_roof_insulation": "roof_insulation",
"hot_water_tank_insulation": "other",
"cylinder_thermostat": "other",
"sealing_open_fireplace": "other",
"suspended_floor_insulation": "floor_insulation",
"solid_floor_insulation": "floor_insulation",
}
)
recommendations_df["type_mapped"] = np.where(
recommendations_df["description"].str.contains("air source heat pump"),
"air_source_heat_pump",
recommendations_df["type_mapped"]
)
# Group by 'Plan Name' and 'Recommendation Type' and count unique 'Property ID'
recommendation_summary = recommendations_df[recommendations_df["default"] == True].groupby(
['Scenario ID', 'type_mapped']
).agg({
'property_id': 'nunique'
}).reset_index()
recommendation_summary.columns = ['Scenario ID', 'Type Mapped', 'Number of Properties']
recommendation_summary["Percentage of Properties"] = 100 * (
recommendation_summary["Number of Properties"] / properties_df["id"].nunique()
)
recommendation_summary_final_scenario = recommendation_summary[recommendation_summary["Scenario ID"].isin([51])]
# MVP implementation of funding estimation for the most basic scenario, using GBIS
project_scores_matrix = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv")
def find_abs(sap_movement, starting_sap, floor_area):
    """
    Look up the "Cost Savings" figure in the project scores matrix for a SAP improvement.

    :param sap_movement: number of SAP points gained by the measure
    :param starting_sap: SAP score of the property before the measure
    :param floor_area: total floor area, used to pick the matrix's floor area segment
    :return: 0 when the start and finish scores fall in the same band, otherwise the "Cost Savings" value
        of the matrix row matching the floor area segment, starting band and finishing band
    """
    band_before = find_band(starting_sap)
    band_after = find_band(starting_sap + sap_movement)
    if band_before == band_after:
        # No band change means there is no score to look up
        return 0

    # Segment upper bounds are tested in ascending order, so each branch only needs its own upper limit
    if floor_area <= 72:
        segment = '0-72'
    elif floor_area <= 97:
        segment = "73-97"
    elif floor_area <= 199:
        segment = "98-199"
    else:
        segment = "200+"

    matrix = project_scores_matrix
    selector = (
        (matrix["Floor Area Segment"] == segment)
        & (matrix["Starting Band"] == band_before)
        & (matrix["Finishing Band"] == band_after)
    )
    # NOTE(review): squeeze() assumes exactly one matrix row matches - confirm against the matrix file
    matched_row = matrix[selector].squeeze()
    return matched_row["Cost Savings"]
# SAP score ranges for each ECO4 scoring band, ordered from the highest band to the lowest.
# Note that some neighbouring ranges share a boundary value (e.g. 86 and 96).
eco4_scores_sap_table = pd.DataFrame(
    [
        {'Band': 'High_A', 'From': 96.0, 'Up to': 100.0, 'Mid-point': 98.0},
        {'Band': 'Low_A', 'From': 92.0, 'Up to': 96.0, 'Mid-point': 94.0},
        {'Band': 'High_B', 'From': 86.0, 'Up to': 91.0, 'Mid-point': 88.5},
        {'Band': 'Low_B', 'From': 81.0, 'Up to': 86.0, 'Mid-point': 83.5},
        {'Band': 'High_C', 'From': 74.5, 'Up to': 80.0, 'Mid-point': 77.25},
        {'Band': 'Low_C', 'From': 69.0, 'Up to': 74.5, 'Mid-point': 71.75},
        {'Band': 'High_D', 'From': 61.5, 'Up to': 68.0, 'Mid-point': 64.75},
        {'Band': 'Low_D', 'From': 55.0, 'Up to': 61.5, 'Mid-point': 58.25},
        {'Band': 'High_E', 'From': 46.5, 'Up to': 54.0, 'Mid-point': 50.25},
        {'Band': 'Low_E', 'From': 39.0, 'Up to': 46.5, 'Mid-point': 42.75},
        {'Band': 'High_F', 'From': 29.5, 'Up to': 38.0, 'Mid-point': 33.75},
        {'Band': 'Low_F', 'From': 21.0, 'Up to': 29.5, 'Mid-point': 25.25},
        {'Band': 'High_G', 'From': 10.5, 'Up to': 20.0, 'Mid-point': 15.25},
        {'Band': 'Low_G', 'From': 1.0, 'Up to': 10.5, 'Mid-point': 5.75},
    ]
)


def find_band(value):
    """
    Return the ECO4 band name (e.g. "Low_D") whose SAP range contains the given score.

    The score is floored to a whole number before the lookup. Both ends of each range are inclusive, so a
    score sitting on a shared boundary (86 or 96) matches two rows; the first row in table order, i.e. the
    higher band, is returned so that the result is always a single band name.

    :param value: SAP score to classify
    :return: the band name as a string
    :raises ValueError: if no band covers the floored score (e.g. below 1, above 100, or NaN)
    """
    value_floored = np.floor(value)
    in_band = (
        (eco4_scores_sap_table["From"] <= value_floored)
        & (eco4_scores_sap_table["Up to"] >= value_floored)
    )
    bands = eco4_scores_sap_table.loc[in_band, "Band"]
    if bands.empty:
        # Previously this fell through and handed an empty Series to the caller
        raise ValueError(f"No ECO4 SAP band covers a score of {value!r}")
    return bands.iloc[0]
def identify_funding_measure(p, p_recs, is_social):
    """
    Pick the single funding candidate measure for a property.

    Only cavity wall insulation and loft insulation recommendations are considered. Each one found is
    scored with find_abs, and the candidate with the highest cost is returned.

    :param p: property record, read for "property_id", "current_sap_points" and "total_floor_area"
    :param p_recs: DataFrame of this property's recommendations, read for "type", "sap_points" and
        "estimated_cost"
    :param is_social: flag copied onto the result unchanged
    :return: dict with keys property_id, measure, cost, abs and is_social, or None if the property has
        neither measure
    """
    candidates = []
    for measure_type in ["cavity_wall_insulation", "loft_insulation"]:
        matching = p_recs[p_recs["type"] == measure_type]
        if matching.empty:
            continue

        # NOTE(review): squeeze() assumes at most one recommendation of each type per property - confirm
        rec = matching.squeeze()
        score = find_abs(
            sap_movement=rec["sap_points"],
            starting_sap=p["current_sap_points"],
            floor_area=p["total_floor_area"]
        )
        candidates.append({
            "property_id": p["property_id"],
            "measure": rec["type"],
            "cost": rec["estimated_cost"],
            "abs": score,
            "is_social": is_social
        })

    if not candidates:
        return None

    # Highest cost first; only the top candidate is kept
    ranked = pd.DataFrame(candidates).sort_values("cost", ascending=False)
    return ranked.head(1).to_dict(orient="records")[0]
social_tenure = ["rental (social)", "Rented (social)"]
scenario_recs = recommendations_df[recommendations_df["Scenario ID"].isin([47])]

# Collect one candidate funding measure per property with a SAP score below 69, flagging whether the
# property is socially rented
funding = []
for _, p in tqdm(properties_df.iterrows(), total=len(properties_df)):
    p_recs = scenario_recs[scenario_recs["property_id"] == p["property_id"]]
    if p_recs.empty:
        continue

    if p["current_sap_points"] < 69:
        f = identify_funding_measure(p, p_recs, p["tenure"] in social_tenure)
        if f:
            funding.append(f)

funding = pd.DataFrame(funding)
conservative_abs = 20
funding["expected_funding"] = funding["abs"] * conservative_abs
# We take rows where the expected funding is higher than the cost of the works plus a margin.
# NOTE(review): the margin applied is 15% (factor 1.15); this comment previously said 10% - confirm which
# figure is intended.
funding = funding[funding["expected_funding"] >= (funding["cost"] * 1.15)]

# From the owner of the properties, the funding that they see is just the cost of the works. The actual funding
# received will go to the installer

# We now look at the social funding
social_funding = funding[funding["is_social"]]["cost"].sum()

# For the private funding, we need to scale this to consider the fact that only a proportion of the properties
# will qualify due to needing the property to fall into council tax bands A - D, and that only some of the tenants
# will meet the benefits criteria
private_funding = funding[~funding["is_social"]]["cost"].sum()

# 51% of households are recipients of benefits in the South East, in the UK
# (2021/2022 - https://www.statista.com/statistics/382858/uk-state-benefits-by-region/)
# We also need to deduce the % of properties in council tax bands A - D
# 2023 council tax bands:
# https://www.gov.uk/government/statistics/council-tax-stock-of-properties-2023/council-tax-stock-of-properties
# -statistical-commentary
band_a_proportion = 0.239
band_b_proportion = 0.195
band_c_proportion = 0.219
band_d_proportion = 0.156
a_to_d_proportion = band_a_proportion + band_b_proportion + band_c_proportion + band_d_proportion
benefits_proportion = 0.51

# Note: It's probable that an occupant of a property in council tax bands A-D is more likely to be on benefits,
# however we retain the regional average to be conservative
# We scale the private funding based on these two factors
private_funding_scaled = private_funding * benefits_proportion * a_to_d_proportion
n_private_projects = np.round((~funding["is_social"]).sum() * benefits_proportion * a_to_d_proportion)

# Look at the impact of EWI for scenario 49
ewi_jobs = recommendations_df[
    (recommendations_df["Scenario ID"] == 49) & (recommendations_df["type"] == "external_wall_insulation")
]
ewi_total_cost = ewi_jobs["estimated_cost"].sum()

has_cavity = recommendations_df[
    (recommendations_df["type"] == "cavity_wall_insulation") & (recommendations_df["Scenario ID"] == 47)
]
# Take the properties that have a cavity wall recommendation
cavity_units = properties_df[properties_df["property_id"].isin(has_cavity["property_id"].values)]
# Ad-hoc inspection of individual records
cavity_units[cavity_units.index == 3][["uprn", "property_id"]]
z = recommendations_df[recommendations_df["property_id"] == 24525]

# Recommendation type by kWh savings per unit of floor area
recommendations_final_scenario = recommendations_df[
    recommendations_df["Scenario ID"].isin([51]) &
    (recommendations_df["default"] == True)
].copy()
# Merge on floor area
recommendations_final_scenario = recommendations_final_scenario.merge(
    properties_df[["property_id", "total_floor_area"]], on="property_id", how="left"
)
recommendations_final_scenario = recommendations_final_scenario[
    ~pd.isnull(recommendations_final_scenario["total_floor_area"])]
recommendations_final_scenario["kwh_savings_per_unit"] = (
    recommendations_final_scenario["kwh_savings"] / recommendations_final_scenario["total_floor_area"]
)

# The mapping must be built from this frame's own "type" column: merge() gave it a fresh index, so assigning
# from recommendations_df["type"] would align on unrelated index labels and attach the wrong types.
recommendations_final_scenario["type_mapped2"] = recommendations_final_scenario["type"].replace(
    {
        "room_roof_insulation": "roof_insulation",
        "flat_roof_insulation": "roof_insulation",
        "hot_water_tank_insulation": "other",
        "cylinder_thermostat": "other",
        "sealing_open_fireplace": "other",
        "suspended_floor_insulation": "floor_insulation",
        "solid_floor_insulation": "floor_insulation",
    }
)

# NOTE(review): this groups by "type_mapped" even though "type_mapped2" was just created - confirm which
# grouping is intended.
aggs = recommendations_final_scenario.groupby("type_mapped")[
    ["kwh_savings_per_unit", "estimated_cost"]].mean().reset_index().sort_values(
    "kwh_savings_per_unit", ascending=False
)
aggs["cost_per_kwh_saved"] = aggs["estimated_cost"] / aggs["kwh_savings_per_unit"]

# Show more columns with pandas
pd.set_option('display.max_columns', None)
# Show more rows with pandas
pd.set_option('display.max_rows', None)
# Show more characters in a column
pd.set_option('display.max_colwidth', None)

View file

@ -0,0 +1,420 @@
import pandas as pd
import numpy as np
from backend.SearchEpc import SearchEpc
from dotenv import load_dotenv
from tqdm import tqdm
import os
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
def clean_colnames(df):
    """
    Flatten a two-row header into single column names.

    The first data row is treated as a secondary header. Where it holds a non-empty value, the column is
    renamed to "<column name>+<secondary value>"; otherwise the column name is kept. The secondary header
    row is then removed from the data.

    :param df: DataFrame whose first row holds the secondary header; it is not modified
    :return: a new DataFrame without the first row and with the combined column names
    """
    sub_headers = df.iloc[0, :].values

    new_colnames = []
    for primary, secondary in zip(df.columns, sub_headers):
        if pd.isnull(secondary) or not secondary:
            new_colnames.append(primary)
        else:
            # Formatting rather than str.join, so a numeric sub-header (e.g. a year) does not raise
            new_colnames.append(f"{primary}+{secondary}")

    # Remove the header row by position, matching how it was read above. Dropping by label 0 only
    # worked when the frame had a default integer index.
    df = df.iloc[1:].copy()
    df.columns = new_colnames
    return df
def lesney_farms():
    """
    Some rough and ready analysis to get a view of what the archetypes could be, ahead of a meeting with Wates
    on the 28th Aug 2024.

    Reads the Bexley asset workbook and the cached EPC data, groups the system built properties that are below
    EPC C into archetypes, and writes the archetype assigned to each asset to assigned_archetypes.csv.
    Many of the intermediate frames are not used further; they are kept for interactive inspection.

    :return: None
    """
    workbook_path = (
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Orbit - Wates/Bexley Wave 3 Project - external - "
        "reduced.xlsx"
    )

    all_locations = [
        "Forest Road Erith",
        "Lesney Farms",
        "Brook Street 155 - 243",
        "Hazel Drive",
        "Page Crescent",
        "Brook Salmon Roberts and Chapma",
        "Beacon Road"
    ]

    all_assets = pd.read_excel(workbook_path, sheet_name="Full Property List", header=1)
    all_assets = clean_colnames(all_assets)
    all_assets["Location"] = None

    # One sheet per location; an asset's location is whichever sheet lists its Asset Reference
    locations = {
        location_name: clean_colnames(pd.read_excel(workbook_path, sheet_name=location_name, header=1))
        for location_name in all_locations
    }

    for loc in all_locations:
        all_assets["Location"] = np.where(
            all_assets["Asset Reference"].isin(locations[loc]["Asset Reference"]),
            loc,
            all_assets["Location"]
        )

    # Every asset should have been found on one of the location sheets
    if pd.isnull(all_assets["Location"]).sum():
        raise Exception("something went wrong")

    # 234 properties below EPC C
    below_epc_c = all_assets[all_assets["PRE CALCULATED EPC"].isin(["D", "E", "F", "G"])].copy()

    # We simplify wall type
    below_epc_c["wall_type_simplified"] = below_epc_c["Wall Type"].str.split(" ").str[0]

    known_no_epc = [
        28679,  # There is no EPC for 11 Page Crescent, Erith, Kent, DA8 2HJ, just 11A
        29291,  # No EPC for 225 Slade Green Road, Erith, Kent, DA8 2JW
    ]

    # The EPC data was originally fetched with one SearchEpc lookup per asset: skipping the `known_no_epc`
    # asset references, correcting the "Frinstead" -> "Frinsted" spelling in the address, and searching with
    # skip_os=True. The result was cached to the CSV read below. culworth_court() contains the same lookup
    # loop should the cache need to be regenerated.
    epc_data = pd.read_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Orbit - Wates/Bexley EPC data.csv", )

    epc_comparison = all_assets[
        ['Asset Reference', 'Address', 'PRE CALCULATED EPC']
    ].merge(
        epc_data[["Asset Reference", "current-energy-rating", "lodgement-date"]],
        on='Asset Reference',
        how="left"
    )

    # There are a large # of properties (147) that have a different pre calculated EPC rating to what's on the
    # registry. These may be internally held EPRs but this may inform which properties we might want to
    # prioritise for survey
    different_epcs = epc_comparison[
        epc_comparison["PRE CALCULATED EPC"] != epc_comparison["current-energy-rating"]
    ]

    not_c = different_epcs[
        (different_epcs["PRE CALCULATED EPC"] == "C") &
        (different_epcs["current-energy-rating"] != "C")
    ]

    system_builds = below_epc_c[
        below_epc_c["Wall Type"].str.contains("SystemBuilt")
    ].copy()

    combinations = system_builds[
        ['Asset Type', 'Property Type', 'Location', 'PRE CALCULATED EPC', 'Wall Type', ]
    ].drop_duplicates()

    system_build_data_comparison = system_builds.merge(
        epc_data[
            ["Asset Reference", "walls-description", "roof-description", "current-energy-rating", "lodgement-date",
             "current-energy-efficiency"]],
        on='Asset Reference',
        how="left"
    )

    # Apply patches: manual corrections keyed by Asset Reference
    patches = {
        25847: {"Property Type": "Semi Detached House"},
    }

    for asset_ref, patch in patches.items():
        for k, v in patch.items():
            system_build_data_comparison.loc[
                system_build_data_comparison["Asset Reference"] == asset_ref,
                k
            ] = v

    # Compare how many archetypes each candidate set of columns would produce
    archetype_columns = [
        ["Asset Type", "Property Type", "Wall Type", "Location"],
        ["Asset Type", "Property Type", "Location"],
        ["Asset Type", "Property Type", "Wall Type", "Location", "PRE CALCULATED EPC", "roof-description"],
        ["Asset Type", "Property Type", "Location", "PRE CALCULATED EPC"]
    ]

    summary = []
    for cols in archetype_columns:
        combinations = system_build_data_comparison[cols].drop_duplicates()
        summary.append(
            {
                "cols": cols,
                "number_archetypes": len(combinations),
            }
        )

    summary = pd.DataFrame(summary)

    # Let's use this column combination
    chosen_combination = [
        "Asset Type", "Property Type", "Wall Type", "Location", "PRE CALCULATED EPC", "roof-description"
    ]

    # For this combination, let's find the properties. The archetype ID is the position of the combination
    # in the de-duplicated table.
    archetype_combinations = system_build_data_comparison[chosen_combination].drop_duplicates().reset_index(drop=True)
    archetype_combinations["archetype ID"] = archetype_combinations.index

    archetyped_data = system_build_data_comparison.merge(
        archetype_combinations, how="left", on=chosen_combination
    )

    counts = archetyped_data["archetype ID"].value_counts()
    # Archetype 0: Semi D, As built system built, Pre calculated EPC D, flat insulated roof, (Lesney-0)
    # Archetype 1: Semi D, Externally insulated system built, Pre calculated EPC D, flat insulated roof (Lesney-1)
    # Archetype 4: Semi D, System built with unknown insulation, Pre calculated EPC D, flat roof insulated (Lesney-2)
    # Archetype 3: Semi D, Externally insulated system built, Pre calculated EPC D, flat roof uninsulated (assumed)
    # (Lesney-3)
    # 0    21
    # 1    11
    # 4    11
    # 3     3
    # 2     1
    # 5     1
    # 6     1
    # 7     1
    # 8     1
    # 9     1

    # This archetype is the same as 0, apart from the pre calculated EPC being an E. The registry says this is a D
    # This has been added to additional units
    eg1 = archetyped_data[archetyped_data["archetype ID"] == 2]

    # Semi D, System built with unknown insulation, Pre calculated EPC D, flat roof insulated
    # This looks like it would fit either in archetype
    eg2 = archetyped_data[archetyped_data["archetype ID"] == 5]

    eg3 = archetyped_data[archetyped_data["archetype ID"] == 6]

    # Archetypes 7, 8, 9 are all similar, Semi D, Uninsulated system built, with pitched lofts with up to 200mm
    # insulation in the lofts
    # It's just the three units
    # (This filter previously used IDs 9, 10 and 11, which contradicted the comment above and the Lesney-4
    # selection further down; IDs 10 and 11 do not appear in the counts listed above.)
    pitched_system_built_properties = archetyped_data[archetyped_data["archetype ID"].isin([7, 8, 9])]

    notes = [
        {
            "Asset Reference": 27445,
            "note": "Confirmed this has a pitched roof on Maps"
        },
        {
            "Asset Reference": 27443,
            "note": "Confirmed this has a pitched roof on Maps"
        },
        {
            "Asset Reference": 27442,
            "note": "Confirmed this has a pitched roof on Maps"
        },
        {
            "Asset Reference": 25847,
            "note": "This is labelled as a mid-terrace but the EPC data + Maps suggest it's a semi-detached"
        }
    ]

    # Street names of the system built properties: the second word of the first address line
    system_built_streets = (
        archetyped_data["Address"].str.split(",").str[0].str.split(" ").str[1].unique()
    )

    all_assets_w_epcs = all_assets.merge(epc_data, on="Asset Reference", how="left")
    # Grab all of the properties on these streets that aren't system built
    streets_not_system_builds = all_assets_w_epcs[
        all_assets_w_epcs["Address"].str.split(",").str[0].str.split(" ").str[1].isin(system_built_streets) &
        ~all_assets_w_epcs["Wall Type"].str.contains("SystemBuilt")
    ]

    system_builds = archetyped_data[
        archetyped_data["Wall Type"].str.contains("SystemBuilt")
    ][["Asset Reference", "Address", "Wall Type", "walls-description"]].sort_values("Address")

    birling_street_system_builds = system_builds[system_builds["Address"].str.contains("Birling")]
    halstead_street_system_builds = system_builds[system_builds["Address"].str.contains("Halstead")]
    brasted_street_system_builds = system_builds[system_builds["Address"].str.contains("Brasted")]
    # Both spellings of the street name appear in the data
    frinstead_street_system_builds = system_builds[
        system_builds["Address"].str.contains("Frinstead") | system_builds["Address"].str.contains("Frinsted")
    ]

    pd.set_option('display.max_rows', 500)
    pd.set_option('display.max_columns', 500)
    pd.set_option('display.width', 1000)

    # Possible System Builds

    # Create the proposed sample, ordered by postcode and then EPC lodgement date within each archetype
    archetyped_data["lodgement-date"] = pd.to_datetime(archetyped_data["lodgement-date"])

    # lesney-0
    lesney_0 = archetyped_data[archetyped_data["archetype ID"] == 0].copy()
    lesney_0 = lesney_0.sort_values(["Address - Postcode", "lodgement-date"])

    lesney_1 = archetyped_data[archetyped_data["archetype ID"] == 1].copy()
    lesney_1 = lesney_1.sort_values(["Address - Postcode", "lodgement-date"])

    lesney_2 = archetyped_data[archetyped_data["archetype ID"] == 4].copy()
    lesney_2 = lesney_2.sort_values(["Address - Postcode", "lodgement-date"])

    lesney_3 = archetyped_data[archetyped_data["archetype ID"] == 3].copy()
    lesney_3 = lesney_3.sort_values(["Address - Postcode", "lodgement-date"])

    # Get the pitched roof properties, which are lesney-4
    lesney_4 = archetyped_data[archetyped_data["archetype ID"].isin([7, 8, 9])].copy()
    lesney_4 = lesney_4.sort_values(["Address - Postcode", "lodgement-date"])

    assigned_archetypes = archetyped_data[
        ["Asset Reference", "archetype ID", "Address", "Address - Postcode"] + chosen_combination +
        ["lodgement-date", "current-energy-rating", "current-energy-efficiency", "walls-description"]
    ].copy()

    # Map the archetype ID to their string representation. The one-off archetypes (2, 5, 6) are folded into
    # the archetype they most closely resemble.
    assigned_archetypes["archetype ID"] = assigned_archetypes["archetype ID"].replace(
        {
            0: "Lesney-0",
            1: "Lesney-1",
            4: "Lesney-2",
            3: "Lesney-3",
            7: "Lesney-4",
            8: "Lesney-4",
            9: "Lesney-4",
            2: "Lesney-0",
            5: "Lesney-2",
            6: "Lesney-0",
        }
    )

    assigned_archetypes["Asset Reference"] = assigned_archetypes["Asset Reference"].astype(int)

    assigned_archetypes.to_csv(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Orbit - Wates/assigned_archetypes.csv", index=False
    )
def culworth_court():
    """
    Some rough works on Culworth Court
    They're looking at an ASHP/GSHP

    Reads the asset list, looks up the newest EPC for every asset, merges the two and derives the distinct
    combinations of asset type, property type, built form and floor level.

    :return: None
    :raises Exception: if no EPC can be found for an asset
    """
    asset_list = pd.read_excel(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Orbit - Wates/001 - EPC CULWORTH COURT.xlsx",
        sheet_name="EPC C",
        header=1
    )
    asset_list = clean_colnames(asset_list)

    # Maps the asset list's asset types onto the property types used for the EPC search.
    # Defined once here rather than being rebuilt for every asset inside the loop.
    asset_type_map = {
        "HOUSE": "House",
        "BUNGALOWS": "Bungalow",
        "FLATS": "Flat",
        "MAISONETTES": "Maisonette",
    }

    # Get the EPC data, one lookup per asset
    epc_data = []
    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
        address = home["Address"]
        # Spelling error
        if "Frinstead" in address:
            address = address.replace("Frinstead", "Frinsted")

        address1 = address.split(",")[0]

        searcher = SearchEpc(
            address1=address1,
            postcode=home["Address - Postcode"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key="",
            full_address=address,
        )
        # The search is run with skip_os=True, so the property type is set on the client by hand
        searcher.ordnance_survey_client.property_type = asset_type_map[home["Asset Type"]]
        searcher.ordnance_survey_client.built_form = None

        searcher.find_property(skip_os=True)
        if searcher.newest_epc is None:
            raise Exception("Couldn't find")

        epc_data.append(
            {
                "Asset Reference": home["Asset Reference"],
                **searcher.newest_epc.copy()
            }
        )

    epc_data = pd.DataFrame(epc_data)

    asset_list = asset_list.merge(epc_data, on="Asset Reference", how="left")

    # Treat the "NODATA!" placeholder as an empty floor level
    asset_list["floor-level"] = np.where(
        asset_list["floor-level"] == "NODATA!",
        "",
        asset_list["floor-level"]
    )

    # Fold "Enclosed End-Terrace" into "End-Terrace" so the two are not counted as separate archetypes
    asset_list["built-form"] = np.where(
        asset_list["built-form"] == "Enclosed End-Terrace",
        "End-Terrace",
        asset_list["built-form"]
    )

    archetype_combinations = asset_list[
        ["Asset Type", "Property Type", "built-form", "floor-level"]
    ].drop_duplicates()

View file

@ -0,0 +1,141 @@
import pandas as pd
from utils.s3 import save_csv_to_s3
USER_ID = 8
PORTFOLIO_ID = 100
def app():
    """
    Set up an asset list with just a few properties to model the impact of the following scenarios:
    1) EWI
    2) EWI + Solar
    3) EWI + Solar + ASHP

    The asset list and a fixed solar PV recommendation per property are uploaded to S3, and the request body
    for each scenario is printed.
    :return:
    """
    bucket = "retrofit-plan-inputs-dev"

    pilot_properties = pd.DataFrame([
        # This is an example of a low D - SAP score is 60
        {"address": "37, Birling Road", "postcode": "DA8 3JQ", "uprn": 100020225444},
        {"address": "16, Brasted Road", "postcode": "DA8 3HU", "uprn": 100020225805},
        {"address": "25, Birling Road", "postcode": "DA8 3JQ", "uprn": 100020225432},
        {"address": "4, Halstead Road", "postcode": "DA8 3HX", "uprn": 100020229555},
    ])

    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
    save_csv_to_s3(dataframe=pilot_properties, bucket_name=bucket, file_name=filename)

    def solar_recommendation():
        # The same solar PV recommendation is attached to every property
        return {
            "type": "solar_pv",
            "suitable": True,
            "array_wattage": 4000,
            "initial_ac_kwh_per_year": 3800,
            "cost": 4009,
            # Rough estimate for 10 panels, around 1m x 1.8m (accommodate gaps and 30cm edge)
            "panneled_roof_area": 20,
        }

    solar_rows = [
        {"uprn": row["uprn"], "recommendations": [solar_recommendation()]}
        for _, row in pilot_properties.iterrows()
    ]

    # Store non-invasive recommendations in S3
    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
    save_csv_to_s3(
        dataframe=pd.DataFrame(solar_rows),
        bucket_name=bucket,
        file_name=non_invasive_recommendations_filename
    )

    def scenario_body(scenario_name, exclusions, recommendations_path):
        # The three request bodies differ only in these arguments
        return {
            "portfolio_id": str(PORTFOLIO_ID),
            "housing_type": "Private",
            "goal": "Increasing EPC",
            "goal_value": "A",
            "trigger_file_path": filename,
            "already_installed_file_path": "",
            "patches_file_path": "",
            "non_invasive_recommendations_file_path": recommendations_path,
            "scenario_name": scenario_name,
            "multi_plan": True,
            "exclusions": exclusions,
            "budget": None,
        }

    scenarios = [
        # EWI only: solar is excluded and no non-invasive recommendations file is supplied
        (
            "ECO4 funding - EWI",
            [
                "internal_wall_insulation",
                "roof_insulation", "ventilation", "floor_insulation", "windows", "fireplace", "heating",
                "hot_water", "lighting", "secondary_heating", "solar_pv"
            ],
            "",
        ),
        # EWI + Solar: heating measures remain excluded
        (
            "ECO4 funding - EWI + Solar",
            [
                "internal_wall_insulation",
                "roof_insulation",
                "ventilation",
                "floor_insulation",
                "windows",
                "fireplace",
                "heating",
                "hot_water",
                "lighting",
                "secondary_heating",
                "boiler_upgrade",
                "high_heat_retention_storage_heater",
            ],
            non_invasive_recommendations_filename,
        ),
        # EWI + Solar + ASHP: "heating" is no longer excluded
        (
            "ECO4 funding - EWI + Solar + ASHP",
            [
                "internal_wall_insulation",
                "roof_insulation", "ventilation", "floor_insulation", "windows", "fireplace", "hot_water",
                "lighting", "secondary_heating",
            ],
            non_invasive_recommendations_filename,
        ),
    ]

    for scenario_name, exclusions, recommendations_path in scenarios:
        print(scenario_body(scenario_name, exclusions, recommendations_path))

View file

@ -0,0 +1,196 @@
import pandas as pd
from utils.s3 import save_csv_to_s3
def app():
    """
    Upload the asset list and on-site survey recommendations for portfolio 101 to S3, and print the request
    bodies for its two scenarios ("Quick wins - do now while tenanted" and "Do when void").
    :return:
    """
    portfolio_id = 101
    bucket = "retrofit-plan-inputs-dev"

    # This is the payload to be used to extract the energy assessment data from s3 and upload it to the database,
    # as well as produce links to each of the uploaded documents.
    body = {
        "portfolio_id": portfolio_id,
        "surveyor": "JAFFERSONS ENERGY CONSULTANTS",
        "project_code": "VEC001",
    }

    glazing_description = (
        "Install double glazing to north facing windows and secondary glazing to the "
        "remaining windows at the front of the building"
    )

    def surveyed(measure, cost=None, sap_points=None, description=None):
        # Builds one survey-backed recommendation; optional fields are only included when given
        item = {"type": measure, "cost": cost, "survey": True}
        if description is not None:
            item["description"] = description
        if sap_points is not None:
            item["sap_points"] = sap_points
        return item

    def draught_proofing(sap_points):
        return surveyed("draught_proofing", 123, sap_points)

    def mixed_glazing(sap_points):
        return surveyed("mixed_glazing", 12345, sap_points, glazing_description)

    def trickle_vents():
        return surveyed("trickle_vents", 500)

    # These are the recommendations based on the on-site survey of the property.
    non_intrusive_recommendations = [
        {
            # 2 Grove Mansions
            "uprn": 121016121,
            "recommendations": [
                draught_proofing(1),
                mixed_glazing(3),
                trickle_vents(),
                surveyed("suspended_floor_insulation", sap_points=2),
                surveyed("internal_wall_insulation", sap_points=5),
            ]
        },
        {
            # 8 Grove Mansions
            "uprn": 10024087855,
            "recommendations": [
                draught_proofing(2),
                mixed_glazing(4),
                trickle_vents(),
                surveyed("low_energy_lighting", sap_points=0),
                surveyed("internal_wall_insulation", sap_points=5),
            ]
        },
        {
            # 9 Grove Mansions
            "uprn": 121016128,
            "recommendations": [
                draught_proofing(1),
                mixed_glazing(3),
                trickle_vents(),
                surveyed("low_energy_lighting", sap_points=1),
                # This entry carries no "survey" flag, unlike the others
                {"type": "suspended_floor_insulation", "cost": None, "sap_points": 1},
                surveyed("internal_wall_insulation", sap_points=6),
            ]
        },
        {
            # 5 Grove Mansions
            "uprn": 121016124,
            "recommendations": [
                mixed_glazing(5),
                trickle_vents(),
                surveyed("low_energy_lighting", sap_points=2),
                surveyed("internal_wall_insulation", sap_points=8),
            ]
        },
        {
            # 14 Grove Mansions
            "uprn": 121016117,
            "recommendations": [
                draught_proofing(1),
                mixed_glazing(4),
                trickle_vents(),
                surveyed("low_energy_lighting", sap_points=1),
                surveyed("internal_wall_insulation", sap_points=6),
            ]
        },
        {
            # 19 Grove Mansions
            "uprn": 10024087902,
            "recommendations": [
                surveyed("low_energy_lighting", sap_points=0),
                surveyed("internal_wall_insulation", sap_points=2),
                surveyed("room_roof_insulation", sap_points=16),
            ]
        },
    ]

    # The asset list holds the same UPRNs, in the same order, with blank address and postcode
    asset_list = pd.DataFrame(
        [{"uprn": entry["uprn"], "address": "", "postcode": ""} for entry in non_intrusive_recommendations]
    )

    filename = f"8/{portfolio_id}/asset_list.csv"
    save_csv_to_s3(dataframe=asset_list, bucket_name=bucket, file_name=filename)

    # Store non-invasive recommendations in S3
    # NOTE(review): the key ends in .json but the file is written with save_csv_to_s3 - confirm what the
    # consumer expects.
    non_invasive_recommendations_filename = f"8/{portfolio_id}/non_invasive_recommendations.json"
    save_csv_to_s3(
        dataframe=pd.DataFrame(non_intrusive_recommendations),
        bucket_name=bucket,
        file_name=non_invasive_recommendations_filename
    )

    def scenario_body(scenario_name, inclusions):
        # The two request bodies differ only in their name and their inclusions
        return {
            "portfolio_id": str(portfolio_id),
            "housing_type": "Private",
            "goal": "Increasing EPC",
            "goal_value": "A",
            "trigger_file_path": filename,
            "already_installed_file_path": "",
            "patches_file_path": "",
            "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
            "inclusions": inclusions,
            "budget": None,
            "scenario_name": scenario_name,
            "multi_plan": True,
        }

    quick_wins = ["draught_proofing", "mixed_glazing", "trickle_vents", "low_energy_lighting"]

    # This is the first scenario which includes the first batch of recommendations
    body1 = scenario_body("Quick wins - do now while tenanted", list(quick_wins))

    # This is the second scenario which includes the second batch of recommendations
    body2 = scenario_body(
        "Do when void",
        quick_wins + ["suspended_floor_insulation", "internal_wall_insulation"],
    )

    print(body1)
    print(body2)

View file

@ -0,0 +1,90 @@
import inspect
import pandas as pd
from tqdm import tqdm
from pathlib import Path
src_file_path = inspect.getfile(lambda: None)
EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
def app():
    """
    Collect energy efficiency categories for insulated walls and air source heat pumps from the bulk EPC data.

    For EPCs lodged from 2020 onwards, this collects data on the energy efficiency categories for wall
    insulation so that when we simulate, we know what the resulting energy efficiency category will be.
    Every sub-directory of EPC_DIRECTORY is expected to hold a certificates.csv file.

    :return: dict with two keys. "walls" maps each insulated wall description to a list of records, one per
        construction age band, holding the most common walls-energy-eff value and its count. "ashp" is a
        list of records counting air source heat pump certificates per age band, heating control and
        efficiency combination. Previously these tables were computed and discarded.
    """
    wall_descriptions = [
        "Cavity wall, filled cavity",
        "Solid brick, with internal insulation",
        "Solid brick, with external insulation",
    ]
    date_cutoff = "2020-01-01"

    def recent_with_uprn(certificates):
        # Keep certificates that have a UPRN and were lodged on or after the cutoff date
        certificates = certificates[~pd.isnull(certificates["uprn"])]
        return certificates[pd.to_datetime(certificates["lodgement-date"]) >= date_cutoff]

    epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]

    walls_data = []
    ashp_data = []
    for directory in tqdm(epc_directories):
        data = pd.read_csv(directory / "certificates.csv", low_memory=False)
        # Rename the columns to the same format as the api returns
        data.columns = [c.replace("_", "-").lower() for c in data.columns]

        walls_data.append(recent_with_uprn(data[data["walls-description"].isin(wall_descriptions)]))
        ashp_data.append(
            recent_with_uprn(data[data["mainheat-description"] == "Air source heat pump, radiators, electric"])
        )

    walls_df = pd.concat(walls_data)
    ashp_df = pd.concat(ashp_data)

    ashp_agg = (
        ashp_df.
        groupby(
            ["construction-age-band", "mainheat-description", "mainheatcont-description", "mainheat-energy-eff",
             "mainheatc-energy-eff"]
        )
        .size()
        .reset_index()
        .rename(columns={0: "count"})
    )
    ashp_agg = ashp_agg[
        ashp_agg["mainheatcont-description"].isin(
            ["Programmer, TRVs and bypass", "Time and temperature zone control"]
        )
    ]

    aggregations = {}
    for description in wall_descriptions:
        aggregation = walls_df[
            walls_df["walls-description"] == description
        ].groupby(
            ["construction-age-band", "walls-energy-eff"]
        ).size().reset_index().rename(columns={0: "count"})

        # For each grouping of age band, we use the most populous energy efficiency category
        aggregation_deduped = aggregation.sort_values("count", ascending=False).drop_duplicates("construction-age-band")
        aggregations[description] = aggregation_deduped

    # Since these tables are small, we just convert them to python dictionaries
    # This data is just held in the wall_energy_efficiency_values script, rather than s3
    return {
        "walls": {description: table.to_dict("records") for description, table in aggregations.items()},
        "ashp": ashp_agg.to_dict("records"),
    }

1126
etl/ownership/Ownership.py Normal file

File diff suppressed because it is too large Load diff

10
etl/ownership/README.md Normal file
View file

@ -0,0 +1,10 @@
# Ownership Application
This application contains methods that allow us to attempt to discover
corporate ownership of properties, where possible.
Practically, it's likely that the code within this application will be
exported into other areas of this repository, and used to assemble
pipelines that solve specific property ownership questions, and so this
codebase is set up with the goal of providing fairly easy-to-use,
plug-and-play tools.

35
etl/ownership/config.py Normal file
View file

@ -0,0 +1,35 @@
# These are the registration numbers for companies we've heard a response from, and cannot sell.
# The numbers are stored as strings; each entry carries a comment explaining why the owner is excluded.
OWNERS_WHO_CANT_SELL = [
# Al Rayan - they're the senior lender, not able to sell
"4483430",
# Ultrabarn - they're unwilling to sell and will sort any retrofits themselves
"2794851",
# Mountview - Anna spoke with someone from Mountview - they acquire tenancies and sell them as soon as they become
# vacant. They have no immediate opportunities but we may come back and remove this
"328090",
]
# UPRNs (stored as integers) that should be left out, each with the reason it was excluded
EXCLUDED_UPRNS = [
# This property no longer exists
200003827624,
# This property doesn't seem to exist
90070698,
# Can't really find a solid record on Zoopla/Rightmove
10090437990,
# This property doesn't seem to exist
100070902790,
# This property doesn't seem to exist
100070902791,
# This property doesn't seem to exist
100031997775,
# Can't find reliable information to this property on zoopla/rightmove
200001372608,
# Can't find reliable information to this property on zoopla/rightmove
100031592801,
# Can't find reliable information to this property on zoopla/rightmove
100031579087,
# Can't find reliable information to this property on zoopla/rightmove
200000877273,
# Can't find reliable information to this property on zoopla/rightmove - seems like a post office!
100071391639
]

View file

@ -0,0 +1,181 @@
import datetime
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from backend.app.db.models.portfolio import Portfolio, PortfolioUsers
from etl.ownership.Ownership import Ownership
from etl.ownership.config import OWNERS_WHO_CANT_SELL as EXCLUDED_OWNERS, EXCLUDED_UPRNS
from utils.s3 import save_csv_to_s3
# Set up the project configuration
# Ids of the users handed to create_sfr_portfolio (that call is currently commented out in app())
USER_IDS = [
2, # Khalim
3, # Chenai
5, # Anna
30, # Patricia
]
# Local EPC certificate extracts, one csv per local authority, passed to Ownership as epc_paths
EPC_PATHS = [
"local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
"local_data/all-domestic-certificates/domestic-E08000031-Wolverhampton/certificates.csv",
"local_data/all-domestic-certificates/domestic-E08000026-Coventry/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000016-Leicester/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000015-Derby/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000021-Stoke-on-Trent/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000018-Nottingham/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000154-Northampton/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000061-North-Northamptonshire/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000062-West-Northamptonshire/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000152-East-Northamptonshire/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000155-South-Northamptonshire/certificates.csv",
#
"local_data/all-domestic-certificates/domestic-E08000027-Dudley/certificates.csv",
"local_data/all-domestic-certificates/domestic-E08000029-Solihull/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000234-Bromsgrove/certificates.csv",
"local_data/all-domestic-certificates/domestic-E08000030-Walsall/certificates.csv",
"local_data/all-domestic-certificates/domestic-E08000028-Sandwell/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000019-Herefordshire-County-of/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000020-Telford-and-Wrekin/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000218-North-Warwickshire/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000222-Warwick/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000237-Worcester/certificates.csv",
# East midlands
"local_data/all-domestic-certificates/domestic-E07000035-Derbyshire-Dales/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000038-North-East-Derbyshire/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000039-South-Derbyshire/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000012-North-East-Lincolnshire/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000013-North-Lincolnshire/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000138-Lincoln/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000134-North-West-Leicestershire/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000017-Rutland/certificates.csv",
]
# NOTE(review): the three paths below are absolute paths into one user's Downloads folder, so this script only
# runs as-is on that machine - consider making them configurable
DOMESTIC_OWNERSHIP_PATH = "/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_07.csv"
OVERSEAS_OWNERSHIP_PATH = "/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_07.csv"
LAND_REGISTRY_PATH = "/Users/khalimconn-kowlessar/Downloads/pp-complete.csv"
PROJECT_NAME = "Midlands Portfolio"
DATA_BUCKET = "retrofit-data-dev"
# We use this as a rough figure, which helps us shape the portfolio
PROPERTY_VALUE_ESTIMATE = 200_000
# We want a 50m portfolio, but we create a bigger portfolio than needed, since properties will be filtered out
PORTFOLIO_VALUE = 75_000_000
def create_sfr_portfolio(project_name, user_ids, status, goal):
    """
    Return the portfolio called ``project_name``, creating it (and its user associations) when it is missing.

    :param project_name: Name used to look up, or create, the portfolio
    :param user_ids: Ids of the users to associate with a newly created portfolio
    :param status: Status stored on a newly created portfolio
    :param goal: Goal stored on a newly created portfolio
    :return: The newly created portfolio; the existing portfolio when at least one of ``user_ids`` is already
        associated with it; None when a portfolio with this name exists but for none of ``user_ids``
    :raises Exception: Any error is printed, the transaction is rolled back and the error is re-raised
    """
    session = sessionmaker(bind=db_engine)()
    try:
        session.begin()

        # Check for an existing portfolio by name
        existing = session.query(Portfolio).filter_by(name=project_name).one_or_none()

        if not existing:
            # Nothing under this name yet, so create the portfolio with the provided status and goal
            created = Portfolio(name=project_name, status=status, goal=goal)
            session.add(created)
            # Flush so that the generated id is available for the associations below
            session.flush()
            session.add_all(
                [PortfolioUsers(user_id=uid, portfolioId=created.id) for uid in user_ids]
            )
            session.commit()
            print(f"New portfolio created with ID: {created.id}")
            return created

        # The portfolio exists: work out which users it is already associated with
        linked_user_ids = {
            row.user_id for row in session.query(PortfolioUsers.user_id).filter_by(portfolioId=existing.id)
        }
        if linked_user_ids.isdisjoint(user_ids):
            print("Portfolio already exists under this name, for different users.")
            session.rollback()  # No changes to be committed
            return None  # Optional: the user associations could be updated here if needed

        print("Portfolio already exists under this name, for specified users.")
        return existing  # Return the existing portfolio data
    except Exception as e:
        session.rollback()  # Ensure no partial changes are committed
        print(f"An error occurred: {e}")
        raise
    finally:
        session.close()
def app():
    """
    Build the asset list for the configured project, upload it to S3 and create the final ownership outputs.

    Steps: run the Ownership pipeline over the configured EPC extracts (F and G rated certificates only), save
    the resulting asset list as a csv in the plan inputs bucket, print the request body that refers to that
    file, and finally call ``create_final_outputs`` for a fixed portfolio timestamp.
    """
    ownership = Ownership(
        epc_paths=EPC_PATHS,
        domestic_ownership_path=DOMESTIC_OWNERSHIP_PATH,
        overseas_ownership_path=OVERSEAS_OWNERSHIP_PATH,
        land_registry_path=LAND_REGISTRY_PATH,
        project_name=PROJECT_NAME,
        bucket=DATA_BUCKET,
        average_property_value=PROPERTY_VALUE_ESTIMATE,
        portfolio_value=PORTFOLIO_VALUE,
        excluded_owners=EXCLUDED_OWNERS,
        excluded_uprns=EXCLUDED_UPRNS,
    )
    # Restrict the EPC data to the lowest two energy ratings
    ownership.pipeline(column_filters={"CURRENT_ENERGY_RATING": ["F", "G"]})

    # Create the asset list, which forms the body of the portfolio
    asset_list = ownership.get_asset_list()

    # Create the project, if a portfolio doesn't exist for the project name
    # TODO: Wasn't working
    # create_sfr_portfolio(project_name=PROJECT_NAME, user_ids=USER_IDS, status="scoping", goal="Increasing EPC")
    # Until the call above works, the portfolio and user ids are fixed by hand
    portfolio_id = 99
    user_id = 8

    asset_list_key = f"{user_id}/{portfolio_id}/asset_list.csv"
    save_csv_to_s3(dataframe=asset_list, bucket_name="retrofit-plan-inputs-dev", file_name=asset_list_key)

    request_body = dict(
        portfolio_id=str(portfolio_id),
        housing_type="Private",
        goal="Increasing EPC",
        goal_value="C",
        trigger_file_path=asset_list_key,
        already_installed_file_path="",
        patches_file_path="",
        non_invasive_recommendations_file_path="",
        scenario_name="Hit EPC C",
        multi_plan=True,
        exclusions=["fireplace", "floor_insulation"],
        budget=None,
    )
    print(request_body)

    # # We read in the current valuation data and identify if there are any uprns that need to be added
    # previous_valuations = pd.read_excel(
    # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/sfr/sfr property valuations.xlsx")
    # missed = asset_list[~asset_list["uprn"].astype(str).isin(previous_valuations["uprn"].astype(str))]
    # missed.to_csv("missed_valuations.csv")

    # We now need a distinct step to prepare final outputs
    # The data is stored against today's date, in the yyyy-mm-dd format
    ownership.create_final_outputs(
        portfolio_timestamp="2024-08-20 19:51:33.884145",
        storage_date=datetime.datetime.now().strftime("%Y-%m-%d"),
        exclusion_uprns=EXCLUDED_UPRNS,
    )

View file

@ -3,7 +3,8 @@ from tqdm import tqdm
import pandas as pd
import geopandas as gpd
from utils.logger import setup_logger
from utils.s3 import read_io_from_s3, save_dataframe_to_s3_parquet
from utils.s3 import read_io_from_s3, save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
from backend.Property import Property
logger = setup_logger()
@ -116,3 +117,81 @@ class OpenUprnClient:
file_key=file_key,
bucket_name=bucket_name
)
@staticmethod
def make_uprn_map(uprns, uprn_filenames):
"""
Given a list of UPRNs, this method will return a map of the UPRN to the filename that the UPRN is contained in
:param uprns: List of UPRNs
:param uprn_filenames: Lookup from UPRN range to filename
:return:
"""
uprn_map = {}
for uprn in uprns:
filtered_df = uprn_filenames[
(uprn_filenames["lower"] <= int(uprn))
& (uprn_filenames["upper"] >= int(uprn))
]
if filtered_df["filenames"].values[0] in uprn_map:
uprn_map[filtered_df["filenames"].values[0]].append(int(uprn))
else:
uprn_map[filtered_df["filenames"].values[0]] = [int(uprn)]
return uprn_map
@classmethod
def set_spatial_data(cls, input_properties: list[Property], bucket_name):
    """
    Given a list of properties, this method will set the spatial data for each property

    The method will look for the minimal set of uprn datasets that it needs to read in to get all of the spatial
    data for the properties

    :param input_properties: Properties to update. Each one is read through its ``uprn`` and ``spatial``
        attributes and updated through its ``set_spatial`` method
    :param bucket_name: Bucket that holds ``spatial/filename_meta.parquet`` and the ``spatial/<filename>`` files
    :return: The same list of properties, updated in place
    :raises Exception: If a property still has no spatial data once every file has been processed
    """
    uprn_filenames = read_dataframe_from_s3_parquet(
        bucket_name=bucket_name, file_key="spatial/filename_meta.parquet"
    )
    uprn_map = cls.make_uprn_map([p.uprn for p in input_properties], uprn_filenames)

    for filename, associated_uprn in tqdm(uprn_map.items(), total=len(uprn_map)):
        # Read the file from the caller's bucket, so that the lookup and the files it indexes always come from
        # the same place (rather than from a fixed development bucket)
        spatial_data = read_dataframe_from_s3_parquet(
            bucket_name=bucket_name, file_key=f"spatial/{filename}"
        )
        spatial_df = spatial_data[spatial_data["UPRN"].isin(associated_uprn)]

        # A set keeps the membership check constant-time for every property
        uprns_in_file = set(associated_uprn)
        for p in input_properties:
            if p.uprn in uprns_in_file:
                p.set_spatial(spatial_df[spatial_df["UPRN"] == p.uprn])

    # Perform a final check to ensure that all properties have spatial data
    for p in input_properties:
        if p.spatial is None:
            raise Exception(f"Property with UPRN {p.uprn} does not have spatial data")

    return input_properties
@classmethod
def get_spatial_data(cls, uprns: list[int], bucket_name):
    """
    Similar method to set_spatial_data, but designed to work more generally on a list of uprns

    :param uprns: UPRNs to fetch spatial rows for
    :param bucket_name: Bucket that holds ``spatial/filename_meta.parquet`` and the ``spatial/<filename>`` files
    :return: A single DataFrame with the rows, from every relevant file, whose "UPRN" is one of ``uprns``
    :raises ValueError: Raised by ``pd.concat`` when ``uprns`` is empty, since there is nothing to concatenate
    """
    uprn_filenames = read_dataframe_from_s3_parquet(
        bucket_name=bucket_name, file_key="spatial/filename_meta.parquet"
    )
    uprn_map = cls.make_uprn_map(uprns, uprn_filenames)

    uprn_spatial_table = []
    for filename, associated_uprn in tqdm(uprn_map.items(), total=len(uprn_map)):
        # Read the file from the caller's bucket, so that the lookup and the files it indexes always come from
        # the same place (rather than from a fixed development bucket)
        spatial_data = read_dataframe_from_s3_parquet(
            bucket_name=bucket_name, file_key=f"spatial/{filename}"
        )
        uprn_spatial_table.append(spatial_data[spatial_data["UPRN"].isin(associated_uprn)])

    return pd.concat(uprn_spatial_table)

View file

@ -0,0 +1,287 @@
# We use some sample properties from Newhaven to use as a testing dataset for implementing the model fixes
import inspect
import pandas as pd
from etl.epc.settings import EARLIEST_EPC_DATE
from pathlib import Path
from utils.s3 import save_csv_to_s3
# inspect.getfile on a lambda defined here returns the path of this source file, which anchors EPC_DIRECTORY
src_file_path = inspect.getfile(lambda: None)
EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
# Used to build the S3 key of the asset list and the portfolio_id of the request body
USER_ID = 8
PORTFOLIO_ID = -1
def app():
"""
This application is tasked with pulling a large quantity of data from the find my epc website, containing the
estimated energy consumption for properties
:return:
"""
# NOTE(review): the docstring above does not match the visible code, which samples 10 Lewes certificates from a
# local csv, uploads them as an asset list and prints a request body - confirm and update the docstring
lewes_directory = EPC_DIRECTORY / "domestic-E07000063-Lewes/certificates.csv"
data = pd.read_csv(lewes_directory, low_memory=False)
# Rename the columns to the same format as the api returns
data.columns = [c.replace("_", "-").lower() for c in data.columns]
# Keep only certificates lodged on or after the earliest EPC date threshold
data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE]
# Drop certificates without a uprn
data = data[~pd.isnull(data["uprn"])]
# Keep only certificates with a current energy efficiency score below 52
data = data[data["current-energy-efficiency"].astype(float) < 52]
# Random sample of 10 rows; no random_state is set, so each run picks different properties
data = data.sample(10)
# Create an asset list
asset_list = data[["uprn", "address1", "postcode"]].copy().rename(columns={"address1": "address"})
asset_list["uprn"] = asset_list["uprn"].astype(str)
filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
save_csv_to_s3(
dataframe=asset_list,
bucket_name="retrofit-plan-inputs-dev",
file_name=filename
)
body = {
"portfolio_id": str(PORTFOLIO_ID),
"housing_type": "Private",
"goal": "Increasing EPC",
"goal_value": "B",
"trigger_file_path": filename,
"already_installed_file_path": "",
"patches_file_path": "",
"non_invasive_recommendations_file_path": "",
"budget": None,
}
print(body)
# This is some temp code, which is for diagnosing the issues with the bills models
# NOTE(review): from here on this is exploratory scratch code. It references names that are not defined or
# imported anywhere in the visible file (model_api, get_prediction_buckets, get_settings, p, cleaned,
# property_recommendations, epcs_for_scoring, property_scoring_epcs, add_features_from_code,
# add_estimate_annual_kwh), so it cannot run as written - confirm whether it should live in this module
heating_training_data_filepath = "sap_change_model/2024-08-06-11-19-49/dataset_rooms.parquet"
# For the heating model:
heating_drop_columns = [
"sap_ending", "heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending",
"lighting_cost_ending", "hot_water_cost_ending",
# "days_to_ending", "days_to_starting", # TODO This is in the live version
'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting',
'number_heated_rooms_ending',
'number_habitable_rooms', 'number_heated_rooms'
]
# NOTE(review): heating_response is assigned but not used in the visible code
heating_response = "heating_cost_ending"
# for the hot water model (older dataset)
# NOTE(review): the two hot water variables below are assigned but not used in the visible code
hot_water_training_data_filepath = "sap_change_model/2024-07-10-20-28-54/dataset_rooms.parquet"
hot_water_drop_columns = [
"sap_ending", "heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending",
"lighting_cost_ending", "heating_cost_ending",
"days_to_starting", "days_to_ending",
'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting',
'number_heated_rooms_ending',
'number_habitable_rooms', 'number_heated_rooms'
]
# Diagnose heating
from utils.s3 import read_dataframe_from_s3_parquet
train = read_dataframe_from_s3_parquet(
bucket_name="retrofit-data-dev",
file_key=heating_training_data_filepath
)
# Drop the columns that aren't used
train = train.drop(columns=heating_drop_columns)
# if the value is positive, it means the ending cost is bigger than the starting (which means it got more expensive)
train["cost_diference"] = (train["heating_cost_ending"] - train["heating_cost_starting"])
change_direction = train["cost_diference"] > 0
# The result of this expression is not stored; it is only useful when run interactively
change_direction.value_counts(normalize=True)
average_costs_by_time_starting = train.groupby(
["lodgement_year_starting", "lodgement_month_starting"]
)["heating_cost_starting"].mean().reset_index().sort_values(["lodgement_year_starting", "lodgement_month_starting"])
average_costs_by_time_ending = train.groupby(
["lodgement_year_ending", "lodgement_month_ending"]
)["heating_cost_ending"].mean().reset_index().sort_values(["lodgement_year_ending", "lodgement_month_ending"])
# Check by photo supply values - if the property is gas, solar panels won't have an effect on the heating or hot
# water so let's look for electric homes
# Across the entire dataset, there is no correlation
# Even for electric properties, there is no correlation
photo_supply_averages = train[
train["fuel_type_ending"] == "electricity"
].groupby(["photo_supply_ending"])["heating_cost_ending"].mean().reset_index()
photo_supply_to_size = train.groupby("photo_supply_ending")["total_floor_area_ending"].mean().reset_index()
photo_supply_to_size[["photo_supply_ending", "total_floor_area_ending"]].corr()
train[["total_floor_area_ending", "heating_cost_ending"]].corr()
# Bigger properties end up with smaller photo_supply values. This will be because the array size likely remains fairly
# consistent but takes up a smaller proportion of the roof. Typically, the bigger the floor area, the higher the heating
# costs, but bigger units also have smaller photo_supply
adding_solar = train[
(train["photo_supply_ending"] > 0) & (train["photo_supply_starting"] == 0)
]
is_positive = (adding_solar["cost_diference"] > 0)
is_positive.value_counts(normalize=True)
photo_supply_by_time = (
train[
train["fuel_type_ending"] == "electricity"
].groupby(
["lodgement_year_ending", "photo_supply_ending"]
)["heating_cost_ending"].mean().reset_index().sort_values(
["lodgement_year_ending", "photo_supply_ending"], ascending=True)
)
# Plot
photo_supply_by_time[["photo_supply_ending", "heating_cost_ending"]].corr()
photo_supply_by_time.plot()
# Observations
# 1) We retain all of the potential columns, however they are just based on the starting EPC
# 2) 21% of the time, the ending heating cost is more than the starting but this is clearly a minority
# 3) Let's get rid of estimated perimeter starting and ending
# Things I should check
# 1) Do we update the lodgement_year_ending and lodgement_month_ending
# 2) Should we adjust costs to now, as well as lodgement_dates to today? Since 2023, costs have increased a lot so
# any savings should be benchmarked against what a customer is paying now
# 3) It might make sense to create a feature between floor area and photo supply, to give a more consistent estimate
# of a panel size for the property
# Get an example and score with the models
# The example is a row where solar was added (photo supply went from 0 to above 0) and the heating cost fell
example = train[
(train["photo_supply_starting"] == 0) &
(train["photo_supply_ending"] > 0) &
(train["heating_cost_starting"] > train["heating_cost_ending"])
].sample(1)
# example["lodgement_month_starting"]
# example["lodgement_year_starting"]
# example["lodgement_month_ending"]
# example["lodgement_year_ending"].values[0]
#
# example["lodgement_year_ending"] = 2023
# example["days_to_ending"] = 3500
# example["days_to_starting"]
# {'heating_cost_predictions': predictions
# 0 378.5}
resp = model_api.predict_all(
df=example,
bucket="retrofit-data-dev",
prediction_buckets=get_prediction_buckets(),
model_prefixes=["heating_cost_predictions"],
extract_ids=False
)
# Step 1: get a cost for today
p.create_base_difference_epc_record(cleaned)
cwi_impact = p.base_difference_record.df.copy()
# Overlay the first recommendation's simulation config onto the baseline record
for k in property_recommendations[0][0]["simulation_config"]:
cwi_impact[k] = property_recommendations[0][0]["simulation_config"][k]
# 2212.4 - Baseline
today = model_api.predict_all(
df=p.base_difference_record.df.copy(),
bucket="retrofit-data-dev",
prediction_buckets=get_prediction_buckets(),
model_prefixes=["heating_cost_predictions"],
extract_ids=False
)
# impact of CWI - 1908
cwi_response = model_api.predict_all(
df=cwi_impact,
bucket="retrofit-data-dev",
prediction_buckets=get_prediction_buckets(),
model_prefixes=["heating_cost_predictions"],
extract_ids=False
)
# Same record again, with a photo supply of 50 and the starting heating cost pinned to the baseline figure above
pv_impact = cwi_impact.copy()
pv_impact["photo_supply_ending"] = 50
pv_impact["heating_cost_starting"] = 2212.4
pv_response = model_api.predict_all(
df=pv_impact,
bucket="retrofit-data-dev",
prediction_buckets=get_prediction_buckets(),
model_prefixes=["heating_cost_predictions"],
extract_ids=False
)
# Testing kwh for vde
base_prediction = model_api.predict_all(
df=epcs_for_scoring,
bucket=get_settings().DATA_BUCKET,
prediction_buckets=get_prediction_buckets(),
model_prefixes=["heating_kwh_predictions"],
extract_ids=False
)
cwi_epc = pd.DataFrame([property_scoring_epcs[1].copy()])
cwi_epc = add_features_from_code(cwi_epc)
cwi_epc = add_estimate_annual_kwh(cwi_epc)
# cwi_epc["walls-description"] = "Cavity wall, filled cavity"
# cwi_epc["walls-energy-eff"] = "Good"
# cwi_epc["heating-cost-current"] = 1650
# cwi_epc["current-energy-efficiency"] = 72
# cwi_epc["current-energy-rating"] = "C"
# cwi_epc["co2-emissions-current"] = 3.7
# cwi_epc["energy-consumption-current"] = 121
# cwi_epc["co2-emiss-curr-per-floor-area"] = 19
# cwi_epc["photo-supply"] = 0
# cwi_epc["energy-consumption-current"] =
# cwi_epc["roof-description"] = "Pitched, 300 mm loft insulation"
# cwi_epc["roof-energy-eff"] = "Very Good"
# cwi_epc["heating-cost-current"] = 1264
# "heating-cost-current": rec_impact["epc_heating_cost"],
# "hot-water-cost-current": rec_impact["epc_hot_water_cost"],
# # CO₂ emissions per square metre floor area per year in kg/m². Since CO₂ emissions are in tonnes
# # per year, we multiply by 1000 to get kg/m²
# "co2-emiss-curr-per-floor-area": round(
# 1000 * (rec_impact["carbon"] / self.data["total-floor-area"])
# ),
# "co2-emissions-current": rec_impact["carbon"],
# "current-energy-rating": sap_to_epc(rec_impact["sap"]),
# "current-energy-efficiency": int(np.floor(rec_impact["sap"])),
# "energy-consumption-current": rec_impact["heat_demand"],
# "lighting-cost-current": rec_impact["epc_lighting_cost"],
# "id": "+".join([str(self.id), rec_id])
cwi_prediction = model_api.predict_all(
df=cwi_epc,
bucket=get_settings().DATA_BUCKET,
prediction_buckets=get_prediction_buckets(),
model_prefixes=["heating_kwh_predictions", "hotwater_kwh_predictions"],
extract_ids=False
)
# NOTE(review): the figures below are overwritten in sequence, so only the last assignment of each name feeds
# the impact calculation at the bottom
# 77 perryn
starting_heating = 19837.2
starting_hot_water = 2974.1
ending_heating = 17041.1
ending_hot_water = 2735.3
# 44 lindlings
starting_heating = 13327.1
starting_hot_water = 2349.5
ending_heating = 9672.3
ending_hot_water = 2030.2
ending_heating = 8695.1
ending_hot_water = 2437.0
# Impact is starting minus ending for each of heating and hot water, then summed
heating_impact = starting_heating - ending_heating
hot_water_impact = starting_hot_water - ending_hot_water
total_impact = heating_impact + hot_water_impact

38
etl/webscrape/Zoopla.py Normal file
View file

@ -0,0 +1,38 @@
# Initial Code
from seleniumbase import SB
import time
# UPRNs to look up on Zoopla
# NOTE(review): the first five UPRNs are listed twice, so each of those pages is fetched twice - confirm intent
uprns = [
    100071297618,
    100080893397,
    100060778033,
    200004793081,
    100071265143,
    100071297618,
    100080893397,
    100060778033,
    200004793081,
    100071265143,
]

# One entry is appended per UPRN, in the same order as ``uprns``
estimate_list = []

for uprn in uprns:
    # Pause between requests. Probably can change the timings here
    time.sleep(5)

    # A fresh browser session is opened, and closed again, for every UPRN
    with SB(uc=True) as sb:
        property_url = f"https://www.zoopla.co.uk/property/uprn/{uprn}/"
        sb.uc_open_with_reconnect(property_url, 3)
        soup = sb.get_beautiful_soup()

        estimates = soup.find_all("div", {"data-testid": "sale-estimate"})
        # Can change the way we extract the text here: we take the aria-label of the last span, within the last
        # paragraph, of the last sale estimate block on the page
        last_paragraph = estimates[-1].find_all("p")[-1]
        last_span = last_paragraph.find_all("span")[-1]
        estimate_text = last_span["aria-label"]
        estimate_list.append(estimate_text)

View file

@ -0,0 +1,796 @@
import re
import numpy as np
import usaddress
from datetime import datetime
from xml.dom.minidom import parseString
from backend.app.utils import sap_to_epc
from etl.xml_survey_extraction.pcdb import heating_data
# Maps raw property type values to property type labels. Both a coded value ("0") and the label itself
# ("House") map to "House"; only the values listed here are covered
PROPERTY_TYPE_LOOKUP = {
"0": "House",
"House": "House",
"2": "Flat"
}
def get_house_number(address: str) -> str | None:
    """
    Extract the house number from an address.

    The usaddress library is tried first. When it does not label any part of the address as an AddressNumber,
    a regular expression is used instead, since usaddress isn't optimal for addresses with prefixes such as
    'Flat'.

    :param address: The address to parse
    :return: The house number as a string, or None when neither approach finds one
    """
    address_numbers = [token for token, label in usaddress.parse(address) if label == "AddressNumber"]
    if address_numbers:
        # Remove trailing commas (any comma in the token is dropped)
        return address_numbers[0].replace(",", "")

    # Custom approach: 'Flat' or 'Apartment' followed by a number, or just a number at the beginning
    fallback = re.search(r'(?i)(?:flat|apartment)\s*(\d+)|^\s*(\d+)', address)
    if fallback is None:
        return None
    # Only one of the two groups can have matched, so return the first non-None group
    return next(group for group in fallback.groups() if group is not None)
# Parses a survey xml document into an EPC-style dictionary (self.epc) plus additional survey data
# (self.additional_data)
class XmlParser:
# Class-level defaults for the parser's outputs. extract_epc and extract_additional_data assign fresh
# dictionaries to self.epc / self.additional_data, so these shared class-level dictionaries are only seen until
# then - they should not be mutated in place
epc = {}
additional_data = {}
uprn = None
# heating/emissions information
space_heating_kwh = None
water_heating_kwh = None
heating_system = None
heating_controls = None
# Assessor details
surveyor_name = None
number_of_doors = None
number_of_insulated_doors = None
windows = None
# Property dimensions
number_of_floors = None
perimeter = None
heat_loss_perimeter = None
party_wall_length = None
total_floor_area = None
floor_height = None
insulation_wall_area = None
floor_dimensions = None
# The lookup tables below translate coded xml values into EPC-style text. They are indexed directly, so a
# code that is not listed raises a KeyError. Several tables only cover a few codes.
# The age band lookup is based on the country code
AGE_BAND_LOOKUP = {
# England & Wales
"EAW": {
"A": "England and Wales: before 1900",
"B": "England and Wales: 1900-1929",
"C": "England and Wales: 1930-1949",
"D": "England and Wales: 1950-1966",
"E": "England and Wales: 1967-1975",
"F": "England and Wales: 1976-1982",
"G": "England and Wales: 1983-1990",
"H": "England and Wales: 1991-1995",
"I": "England and Wales: 1996-2002",
"J": "England and Wales: 2003-2006",
"K": "England and Wales: 2007-2011",
"L": "England and Wales: 2012 onwards",
}
}
# Efficiency rating code -> rating label
RATINGS_MAP = {
"0": "N/A",
"1": "Very Poor",
"2": "Poor",
"3": "Average",
"4": "Good",
"5": "Very Good"
}
MECHANICAL_VENTILATION_MAP = {
"0": "natural"
}
BUILT_FORM_MAP = {
"1": "Detached",
"3": "End-Terrace",
"4": "Mid-Terrace",
}
GLAZED_AREA_MAP = {
"4": "Much More Than Typical"
}
FUEL_TYPE_MAP = {
"26": "mains gas (not community)"
}
TRANSACTION_TYPE_MAP = {
"13": "ECO assessment"
}
TENURE_MAP = {
"1": "Owner-occupied",
"2": "Rented (social)",
"3": "Rented (private)",
}
# Meter type code -> energy tariff label
TARIFF_MAP = {
"1": "Dual",
"2": "Single"
}
def __init__(self, file, filekey, surveyor_company, uprn=None):
    """
    Parse the xml held in ``file`` and record where it came from.

    :param file: Binary file-like object containing the utf-8 encoded xml
    :param filekey: Location of the file
    :param surveyor_company: Name of the company that carried out the survey
    :param uprn: Optional uprn, handed to get_uprn
    """
    self.filekey = filekey
    self.surveyor_company = surveyor_company

    # Rewind first, in case the file has already been read from
    file.seek(0)
    self.xml = parseString(file.read().decode('utf-8'))

    # We have a lig xml, rather than an rdsap xml, when the Schema-Version-Original tag is present
    self.is_lig = bool(self.xml.getElementsByTagName("Schema-Version-Original"))

    self.get_uprn(uprn)
@staticmethod
def get_node(node):
"""
Utility function to get the node value from the xml, where data might be optional
:return:
"""
node_first_child = node.firstChild
if node_first_child is None:
return None
return node_first_child.nodeValue
def run(self):
    """
    Run every extraction step, in order, for a lig xml. Nothing is extracted for any other xml.

    The building fabric steps run before extract_epc, which needs the floor dimensions and the windows.
    """
    if self.is_lig:
        self.get_assessor_details()
        self.get_heating_and_emissions_data()
        # self.get_detailed_heating_specs()

        # Building fabric
        self.get_doors()
        self.get_floor_dimensions()
        self.get_windows()

        # Get all of the EPC data
        self.extract_epc()

        # Put together all of the additional data we capture
        self.extract_additional_data()
def _parse_heat_loss_corridor(self):
    """
    Read the heat loss corridor value and translate it to its EPC description.

    :return: The EPC description of the corridor
    :raises KeyError: If the xml value is anything other than "2" or "Unheated"
    """
    descriptions = {"2": "unheated corridor", "Unheated": "unheated corridor"}
    # For some reason, this tag is spelt incorrectly in the rdsap xml
    tag = 'Heat-Loss-Corridor' if self.is_lig else 'FlatCoridor'
    return descriptions[self.get_node_value(tag)]
def _parse_heat_loss_corridor_length(self):
    """
    Read the length of the unheated corridor, using the tag that matches the type of xml.

    :return: Whatever get_node_value returns for that tag
    """
    tag = 'Unheated-Corridor-Length' if self.is_lig else 'FlatShelteredWallLength'
    return self.get_node_value(tag)
def _parse_flat_storey_count(self):
    """
    Read the number of storeys for a flat.

    :return: None for a lig xml; otherwise whatever get_node_value returns for the Storeys tag
    """
    # in the EPR the tag is Storeys; the lig xml is not read for this value
    return None if self.is_lig else self.get_node_value('Storeys')
def _parse_flat_top_storey(self):
    """
    Read the Top-Storey value for a flat.

    :return: Whatever get_node_value returns for the Top-Storey tag of a lig xml; None for any other xml
    """
    return self.get_node_value('Top-Storey') if self.is_lig else None
def _parse_floor_level(self):
    """
    Read the floor level of a flat from the first SAP-Flat-Details element.

    :return: The text of the first Level tag inside SAP-Flat-Details for a lig xml; None for any other xml
    """
    if not self.is_lig:
        return None
    flat_details = self.xml.getElementsByTagName('SAP-Flat-Details')[0]
    level_node = flat_details.getElementsByTagName("Level")[0]
    return level_node.firstChild.nodeValue
def extract_epc(self):
# Builds self.epc: a dictionary keyed by hyphenated EPC field names, populated from the parsed xml.
# get_floor_dimensions() and get_windows() must have been run first, otherwise a ValueError is raised.
# Coded xml values are translated through the class lookup tables by direct indexing, so a code that is
# missing from a table raises a KeyError.
if self.floor_dimensions is None:
raise ValueError("Run get_floor_dimensions() first")
if self.windows is None:
raise ValueError("Run get_windows() first")
property_type = self.get_property_type()
# The corridor, storey and floor level fields are only parsed for flats; any other property type gets
# the placeholder values below
if property_type == "Flat":
heat_loss_corridor = self._parse_heat_loss_corridor()
unheated_corridor_length = self._parse_heat_loss_corridor_length()
flat_storey_count = self._parse_flat_storey_count()
flat_top_storey = self._parse_flat_top_storey()
floor_level = self._parse_floor_level()
else:
heat_loss_corridor = "NO DATA!"
unheated_corridor_length = ""
flat_storey_count = ""
flat_top_storey = ""
floor_level = "NO DATA!"
# Average room height across the floors of the main dwelling, leaving out entries flagged as room_roof
floor_height = np.mean([
float(x['room_height']) for x in self.floor_dimensions if
x['building_part_identifier'] == 'Main Dwelling' and not x['room_roof']
])
# Take the most prevalent glazing type, among the windows with window_location '0'
# NOTE(review): max() raises a ValueError when no window has window_location '0' - confirm this can't happen
glazed_type = [w["glazing_type"] for w in self.windows if w['window_location'] == '0']
glazed_type = max(glazed_type, key=glazed_type.count)
# The meter type of the first SAP-Energy-Source element determines the energy tariff
energy_tariff = (
self.xml.getElementsByTagName("SAP-Energy-Source")[0]
.getElementsByTagName("Meter-Type")[0]
.firstChild.nodeValue
)
energy_tariff = self.TARIFF_MAP[energy_tariff]
self.epc = {
"uprn": self.uprn,
"uprn-source": "Address Matched",
"property-type": property_type,
"building-reference-number": "",
**self.get_sap(),
**self.get_property_address(),
"low-energy-fixed-light-count": self.get_node_value('Low-Energy-Fixed-Lighting-Outlets-Count'),
# The age band is looked up by country code first, then by the age band letter
"construction-age-band": self.AGE_BAND_LOOKUP[
self.get_node_value('Country-Code')
][self.get_node_value('Construction-Age-Band')],
"mainheat-energy-eff": self.RATINGS_MAP[
self.get_property_summary_value('Main-Heating', 'Energy-Efficiency-Rating')
],
"windows-env-eff": self.RATINGS_MAP[
self.get_property_summary_value('Window', 'Environmental-Efficiency-Rating')
],
"lighting-energy-eff": self.RATINGS_MAP[
self.get_property_summary_value('Lighting', 'Energy-Efficiency-Rating')
],
"environment-impact-potential": self.get_energy_assessment_value('Environmental-Impact-Potential'),
"mainheatcont-description":
self.get_property_summary_value('Main-Heating-Controls', 'Description'),
"sheating-energy-eff": self.RATINGS_MAP[
self.get_property_summary_value('Secondary-Heating', 'Energy-Efficiency-Rating')
],
"local-authority": "", # Not included in the xml
"local-authority-label": "",
"fixed-lighting-outlets-count": self.get_node_value('Fixed-Lighting-Outlets-Count'),
"energy-tariff": energy_tariff,
"mechanical-ventilation": self.MECHANICAL_VENTILATION_MAP[self.get_node_value('Mechanical-Ventilation')],
"solar-water-heating-flag": self.get_node_value('Solar-Water-Heating'),
"co2-emissions-potential": self.get_energy_assessment_value('CO2-Emissions-Potential'),
"number-heated-rooms": self.get_node_value('Heated-Room-Count'),
"floor-description": self.get_property_summary_value('Floor', 'Description'),
"energy-consumption-potential": self.get_energy_assessment_value('Energy-Consumption-Potential'),
"built-form": self.BUILT_FORM_MAP[self.get_node_value('Built-Form')],
"number-open-fireplaces": self.get_node_value('Open-Fireplaces-Count'),
"windows-description": self.get_property_summary_value('Window', 'Description'),
"glazed-area": self.GLAZED_AREA_MAP[self.get_node_value('Glazed-Area')],
"inspection-date": self.get_node_value('Inspection-Date'),
"mains-gas-flag": self.get_node_value('Mains-Gas'),
"co2-emiss-curr-per-floor-area": self.get_energy_assessment_value('CO2-Emissions-Current-Per-Floor-Area'),
"heat-loss-corridor": heat_loss_corridor,
"unheated-corridor-length": unheated_corridor_length,
"flat-storey-count": flat_storey_count,
"roof-energy-eff": self.RATINGS_MAP[
self.get_property_summary_value('Roof', 'Energy-Efficiency-Rating')
],
"total-floor-area": self.get_node_value('Total-Floor-Area'),
"environment-impact-current": self.get_energy_assessment_value('Environmental-Impact-Current'),
"roof-description": self.get_property_summary_value('Roof', 'Description'),
"floor-energy-eff": self.RATINGS_MAP[
self.get_property_summary_value('Floor', 'Energy-Efficiency-Rating')
],
"number-habitable-rooms": self.get_node_value('Habitable-Room-Count'),
"hot-water-env-eff": self.RATINGS_MAP[
self.get_property_summary_value('Hot-Water', 'Environmental-Efficiency-Rating')
],
"mainheatc-energy-eff": self.RATINGS_MAP[
self.get_property_summary_value('Main-Heating-Controls', 'Energy-Efficiency-Rating')
],
"main-fuel": self.FUEL_TYPE_MAP[self.get_node_value('Main-Fuel-Type')],
"lighting-env-eff": self.RATINGS_MAP[
self.get_property_summary_value('Lighting', 'Environmental-Efficiency-Rating')
],
"windows-energy-eff": self.RATINGS_MAP[
self.get_property_summary_value('Window', 'Energy-Efficiency-Rating')
],
"floor-env-eff": self.RATINGS_MAP[
self.get_property_summary_value('Floor', 'Environmental-Efficiency-Rating')
],
"sheating-env-eff": self.RATINGS_MAP[
self.get_property_summary_value('Secondary-Heating', 'Environmental-Efficiency-Rating')
],
"lighting-description": self.get_property_summary_value('Lighting', 'Description'),
"roof-env-eff": self.RATINGS_MAP[
self.get_property_summary_value('Roof', 'Environmental-Efficiency-Rating')
],
"walls-energy-eff": self.RATINGS_MAP[
self.get_property_summary_value('Wall', 'Energy-Efficiency-Rating')
],
"photo-supply": self.get_photo_supply(),
"lighting-cost-potential": self.get_energy_assessment_value('Lighting-Cost-Potential'),
"mainheat-env-eff": self.RATINGS_MAP[
self.get_property_summary_value('Main-Heating', 'Environmental-Efficiency-Rating')
],
"multi-glaze-proportion": self.get_node_value('Multiple-Glazed-Proportion'),
# Same source value as "mainheatcont-description" above
"main-heating-controls": self.get_property_summary_value('Main-Heating-Controls', 'Description'),
"flat-top-storey": flat_top_storey,
"secondheat-description": self.get_property_summary_value('Secondary-Heating', 'Description'),
"walls-env-eff": self.RATINGS_MAP[
self.get_property_summary_value('Wall', 'Environmental-Efficiency-Rating')
],
"transaction-type": self.TRANSACTION_TYPE_MAP[self.get_node_value('Transaction-Type')],
"extension-count": self.get_node_value('Extensions-Count'),
"mainheatc-env-eff": self.RATINGS_MAP[
self.get_property_summary_value('Main-Heating-Controls', 'Environmental-Efficiency-Rating')
],
"lmk-key": "", # Doesn't exist for non-EPC xmls
"wind-turbine-count": self.get_node_value('Wind-Turbines-Count'),
"tenure": self.TENURE_MAP[self.get_node_value('Tenure')],
"floor-level": floor_level,
"potential-energy-efficiency": self.get_energy_assessment_value('Energy-Rating-Potential'),
# The potential rating letter is derived from the potential energy rating score
"potential-energy-rating": sap_to_epc(float(self.get_energy_assessment_value('Energy-Rating-Potential'))),
"hot-water-energy-eff": self.RATINGS_MAP[
self.get_property_summary_value('Hot-Water', 'Energy-Efficiency-Rating')
],
"low-energy-lighting": self.get_node_value('Low-Energy-Lighting'),
"walls-description": self.get_property_summary_value('Wall', 'Description'),
"hotwater-description": self.get_property_summary_value('Hot-Water', 'Description'),
"co2-emissions-current": self.get_node_value('CO2-Emissions-Current'),
"heating-cost-current": self.get_node_value('Heating-Cost-Current'),
"heating-cost-potential": self.get_energy_assessment_value('Heating-Cost-Potential'),
"hot-water-cost-current": self.get_node_value('Hot-Water-Cost-Current'),
"hot-water-cost-potential": self.get_energy_assessment_value('Hot-Water-Cost-Potential'),
"lighting-cost-current": self.get_node_value('Lighting-Cost-Current'),
"energy-consumption-current": self.get_node_value('Energy-Consumption-Current'),
# Both lodgement fields are populated from the inspection date
"lodgement-date": self.get_node_value('Inspection-Date'),
"lodgement-datetime":
datetime.strptime(self.get_node_value('Inspection-Date'), "%Y-%m-%d").isoformat(),
"mainheat-description": self.get_property_summary_value('Main-Heating', 'Description'),
"floor-height": floor_height,
"glazed-type": glazed_type,
}
def get_insulation_wall_area(self):
    """
    Works out the wall area of the main dwelling that is available for insulation.

    The gross wall area is the heat loss perimeter multiplied by the room height, summed over every "Main Dwelling"
    floor that is not a room in roof. The area of every window whose window_location is "0" is then subtracted.
    Extensions are left out: there are no recommendations for extensions yet, so their wall area is not needed.
    :return: gross wall area minus the window area
    """
    gross_wall_area = 0
    for floor in self.floor_dimensions:
        if floor["building_part_identifier"] != "Main Dwelling" or floor["room_roof"]:
            continue
        gross_wall_area += float(floor["heat_loss_perimeter"]) * float(floor["room_height"])

    glazed_area = 0
    for window in self.windows:
        if window["window_location"] == "0":
            glazed_area += float(window["window_area"])

    return gross_wall_area - glazed_area
def extract_additional_data(self):
    """
    Populates self.insulation_wall_area and self.additional_data with the survey values that sit outside the core
    EPC fields.

    Combines attributes that are already set on the parser (surveyor details, heating kWh, door counts, floor
    dimensions, windows) with a handful of XML nodes that are read through get_node_value.
    Raises IndexError when the main dwelling has no floor dimension whose floor is "0", and KeyError when a boolean
    or cylinder insulation code is missing from the lookups defined below.
    """
    self.insulation_wall_area = self.get_insulation_wall_area()

    # The ground floor ("0") of the main dwelling is used as the insulation floor area
    ground_floors = []
    for dimension in self.floor_dimensions:
        if dimension["building_part_identifier"] == "Main Dwelling" and dimension["floor"] == "0":
            ground_floors.append(dimension)
    main_dwelling_ground_floor_area = ground_floors[0]["total_floor_area"]

    window_areas = [float(w["window_area"]) for w in self.windows if w["window_location"] == "0"]
    number_of_windows = len(window_areas)
    windows_area = sum(window_areas)

    flag_lookup = {"true": True, "false": False, "Y": True, "N": False}
    insulation_type_lookup = {None: "", "1": "Foam"}

    # Both cylinder values are optional in the XML, so a missing/empty node is stored as None
    raw_thickness = self.get_node_value('Cylinder-Insulation-Thickness')
    cylinder_insulation_thickness = int(raw_thickness) if raw_thickness else None

    raw_thermostat = self.get_node_value('Cylinder-Thermostat')
    cylinder_thermostat = flag_lookup[raw_thermostat] if raw_thermostat else None

    self.additional_data = {
        "file_location": self.filekey,
        "surveyor_name": self.surveyor_name,
        "surveyor_company": self.surveyor_company,
        "space_heating_kwh": self.space_heating_kwh,
        "water_heating_kwh": self.water_heating_kwh,
        # "heating_system": self.heating_system,
        # "heating_controls": self.heating_controls,
        "number_of_doors": self.number_of_doors,
        "number_of_insulated_doors": self.number_of_insulated_doors,
        "number_of_floors": self.number_of_floors,
        "insulation_wall_area": self.insulation_wall_area,
        "heat_loss_perimeter": self.heat_loss_perimeter,
        "party_wall_length": self.party_wall_length,
        "perimeter": self.perimeter,
        "rooms_with_bath_and_or_shower": int(self.get_node_value('Rooms-With-Bath-And-Or-Shower')),
        "rooms_with_mixer_shower_no_bath": int(self.get_node_value('Rooms-With-Mixer-Shower-No-Bath')),
        "room_with_bath_and_mixer_shower": int(self.get_node_value('Rooms-With-Bath-And-Mixer-Shower')),
        "percent_draftproofed": int(self.get_node_value('Percent-Draughtproofed')),
        "has_hot_water_cylinder": flag_lookup[self.get_node_value('Has-Hot-Water-Cylinder')],
        "cylinder_insulation_type": insulation_type_lookup[self.get_node_value('Cylinder-Insulation-Type')],
        "cylinder_insulation_thickness": cylinder_insulation_thickness,
        "cylinder_thermostat": cylinder_thermostat,
        "main_dwelling_ground_floor_area": float(main_dwelling_ground_floor_area),
        "number_of_windows": int(number_of_windows),
        "windows_area": float(windows_area),
    }
def get_node_value(self, tag_name):
    """
    Returns the text of the first element named tag_name anywhere in the document.
    :param tag_name: element name to search for
    :return: nodeValue of the element's first child, or None when the element is absent or has no child
    """
    matches = self.xml.getElementsByTagName(tag_name)
    if not matches:
        return None
    first_child = matches[0].firstChild
    if not first_child:
        return None
    return first_child.nodeValue
def get_node_value_from_floor_dimensions(self, tag_name):
    """
    Returns the text of tag_name inside the first <SAP-Floor-Dimension> element of the document.
    :param tag_name: element name to look for inside that floor dimension
    :return: nodeValue of the first match, or None when the floor dimension, the tag or its text is missing
    """
    floor_dimensions = self.xml.getElementsByTagName('SAP-Floor-Dimension')
    if not floor_dimensions:
        return None
    matches = floor_dimensions[0].getElementsByTagName(tag_name)
    if not matches or not matches[0].firstChild:
        return None
    return matches[0].firstChild.nodeValue
def get_property_summary_value(self, section, tag_name):
    """
    Looks up a value inside the first <Property-Summary> element.
    :param section: name of the child section, e.g. 'Wall' or 'Hot-Water'
    :param tag_name: name of the tag inside the first matching section
    :return: text of the first matching tag, or None when the section, the tag or its text is missing

    Raises IndexError when the document has no <Property-Summary> element.
    """
    summary = self.xml.getElementsByTagName('Property-Summary')[0]
    sections = summary.getElementsByTagName(section)
    if not sections:
        return None
    matches = sections[0].getElementsByTagName(tag_name)
    if not matches or not matches[0].firstChild:
        return None
    return matches[0].firstChild.nodeValue
def get_energy_assessment_value(self, tag_name):
    """
    Looks up a value inside the first <Energy-Assessment> element.
    :param tag_name: name of the tag to read from the energy assessment
    :return: text of the first matching tag, or None when the energy assessment, the tag or its text is missing
    """
    # Check the node list before indexing it: the previous version indexed first and then tested the element,
    # so a document without <Energy-Assessment> raised IndexError instead of returning None
    assessments = self.xml.getElementsByTagName('Energy-Assessment')
    if not assessments:
        return None
    tag = assessments[0].getElementsByTagName(tag_name)
    if tag and tag[0].firstChild:
        return tag[0].firstChild.nodeValue
    return None
def get_uprn(self, uprn):
    """
    Sets self.uprn, preferring the value supplied by the caller over the one stored in the XML.
    :param uprn: UPRN provided by the caller; when it is not None it is stored unchanged
    :return: None

    When the UPRN is read from the XML, self.uprn becomes:
      * -1 when there is no <UPRN> element or the element is empty
      * 0 when every character of the value is "0" (no UPRN has been set)
      * otherwise the lower-cased text following the "uprn-" prefix, or the whole lower-cased value when the
        prefix is absent
    """
    if uprn is not None:
        self.uprn = uprn
        return

    # A missing <UPRN> element is handled the same way as an empty one rather than raising IndexError
    uprn_nodes = self.xml.getElementsByTagName('UPRN')
    uprn_tag = uprn_nodes[0].firstChild if uprn_nodes else None
    if uprn_tag is None:
        self.uprn = -1
        return

    raw_uprn = uprn_tag.nodeValue
    # If all of the characters in the UPRN are 0, then no UPRN has been set
    if raw_uprn.count("0") == len(raw_uprn):
        self.uprn = 0
        return

    # Values normally look like "UPRN-000012345678"; a value without the prefix used to raise IndexError
    parts = raw_uprn.lower().split("uprn-")
    self.uprn = parts[1] if len(parts) > 1 else parts[0]
def get_property_type(self):
    """
    Resolves the property type of the survey through PROPERTY_TYPE_LOOKUP.

    Reads the <Property-Type> elements and falls back to <PropertyType1> when there are none. When several
    elements are present they must all map to the same property type.
    :return: the looked-up property type
    Raises ValueError when no XML has been read yet or when the elements map to more than one property type.
    """
    if not self.xml:
        raise ValueError("You need to read the file first")

    type_nodes = self.xml.getElementsByTagName('Property-Type') or self.xml.getElementsByTagName('PropertyType1')

    if len(type_nodes) <= 1:
        return PROPERTY_TYPE_LOOKUP[type_nodes[0].firstChild.nodeValue]

    distinct_types = set()
    for node in type_nodes:
        distinct_types.add(PROPERTY_TYPE_LOOKUP[node.firstChild.nodeValue])
    if len(distinct_types) > 1:
        raise ValueError("Multiple property types found")
    return distinct_types.pop()
def get_sap(self):
    """
    Reads the current SAP score from the first <Energy-Rating-Current> element and converts it to an EPC rating
    with sap_to_epc.
    :return: dict with the score as a string ("current-energy-efficiency") and the rating
             ("current-energy-rating")
    """
    rating_node = self.xml.getElementsByTagName('Energy-Rating-Current')[0]
    score = int(rating_node.firstChild.nodeValue)
    rating = sap_to_epc(score)
    return {"current-energy-efficiency": str(score), "current-energy-rating": rating}
def get_heating_and_emissions_data(self):
    """
    Extracts the heating requirements of the existing dwelling and stores them on the parser:
    1) Space heating requirement -> self.space_heating_kwh (first <Space-Heating-Existing-Dwelling>)
    2) Water heating requirement -> self.water_heating_kwh (first <Water-Heating>)
    Both values are stored as the raw text found in the XML.
    :return: None
    """
    def first_text(tag_name):
        return self.xml.getElementsByTagName(tag_name)[0].firstChild.nodeValue

    self.space_heating_kwh = first_text('Space-Heating-Existing-Dwelling')
    self.water_heating_kwh = first_text('Water-Heating')
def get_detailed_heating_specs(self):
    """
    Given the heating data that is found in the <SAP-Heating> tag, we extract the details about the heating
    system.

    Only the first <Main-Heating> entry under <SAP-Heating>/<Main-Heating-Details> is read. Its heating code and
    heating control code are resolved to descriptions through the module level heating_data table; when a code has
    no match, a placeholder string containing that code is stored instead.
    Sets self.heating_system and self.heating_controls.
    :return: None
    """
    sap_main_heating_details = (
        self.xml.getElementsByTagName('SAP-Heating')[0]
        .getElementsByTagName("Main-Heating-Details")[0]
        .getElementsByTagName("Main-Heating")[0]
    )

    # Get the heating system
    heating_code = sap_main_heating_details.getElementsByTagName("Main-Heating-Number")[0].firstChild.nodeValue
    heating_system = heating_data[heating_data["code"] == int(heating_code)]["description"]
    heating_system = heating_system.values[0] if not heating_system.empty else f"Heating code: {heating_code}"

    # Get the heating controls
    heating_controls_code = (
        sap_main_heating_details.getElementsByTagName("Main-Heating-Control")[0].firstChild.nodeValue
    )
    heating_controls = heating_data[heating_data["code"] == int(heating_controls_code)]["description"]
    # The placeholder must report the controls code; it previously reported the heating system code
    heating_controls = (
        heating_controls.values[0]
        if not heating_controls.empty
        else f"Heating Controls code: {heating_controls_code}"
    )

    self.heating_system = heating_system
    self.heating_controls = heating_controls
def get_doors(self):
    """
    Reads the door counts from the first <SAP-Property-Details> element.
    Sets self.number_of_doors (<Door-Count>) and self.number_of_insulated_doors (<Insulated-Door-Count>) as ints.
    :return: None
    """
    def door_count(tag_name):
        property_details = self.xml.getElementsByTagName('SAP-Property-Details')[0]
        return int(property_details.getElementsByTagName(tag_name)[0].firstChild.nodeValue)

    self.number_of_doors = door_count('Door-Count')
    self.number_of_insulated_doors = door_count('Insulated-Door-Count')
def get_photo_supply(self):
    """
    Reads the roof coverage from the first <Photovoltaic-Supply> element.
    Only the <None-Or-No-Details> form of the tag is supported.
    :return: text of <Percent-Roof-Area> found inside <None-Or-No-Details>
    Raises NotImplementedError when the supply has no <None-Or-No-Details> child.
    """
    photo_supply_tag = self.xml.getElementsByTagName("Photovoltaic-Supply")[0]
    no_details = photo_supply_tag.getElementsByTagName("None-Or-No-Details")
    if not no_details:
        raise NotImplementedError("Implement me")
    percent_roof_area = no_details[0].getElementsByTagName("Percent-Roof-Area")[0]
    return percent_roof_area.firstChild.nodeValue
def get_assessor_details(self):
    """
    Stores the name of the energy assessor on the parser.
    Sets self.surveyor_name from the first <Name> inside the first <Energy-Assessor> element.
    :return: None
    """
    assessor = self.xml.getElementsByTagName('Energy-Assessor')[0]
    name_node = assessor.getElementsByTagName("Name")[0]
    self.surveyor_name = name_node.firstChild.nodeValue
def get_property_address(self):
    """
    Builds the address fields of the property from the first <Property> element.

    The three address lines, the post town and the postcode are read through self.get_node; "address" joins the
    address lines that are not None with ", ". The county is taken from <County> when that tag exists and is ""
    otherwise. The constituency fields are always None because they are not available in the XML.
    :return: dict with address1, address2, address3, posttown, postcode, address, county, constituency and
             constituency-label
    """
    property_tag = self.xml.getElementsByTagName("Property")[0]

    def read(tag_name):
        return self.get_node(property_tag.getElementsByTagName(tag_name)[0])

    address1 = read("Address-Line-1")
    address2 = read("Address-Line-2")
    address3 = read("Address-Line-3")
    posttown = read("Post-Town")
    postcode = read("Postcode")

    address_lines = []
    for line in (address1, address2, address3):
        if line is not None:
            address_lines.append(line)
    address = ", ".join(address_lines)

    county_nodes = property_tag.getElementsByTagName("County")
    county = county_nodes[0].firstChild.nodeValue if county_nodes else ""

    return {
        "address1": address1,
        "address2": address2,
        "address3": address3,
        "posttown": posttown,
        "postcode": postcode,
        "address": address,
        "county": county,
        # Seems to be unavailable in the xml
        "constituency": None,
        "constituency-label": None
    }
def get_floor_dimensions(self):
    """
    Extracts physical measurements of the property such as the floor area, room height, etc.
    across the main dwelling and any extensions.

    Every <SAP-Building-Part> contributes one entry per <SAP-Floor-Dimension> plus one entry per
    <SAP-Room-In-Roof>; a room in roof is numbered one above the highest floor of its building part.
    Sets self.floor_dimensions, self.number_of_floors (entries that belong to "Main Dwelling"),
    self.heat_loss_perimeter, self.party_wall_length and self.perimeter.

    The perimeter figures take the largest value per building part and add the parts together. A building part
    with no recorded value (e.g. no <Party-Wall-Length> tag) contributes nothing to the total. Previously such a
    part contributed float('-inf'), which turned the party wall length and the overall perimeter into -inf.
    :return: None
    """

    def get_part_value(node, tag_name):
        element = node.getElementsByTagName(tag_name)
        if element and element[0].firstChild:
            return element[0].firstChild.nodeValue
        return None

    def sum_of_part_maxima(key):
        # Largest value of `key` per building part, summed over the parts that have a value at all
        maxima = {}
        for dimension in floor_dimensions:
            part = dimension['building_part_identifier']
            current = maxima.setdefault(part, None)
            if dimension[key]:
                value = float(dimension[key])
                if current is None or value > current:
                    maxima[part] = value
        return sum(value for value in maxima.values() if value is not None)

    # Each part will correspond to the main dwelling or to an extension
    floor_dimensions = []
    for building_part in self.xml.getElementsByTagName("SAP-Building-Part"):
        building_part_identifier = building_part.getElementsByTagName("Identifier")[0].firstChild.nodeValue

        data = [
            {
                'building_part_identifier': building_part_identifier,
                'floor': get_part_value(floor_dimension, 'Floor'),
                'floor_construction': get_part_value(floor_dimension, 'Floor-Construction'),
                'floor_insulation': get_part_value(floor_dimension, 'Floor-Insulation'),
                'heat_loss_perimeter': get_part_value(floor_dimension, 'Heat-Loss-Perimeter'),
                'party_wall_length': get_part_value(floor_dimension, 'Party-Wall-Length'),
                'total_floor_area': get_part_value(floor_dimension, 'Total-Floor-Area'),
                'room_height': get_part_value(floor_dimension, 'Room-Height'),
                "room_roof": False
            } for floor_dimension in building_part.getElementsByTagName("SAP-Floor-Dimension")
        ]

        room_roofs = building_part.getElementsByTagName("SAP-Room-In-Roof")
        room_roof_data = []
        if room_roofs:
            # The floor number is the same for every room in roof of this part, so work it out once
            room_roof_floor = str(max(int(d["floor"]) for d in data) + 1)
            room_roof_data = [
                {
                    "building_part_identifier": building_part_identifier,
                    "floor": room_roof_floor,
                    "floor_construction": "",
                    "floor_insulation": rr.getElementsByTagName("Insulation")[0].firstChild.nodeValue,
                    "heat_loss_perimeter": "",
                    "party_wall_length": "",
                    "total_floor_area": rr.getElementsByTagName("Floor-Area")[0].firstChild.nodeValue,
                    "room_height": "",
                    "room_roof": True
                } for rr in room_roofs
            ]

        floor_dimensions.extend(data)
        floor_dimensions.extend(room_roof_data)

    self.floor_dimensions = floor_dimensions
    self.number_of_floors = len(
        [f for f in self.floor_dimensions if f["building_part_identifier"] == "Main Dwelling"]
    )

    # We extract the maximum heat loss perimeter and party wall length, per building part
    self.heat_loss_perimeter = sum_of_part_maxima('heat_loss_perimeter')
    self.party_wall_length = sum_of_part_maxima('party_wall_length')
    self.perimeter = self.heat_loss_perimeter + self.party_wall_length
@staticmethod
def _parse_windows_content(window, glazing_type_lookup, orientation_lookup):
    """
    Converts one <SAP-Window> element into a flat dict.
    :param window: the window element to read
    :param glazing_type_lookup: maps the <Glazing-Type> code to a description
    :param orientation_lookup: maps the <Orientation> code to a compass direction
    :return: dict with window_location, window_area, window_type, glazing_type, pvc_frame, glazing_gap and
             orientation
    Raises KeyError when a glazing type or orientation code is missing from its lookup.
    """
    def required_text(tag_name):
        return window.getElementsByTagName(tag_name)[0].firstChild.nodeValue

    def optional_text(tag_name):
        found = window.getElementsByTagName(tag_name)
        if not found:
            return None
        return found[0].firstChild.nodeValue

    # There may not be a pvc frame, and single glazed windows may not have a glazing gap
    pvc_frame = optional_text("PVC-Frame")
    glazing_gap = optional_text("Glazing-Gap")

    return {
        "window_location": required_text("Window-Location"),
        "window_area": required_text("Window-Area"),
        "window_type": required_text("Window-Type"),
        "glazing_type": glazing_type_lookup[required_text("Glazing-Type")],
        "pvc_frame": pvc_frame,
        "glazing_gap": glazing_gap,
        "orientation": orientation_lookup[required_text("Orientation")]
    }
def get_windows(self):
    """
    Extracts the data held about each window of the property.

    Every <SAP-Window> inside the first <SAP-Windows> element is parsed with _parse_windows_content and the
    results are stored, in document order, in self.windows.
    :return: None
    """
    glazing_type_lookup = {
        "3": "double glazing, unknown install date",
        "5": "Single glazing",
    }
    compass_points = ["North", "North East", "East", "South East", "South", "South West", "West", "North West"]
    # Orientation codes are the strings "1" to "8", running clockwise from North
    orientation_lookup = {str(code): point for code, point in enumerate(compass_points, start=1)}

    windows_tag = self.xml.getElementsByTagName("SAP-Windows")[0]
    parsed_windows = []
    for window in windows_tag.getElementsByTagName("SAP-Window"):
        parsed_windows.append(
            self._parse_windows_content(
                window=window,
                glazing_type_lookup=glazing_type_lookup,
                orientation_lookup=orientation_lookup
            )
        )
    self.windows = parsed_windows

View file

@ -1,3 +1,142 @@
from backend.app.db.functions.energy_assessment_functions import bulk_insert_energy_assessments
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from utils.s3 import read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder, save_csv_to_s3
from utils.logger import setup_logger
from etl.xml_survey_extraction.XmlParser import XmlParser
import os
import pandas as pd
from io import BytesIO
logger = setup_logger()
# S3 bucket that main() lists and reads the energy assessment XMLs from
BUCKET = "retrofit-energy-assessments-dev"
# First path segment of the asset list that main() writes to S3
USER_ID = 8
# Plain (non f-string) template with {USER_ID} and {PORTFOLIO_ID} placeholders.
# NOTE(review): not referenced by the code visible in this script - confirm whether it is still needed
non_invasive_recommendations_filepath = "{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
# Keyed by an integer id; each entry names the project code and surveyor used to build the S3 folder that main()
# scans, plus the request bodies ("bodies") for that project. main() fills in "trigger_file_path" on every body
# and uses the "portfolio_id" of the first body in the asset list path.
SCENARIOS = {
101: {
"project_code": "VEC001",
"surveyor": "JAFFERSONS ENERGY CONSULTANTS",
"bodies": [
# Scenario A: see "scenario_name" and "inclusions" below for what is covered
# NOTE(review): this comment previously read "Cavity wall insulation", which is not one of the inclusions
{
"portfolio_id": str(101),
"housing_type": "Private",
"goal": "Increasing EPC",
"goal_value": "A",
"trigger_file_path": "",
"already_installed_file_path": "",
"patches_file_path": "",
"non_invasive_recommendations_file_path": "",
"inclusions": [
"draught_proofing", "secondary_glazing", "trickle_vents", "low_energy_lighting",
],
"budget": None,
"scenario_name": "Quick wins - do now while tenanted",
"multi_plan": True,
},
# Scenario B: see "scenario_name" and "inclusions" below for what is covered
# NOTE(review): this comment previously read "CWI, Solar PV, AHSP", none of which are in the inclusions
{
"portfolio_id": str(101),
"housing_type": "Private",
"goal": "Increasing EPC",
"goal_value": "A",
"trigger_file_path": "",
"already_installed_file_path": "",
"patches_file_path": "",
"non_invasive_recommendations_file_path": "",
"inclusions": [
"draught_proofing",
"secondary_glazing",
"trickle_vents",
"low_energy_lighting",
"suspended_floor_insulation",
"internal_wall_insulation"
],
"budget": None,
"scenario_name": "Do when void",
"multi_plan": True,
},
]
},
}
# TODO: These non-intrusive recommendations should be detected from the EPRs, the scenarios and the condition report?
# For recommendations like trickle vents, we can deduce this from the condition report, depending on the
# ventilation of the room and the presence of trickle vents.
# One entry per property: the UPRN plus the list of recommendations for it. Each recommendation has a "type",
# a "cost" (None throughout) and, with one exception flagged below, a "survey" flag.
# NOTE(review): not referenced by the code visible in this script - confirm where it is consumed
NON_INTRUSITVE_RECOMMENDATIONS = [
{
# 2 Grove Mansions
"uprn": 121016121,
"recommendations": [
{
"type": "draught_proofing",
"cost": None,
"survey": True
},
{"type": "secondary_glazing", "cost": None, "survey": True},
{"type": "trickle_vents", "cost": None, "survey": True},
{"type": "suspended_floor_insulation", "cost": None, "survey": True},
{"type": "internal_wall_insulation", "cost": None, "survey": True},
]
},
{
# 8 Grove Mansions
"uprn": 10024087855,
"recommendations": [
{"type": "draught_proofing", "cost": None, "survey": True},
{"type": "secondary_glazing", "cost": None, "survey": True},
{"type": "trickle_vents", "cost": None, "survey": True},
{"type": "low_energy_lighting", "cost": None, "survey": True},
{"type": "internal_wall_insulation", "cost": None, "survey": True},
]
},
{
# 9 Grove Mansions
"uprn": 121016128,
"recommendations": [
{"type": "draught_proofing", "cost": None, "survey": True},
{"type": "secondary_glazing", "cost": None, "survey": True},
{"type": "trickle_vents", "cost": None, "survey": True},
{"type": "low_energy_lighting", "cost": None, "survey": True},
# NOTE(review): the only recommendation without a "survey" key - confirm whether that is intentional
{"type": "suspended_floor_insulation", "cost": None},
{"type": "internal_wall_insulation", "cost": None, "survey": True},
]
},
{
# 5 Grove Mansions
"uprn": 121016124,
"recommendations": [
{"type": "secondary_glazing", "cost": None, "survey": True},
{"type": "trickle_vents", "cost": None, "survey": True},
{"type": "low_energy_lighting", "cost": None, "survey": True},
{"type": "internal_wall_insulation", "cost": None, "survey": True},
]
},
{
# 14 Grove Mansions
"uprn": 121016117,
"recommendations": [
{"type": "draught_proofing", "cost": None, "survey": True},
{"type": "secondary_glazing", "cost": None, "survey": True},
{"type": "trickle_vents", "cost": None, "survey": True},
{"type": "low_energy_lighting", "cost": None, "survey": True},
{"type": "internal_wall_insulation", "cost": None, "survey": True},
]
},
{
# 19 Grove Mansions
# NOTE(review): this UPRN is identical to the one used for 14 Grove Mansions above - presumably a copy/paste
# slip; confirm the correct UPRN for this property
"uprn": 121016117,
"recommendations": [
{"type": "low_energy_lighting", "cost": None, "survey": True},
{"type": "secondary_glazing", "cost": None, "survey": True},
{"type": "internal_wall_insulation", "cost": None, "survey": True},
{"type": "room_roof_insulation", "cost": None, "survey": True},
]
},
]
def main():
"""
This function executes the main process, which will retrieve data from the specified locations, extract the data
@ -6,4 +145,124 @@ def main():
"""
# TODO: Build solution to get this data from Onedrive and store what we need in S3
# In s3, we have a bucket called retrofit-energy-assessments-{stage} which
# In s3, we have a bucket called retrofit-energy-assessments-{stage} which contains the data we need
# The data is stored in a folder called {surveyors}/{project_code}/{uprn}
# We'll need to get the uprn from the folder name, which we can do with EpcSearcher class
# TODO: Pull out county, as in create_epc_records in the router, we pull it from the latest EPC, but we should
# be able to deduce it from just the address. Same for constituency and constituency_label
# TODO: Store the project code in the database
#
for scenario_config in SCENARIOS.values():
energy_assessments = list_files_and_subfolders_in_s3_folder(
bucket_name=BUCKET, folder_name=f"{scenario_config['surveyor']}/{scenario_config['project_code']}/"
)
logger.info(
f"Found {len(energy_assessments)} energy assessments for {scenario_config['surveyor']} and "
f"{scenario_config['project_code']}"
)
assessments_map = {}
for assessment in energy_assessments:
uploaded_xmls = list_xmls_in_s3_folder(
bucket_name=BUCKET, folder_name=os.path.join(assessment, "docs & plans")
)
uprn = int(assessment.rstrip("/").split("/")[-1])
assessments_map[uprn] = uploaded_xmls
logger.info(f"Exatracted XMLS for the energy assessments")
# TODO: If we have many uploads, we can do them in batches so we don't try to upload huge amounts of data to
# the database at once
# TODO: We now have detailed information about primary and secondary walls, so we should use this information
# in our recommendations when we have it
# For example, for 77 Peryn Road, W3 7LT, the energy assessment has a main dwelling and two extensions,
# where
# the physical dimensions and the fabric of each building is constructed in a way as if each building is
# separate. We should use this information to make recommendations that are specific to each building
# part, though the problem here is that while the fabric and dimensions are separate, the actual SAP,
# CO2, etc
# figures span across the entire property.
# Idea: We can collect all of this information by building part and store it separately in the database
# against the uprn. We can have key data for the EPC, but then also additional data for each
# building
# part. We can then use this data to make recommendations that are specific to each building part
# We should probably re-think this data model, so we break up the data in a more considered fashion and
# produce
# the underlying EPC data as a summary of the building parts. Not only do we have data against the main
# dwelling and extensions, but we also have multiple windows with individual pieces of information that
# we can use to make recommendations. We should store this data in a way that we can easily access it and
# use it to make recommendations (e.g. we should have a Windows table)
# For each property, we download the xmls and extract the data
database_data = []
for uprn, xmls in assessments_map.items():
extracted_data = {}
for xml in xmls:
xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml)
xml_data_io = BytesIO(xml_data)
xml_parser = XmlParser(
file=xml_data_io,
filekey=os.path.join(f"s3://{BUCKET}", xml),
uprn=uprn,
surveyor_company=scenario_config["surveyor"],
)
xml_parser.run()
if xml_parser.is_lig:
logger.info(f"Extracted data from {xml}")
extracted_epc = xml_parser.epc
extracted_additional_data = xml_parser.additional_data
data_to_update = {
**extracted_epc, **extracted_additional_data
}
# We need to update the keys to match the database schema - i.e. we should replace all hyphens with
# underscores
data_to_update = {k.replace("-", "_"): v for k, v in data_to_update.items()}
extracted_data.update(data_to_update)
database_data.append(extracted_data)
logger.info("Uploading data to the database")
session = sessionmaker(bind=db_engine)()
bulk_insert_energy_assessments(session, database_data)
session.close()
# Create the asset list
asset_list = [
{"uprn": x["uprn"], "address": x["address1"], "postcode": x["postcode"]} for x in database_data
]
asset_list = pd.DataFrame(asset_list)
# Store the asset list in s3
filename = f"{USER_ID}/{scenario_config['bodies'][0]['portfolio_id']}/non_intrusives.csv"
save_csv_to_s3(
dataframe=asset_list,
bucket_name="retrofit-plan-inputs-dev",
file_name=filename
)
for body in scenario_config["bodies"]:
body["trigger_file_path"] = filename
print(body)
# TODO: In order to get the full data associated to the heating system, we need to download and parse the pcdb which
# can be found here: https://www.ncm-pcdb.org.uk/pcdb/pcdb10.dat
# https://www.ncm-pcdb.org.uk/sap/download
# However retrieving this data is not a priority, so we can leave this for now as parsing the database
# is a non-trivial task
# TODO: The condition report contains additional data such as the number of bedrooms and the number of bathrooms
# We can extract this data and store it in the database as well. We can then update our kwargs methodology
# that is passed to the property class, where instead we store this additional data in our database (it could
# be stored in the energy assessment table, or in a separate table) and then when we're passed additional data
# we can query the database for this data and use it to update the property object, instead of storing it
# in the asset list and pulling it out of the asset list
# 1) Bathrooms
# 2) Bedrooms

File diff suppressed because it is too large Load diff

View file

@ -176,7 +176,7 @@ module "retrofit_hotwater_kwh_predictions" {
}
# Bucket that holds the energy assessments, named per deployment stage.
# NOTE(review): the two `source` lines below look like the before/after lines of this diff hunk (the hunk
# replaces one line); presumably only "./modules/s3_presignable_bucket" remains in the file - confirm
module "retrofit_energy_assessments" {
source = "./modules/s3"
source = "./modules/s3_presignable_bucket"
bucketname = "retrofit-energy-assessments-${var.stage}"
allowed_origins = var.allowed_origins
}

View file

@ -64,6 +64,8 @@ SMART_APPLIANCE_THERMOSTAT_COST = 400
PROGRAMMER_COST = 120
ROOM_THERMOSTAT_COST = 150
TRVS_COST = 35
BYPASS_COST = 350 # Based on desktop research for a complex installation
# https://www.checkatrade.com/blog/cost-guides/cost-install-water-shut-off-valve/
# Cost for TTZC
# Smart thermostat based on checkatrade https://www.checkatrade.com/blog/cost-guides/cost-smart-thermostat/
@ -98,8 +100,8 @@ CONDENSING_BOILER_COSTS = {
# The unit is a 15kw boiler, capable of outputting between 3kw and 15kw. Costs seem to be around £1800
ELECTRIC_BOILER_COSTS = 1800
# Assumes 3 hours to remove each heater (including re-decorating)
ROOM_HEATER_REMOVAL_COST = 120
# Assumes 1 hour to remove each heater (including re-decorating)
ROOM_HEATER_REMOVAL_COST = 50
ROOM_HEATER_REMOVAL_LABOUR_HOURS = 3
# This is a cost quoted by Jim for a system flush - existing system will run more efficiently
@ -1012,7 +1014,7 @@ class Costs:
"labour_days": labour_days
}
def solar_pv(self, wattage: float, has_battery: bool = False):
def solar_pv(self, wattage: float, has_battery: bool = False, array_cost=None):
"""
Calculates the total cost for solar PV based data provided by the MCS dashboard, which contains
@ -1026,13 +1028,17 @@ class Costs:
https://www.checkatrade.com/blog/cost-guides/cost-of-solar-panel-installation/
:param wattage: Peak wattage of the solar PV system]
:param has_battery: Bool, whether the system includes a battery
:param array_cost: float, containing the cost of the solar PV array
"""
# Get the cost data relevant to the region
regional_cost = MCS_SOLAR_PV_COST_DATA["-".join(["average_cost_per_kwh", self.region])]
kw = wattage / 1000
total_cost = kw * regional_cost
if array_cost is not None:
total_cost = array_cost
else:
kw = wattage / 1000
total_cost = kw * regional_cost
if has_battery:
# The battery cost is based on the £3500 quote, recieved from installers
@ -1254,6 +1260,34 @@ class Costs:
"labour_days": labour_days,
}
def programmer_trvs_bypass(self, number_heated_rooms, has_programmer, has_trvs, has_bypass):
    """
    Costs the controls that are missing from a "programmer, TRVs and bypass" setup.

    A programmer and a bypass are each costed once, TRVs are costed per heated room. Components that the property
    already has add neither cost nor labour. The total is treated as VAT inclusive, so the subtotal is the total
    divided by (1 + VAT_RATE). Labour days are always reported as 1.
    :param number_heated_rooms: number of heated rooms, used to scale the TRV cost and labour
    :param has_programmer: whether a programmer is already installed
    :param has_trvs: whether TRVs are already installed
    :param has_bypass: whether a bypass is already installed
    :return: dict with total, subtotal, vat, labour_hours and labour_days
    """
    # (cost, labour hours) for every component that still needs installing
    missing_components = []
    if not has_programmer:
        missing_components.append((PROGRAMMER_COST, 1))
    if not has_trvs:
        missing_components.append((TRVS_COST * number_heated_rooms, 0.25 * number_heated_rooms))
    if not has_bypass:
        missing_components.append((BYPASS_COST, 0.5))

    total_cost = sum(cost for cost, _ in missing_components)
    labour_hours = sum(hours for _, hours in missing_components)

    subtotal_before_vat = total_cost / (1 + self.VAT_RATE)

    return {
        "total": total_cost,
        "subtotal": subtotal_before_vat,
        "vat": total_cost - subtotal_before_vat,
        "labour_hours": labour_hours,
        "labour_days": 1,
    }
def heater_removal(self, n_rooms):
"""
Estimates the costs of removal of heaters, including the redecoration costs of the space behind the heater

View file

@ -0,0 +1,56 @@
from backend.Property import Property
class DraughtProofingRecommendations:
    """
    Turns a surveyed "draught_proofing" entry of the property's non-invasive recommendations into a
    recommendation. After recommend() has run, self.recommendation holds either one recommendation dict or
    stays an empty list.
    """

    def __init__(self, property_instance: "Property"):
        self.property = property_instance
        self.recommendation = []

    def recommend(self):
        """
        In some cases, we can identify the need for draught proofing from the EPC recommendations, however the
        initial implementation of this class will just assume that we are picking up a non-invasive recommendation
        from the survey.

        Does nothing when the property has no non-invasive recommendations or none of type "draught_proofing".
        Optional keys of the survey entry ("description", "sap_points", "cost", "labour_hours", "labour_days") fall
        back to a default when they are absent.
        :return: None
        """
        # The property may carry no non-invasive recommendations at all
        available = self.property.non_invasive_recommendations or []
        draught_proofing_recommendation_config = next(
            (r for r in available if r["type"] == "draught_proofing"),
            {}
        )

        if not draught_proofing_recommendation_config:
            return

        description = (
            draught_proofing_recommendation_config.get("description")
            or "Draught proof doors and windows to improve energy efficiency"
        )

        self.recommendation = [
            {
                # For the moment, draught proofing doesn't have a phase impact
                "phase": None,
                "parts": [],
                "type": "draught_proofing",
                "description": description,
                "starting_u_value": None,
                "new_u_value": None,
                "already_installed": False,
                # Survey entries are not guaranteed to carry SAP points or a cost, so read both defensively
                "sap_points": draught_proofing_recommendation_config.get("sap_points"),
                "heat_demand": 0,
                "kwh_savings": 0,
                "co2_equivalent_savings": 0,
                "energy_cost_savings": 0,
                "total": draught_proofing_recommendation_config.get("cost"),
                # We use a very simple and rough estimate of a single 8 hour day when the survey gives no figure
                "labour_hours": draught_proofing_recommendation_config.get("labour_hours", 8),
                "labour_days": draught_proofing_recommendation_config.get("labour_days", 1),
                "survey": True
            }
        ]

View file

@ -50,5 +50,8 @@ class FireplaceRecommendations(Definitions):
# Take a very basic estimate of 6 hours, multipled by the number of open fireplaces to seal
"labour_hours": 6 * number_open_fireplaces,
"labour_days": 6 * number_open_fireplaces / 8, # Assume 8 hour day
"description_simulation": {
"number-open-fireplaces": 0
}
}
]

View file

@ -5,12 +5,14 @@ import pandas as pd
from BaseUtility import Definitions
from datatypes.enums import QuantityUnits
from backend.app.plan.schemas import MEASURE_MAP
from backend.Property import Property
from recommendations.recommendation_utils import (
r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value,
get_recommended_part, get_floor_u_value, override_costs
get_recommended_part, get_floor_u_value, override_costs, check_simulation_difference
)
from recommendations.Costs import Costs
from etl.epc_clean.epc_attributes.FloorAttributes import FloorAttributes
class FloorRecommendations(Definitions):
@ -62,18 +64,16 @@ class FloorRecommendations(Definitions):
]
]
self.exposed_floor_insulation_materials = [
part for part in materials if part["type"] == "exposed_floor_insulation"
]
def recommend(self, phase=0, measures=None):
# TODO: To be completed
self.exposed_floor_non_insulation_materials = []
measures = MEASURE_MAP["floor_insulation"] if measures is None else measures
if not measures:
return
def recommend(self, phase=0):
u_value = self.property.floor["thermal_transmittance"]
property_type = self.property.data["property-type"]
floor_area = self.property.insulation_floor_area
year_built = self.property.year_built
if self.property.floor["another_property_below"] | (self.property.floor["insulation_thickness"] in [
"average", "above average"
@ -94,14 +94,16 @@ class FloorRecommendations(Definitions):
if u_value:
# By being built more recently than this, it means that the property was likely build with soild
# concrete floors with insulation already
if year_built < self.PART_L_YEAR_CUTOFF:
raise NotImplementedError("Not investigated this use case")
if u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
# The floor is already compliant
return
# In this case where we have the u-value of a floor, we likely don't have any other information about it
# so there is no recommendation that we can practically make
if (
self.property.floor["is_suspended"] or
self.property.floor["is_to_unheated_space"] or
self.property.floor["is_to_external_air"] or
self.property.floor["is_solid"]
):
raise ValueError("This should not be possible")
return
if u_value is None:
u_value = get_floor_u_value(
@ -118,7 +120,11 @@ class FloorRecommendations(Definitions):
if u_value < self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
return
if self.property.floor["is_suspended"]:
if (
self.property.floor["is_suspended"] or
self.property.floor["is_to_unheated_space"] or
self.property.floor["is_to_external_air"]
) and "suspended_floor_insulation" in measures:
# Given the U-value, we recommend underfloor insulation
self.recommend_floor_insulation(
phase=phase,
@ -128,7 +134,7 @@ class FloorRecommendations(Definitions):
)
return
if self.property.floor["is_solid"]:
if self.property.floor["is_solid"] and "solid_floor_insulation" in measures:
# Given the U-value, we recommend solid floor insulation options which are usually solid foam
self.recommend_floor_insulation(
u_value=u_value,
@ -138,10 +144,6 @@ class FloorRecommendations(Definitions):
)
return
if self.property.floor["is_to_unheated_space"] or self.property.floor["is_to_external_air"]:
self.recommend_floor_insulation(u_value=u_value, parts=self.exposed_floor_insulation_parts)
return
raise NotImplementedError("Implement me!")
@staticmethod
@ -197,6 +199,8 @@ class FloorRecommendations(Definitions):
if already_installed:
cost_result = override_costs(cost_result)
new_description = "Suspended, insulated"
elif material["type"] == "solid_floor_insulation":
cost_result = self.costs.solid_floor_insulation(
insulation_floor_area=self.property.insulation_floor_area,
@ -207,9 +211,21 @@ class FloorRecommendations(Definitions):
already_installed = "solid_floor_insulation" in self.property.already_installed
if already_installed:
cost_result = override_costs(cost_result)
new_description = "Solid, insulated"
else:
raise NotImplementedError("Implement me!")
floor_ending_config = FloorAttributes(new_description).process()
floor_simulation_config = check_simulation_difference(
new_config=floor_ending_config, old_config=self.property.floor, prefix="floor_"
)
simulation_config = {
**floor_simulation_config,
"floor_thermal_transmittance_ending": new_u_value,
}
self.recommendations.append(
{
"phase": phase,
@ -227,6 +243,7 @@ class FloorRecommendations(Definitions):
"new_u_value": new_u_value,
"sap_points": None,
"already_installed": already_installed,
"simulation_config": simulation_config,
"description_simulation": {
"floor-description": "Solid, insulated" if
material["type"] == "solid_floor_insulation"

View file

@ -40,7 +40,10 @@ class HeatingControlRecommender:
return
if heating_description in ["Air source heat pump, radiators, electric"]:
# For an ASHP, we can recommend time and temperature zone controls, as well as programmer, trvs and a bypass
# which are common configurations for ASHPs
self.recommend_time_temperature_zone_controls()
# self.recommend_programmer_trvs_bypass()
def recommend_room_heaters_electric_controls(self):
"""
@ -279,3 +282,55 @@ class HeatingControlRecommender:
"description_simulation": description_simulation
}
)
def recommend_programmer_trvs_bypass(self):
    """
    Append a "Programmer, TRVs and bypass" heating controls recommendation to ``self.recommendation``.

    No suitability checks are performed here; the method is expected to be used alongside an air source
    heat pump recommendation.

    The simulated controls efficiency is raised to "Average" when the current rating is "Poor" or
    "Very Poor", otherwise the current rating is carried over unchanged. If "heating_control" is listed in
    the property's already installed measures, the costs are passed through ``override_costs`` and the
    description states that no further action is needed.

    :return: None, the recommendation is appended to ``self.recommendation``
    """
    target_controls = "Programmer, TRVs and bypass"

    # Work out which controls attributes change relative to what the property has today
    target_config = MainheatControlAttributes(target_controls).process()
    simulation_config = check_simulation_difference(
        new_config=target_config, old_config=self.property.main_heating_controls
    )

    # Only ratings below "Average" are lifted; anything else keeps its current rating
    current_rating = self.property.data["mainheatc-energy-eff"]
    simulation_config["mainheatc_energy_eff_ending"] = (
        "Average" if current_rating in ["Poor", "Very Poor"] else current_rating
    )

    description_simulation = {
        "mainheatcont-description": target_controls,
        "mainheatc-energy-eff": simulation_config["mainheatc_energy_eff_ending"]
    }

    # Existing components are passed to the costing so it knows what is already in place
    existing_controls = self.property.main_heating_controls
    programmer_present = existing_controls["switch_system"] == "programmer"
    trvs_present = existing_controls["trvs"] is not None
    bypass_present = existing_controls["auxiliary_systems"] == "bypass"

    cost_result = self.costs.programmer_trvs_bypass(
        number_heated_rooms=int(self.property.data["number-heated-rooms"]),
        has_trvs=trvs_present,
        has_programmer=programmer_present,
        has_bypass=bypass_present
    )

    already_installed = "heating_control" in self.property.already_installed
    if already_installed:
        cost_result = override_costs(cost_result)
        description = "Heating controls have already been upgraded, no further action needed."
    else:
        description = "Install a Bypass valve, TRVs and a Programmer"

    recommendation = {
        "type": "heating_control",
        "parts": [],
        "description": description,
        **cost_result,
        "starting_u_value": None,
        "new_u_value": None,
        "sap_points": None,
        "already_installed": already_installed,
        "simulation_config": simulation_config,
        "description_simulation": description_simulation
    }
    self.recommendation.append(recommendation)

View file

@ -1,6 +1,7 @@
from recommendations.Costs import Costs, BOILER_UPGRADE_SCHEME_ASHP_VALUE
from recommendations.recommendation_utils import check_simulation_difference, override_costs
from backend.Property import Property
from backend.app.plan.schemas import MEASURE_MAP
from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes
from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes
@ -28,7 +29,7 @@ class HeatingRecommender:
self.property.main_heating["clean_description"] in self.ELECTRIC_HEATING_DESCRIPTIONS
)
def is_high_heat_retention_valid(self):
def is_high_heat_retention_valid(self, ashp_only_heating_recommendation, measures):
"""
Check conditions if high heat retention storage is valid
:return:
@ -40,45 +41,30 @@ class HeatingRecommender:
self.property.main_heating["clean_description"] in ["No system present, electric heaters assumed"]
)
return self.has_electric_heating_description or electric_heating_assumed
has_electric = self.has_electric_heating_description or electric_heating_assumed
def recommend(self, has_cavity_or_loft_recommendations, phase=0, exclusions=None):
return (
has_electric and (not ashp_only_heating_recommendation) and
("high_heat_retention_storage_heater" in measures)
)
def is_boiler_upgrade_suitable(self, measures, ashp_only_heating_recommendation):
"""
Produces heating recommendations
:param has_cavity_or_loft_recommendations: boolean indicating if we have produced a cavity or loft insulation
recommendation. If there are cavity or loft recommendations, the property would need to complete those measures
before being able to get the boiler upgrade scheme benefits. The messaging in the front end would be to
:param phase: indicates the phase of the retrofit programme
:param exclusions: A list of exclusions for the recommendations
These are the conditions we apply to recommend a boiler installation
:return:
"""
# TODO: We could have a system flush recommendation for an existing boiler, where there is no need to replace
# the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this
# in the Costs class, stored as SYSTEM_FLUSH_COST
exclusions = [] if exclusions is None else exclusions
self.heating_recommendations = []
self.heating_control_recommendations = []
# This first iteration of the recommender will provide very basic recommendation
# We recommend heating controls based on the main heating system
if self.is_high_heat_retention_valid():
# Recommend high heat retention storage heaters
# TODO: We need to allow for the possibility that the property already has storage heaters, but just
# needs the controls
self.recommend_hhr_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)
# if the property has mains heating with boiler and radiators, we recommend optimal heating controls
# 1) if the property has mains heating with boiler and radiators, we recommend optimal heating controls
has_boiler = self.property.main_heating["clean_description"] in ["Boiler and radiators, mains gas"]
# We also check that the property doesn't have a heating system, but it has access to the mains gas
# 2) If the property doesn't have a heating system, but it has access to the mains gas
no_heating_has_mains = self.property.main_heating["clean_description"] in [
'No system present, electric heaters assumed'
] and self.property.data["mains-gas-flag"]
has_gas_heaters = (
self.property.main_heating["clean_description"] in ["Room heaters, mains gas"] and
# The property is using portable heaters and has access to gas mains
has_room_heaters = (
self.property.main_heating["clean_description"] in ["Room heaters, mains gas", "Room heaters, electric"] and
self.property.data["mains-gas-flag"]
)
@ -91,13 +77,68 @@ class HeatingRecommender:
self.property.data["mains-gas-flag"]
)
if (
has_boiler or
no_heating_has_mains or
electic_heating_has_mains or
has_gas_heaters or
portable_heaters_has_mains
):
is_valid = (
(
has_boiler or
no_heating_has_mains or
electic_heating_has_mains or
has_room_heaters or
portable_heaters_has_mains
) and
(not ashp_only_heating_recommendation) and
("boiler_upgrade" in measures)
)
return is_valid, has_boiler
def recommend(self, has_cavity_or_loft_recommendations, phase=0, measures=None):
"""
Produces heating recommendations
:param has_cavity_or_loft_recommendations: boolean indicating if we have produced a cavity or loft insulation
recommendation. If there are cavity or loft recommendations, the property would need to complete those measures
before being able to get the boiler upgrade scheme benefits. The messaging in the front end would be to
:param phase: indicates the phase of the retrofit programme
:param measures: A list of measures for the recommendations
"""
measures = MEASURE_MAP["heating"] if measures is None else measures
# TODO: We could have a system flush recommendation for an existing boiler, where there is no need to replace
# the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this
# in the Costs class, stored as SYSTEM_FLUSH_COST
# TODO: Right now, we don't have recommendations for electric boilers - we should probably have one
# if we have a non-invasive ashp recommendation, we get the configuration directly from the property instance
non_invasive_ashp_recommendation = next(
(r for r in self.property.non_invasive_recommendations if r["type"] == "air_source_heat_pump"),
{"suitable": True}
)
# We allow for the non-invasive recommendation to be that ASHP is not suitable
# This option will prevent other heating recommendations from being specified, other than an ASHP
ashp_only_heating_recommendation = non_invasive_ashp_recommendation.get(
"ashp_only_heating_recommendation", False
)
self.heating_recommendations = []
self.heating_control_recommendations = []
# This first iteration of the recommender will provide very basic recommendation
# We recommend heating controls based on the main heating system
hhr_valid = self.is_high_heat_retention_valid(ashp_only_heating_recommendation, measures)
if hhr_valid:
# Recommend high heat retention storage heaters
# TODO: We need to allow for the possibility that the property already has storage heaters, but just
# needs the controls
self.recommend_hhr_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)
gas_boiler_suitable, has_boiler = self.is_boiler_upgrade_suitable(
measures=measures, ashp_only_heating_recommendation=ashp_only_heating_recommendation
)
if gas_boiler_suitable:
# This indicates that the home previously did not have a boiler in place and so would require
# an overhaul to the system - right now, this is all reasons, apart from if there is an existing boiler
system_change = not has_boiler
@ -116,9 +157,11 @@ class HeatingRecommender:
# In the future, we'll allow overrides, so that non-intrusive surveys can contradict these conditions
# and either allow or prevent the recommendation of an air source heat pump
if self.is_ashp_valid(exclusions=exclusions):
if self.property.is_ashp_valid(measures=measures) and non_invasive_ashp_recommendation["suitable"]:
self.recommend_air_source_heat_pump(
phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations
phase=phase,
has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations,
)
return
@ -186,19 +229,6 @@ class HeatingRecommender:
description = ("Replace the existing boiler and cylinder without a thermostat with a new electric combi "
"boiler")
def is_ashp_valid(self, exclusions):
    """
    Decide whether an air source heat pump (ASHP) may be recommended for the property.

    The checks are applied in order:
    1) a non-invasive recommendation for an ASHP always allows it
    2) an explicit exclusion of "air_source_heat_pump" prevents it
    3) otherwise the property must be a House or Bungalow without an existing ASHP

    :param exclusions: collection of measure types that must not be recommended
    :return: True if an air source heat pump can be recommended, False otherwise
    """
    ashp_type = "air_source_heat_pump"

    # Non-invasive recommendations are read as dicts with a "type" key by recommend_air_source_heat_pump,
    # so a plain string membership test never matches them. We compare on the type and still accept a
    # bare string entry, so either representation triggers the override.
    non_invasive = self.property.non_invasive_recommendations or []
    has_non_invasive_ashp = any(
        (entry.get("type") if isinstance(entry, dict) else entry) == ashp_type
        for entry in non_invasive
    )
    if has_non_invasive_ashp:
        return True

    if ashp_type in exclusions:
        return False

    suitable_property_type = self.property.data["property-type"] in ["House", "Bungalow"]
    has_air_source_heat_pump = self.property.main_heating["has_air_source_heat_pump"]

    return suitable_property_type and not has_air_source_heat_pump
def recommend_air_source_heat_pump(self, phase, has_cavity_or_loft_recommendations, _return=False):
"""
This method will implement the recommendation for an air source heat pump
@ -207,117 +237,164 @@ class HeatingRecommender:
:return:
"""
# Look for a non-intrusive recommendation
non_intrusive_recommendation = next((
r for r in self.property.non_invasive_recommendations if r["type"] == "air_source_heat_pump"
), {})
controls_recommender = HeatingControlRecommender(self.property)
controls_recommender.recommend(heating_description="Air source heat pump, radiators, electric")
ashp_costs = self.costs.air_source_heat_pump()
# We add the costs of the heating controls, onto each key in the costs dictionary
if controls_recommender.recommendation:
for key in ashp_costs:
ashp_costs[key] += controls_recommender.recommendation[0][key]
if non_intrusive_recommendation:
# Update with non-intrusive recommendation
if non_intrusive_recommendation.get("cost"):
ashp_costs.update(
{"total": non_intrusive_recommendation["cost"], "subtotal": None, "vat": None}
)
already_installed = "air_source_heat_pump" in self.property.already_installed
controls_recommendations = controls_recommender.recommendation
if already_installed or not controls_recommendations:
# We set an empty object, so we just produce one recommendation
controls_recommendations = [None]
if already_installed:
ashp_costs = override_costs(ashp_costs)
description = "The property already has an air source heat pump, no further action needed."
else:
if controls_recommender.recommendation:
description = ("Install an air source heat pump, and upgrade heating controls to Smart Thermostats, "
"room sensors and smart radiator valves (time & temperature zone control).")
else:
if non_intrusive_recommendation and not all([x is None for x in controls_recommendations]):
# We just use the ttzc control
controls_recommendations = [
x for x in controls_recommendations if (
x["description_simulation"]["mainheatcont-description"] == "Time and temperature zone control"
)
]
# This is a map from the heating controls description to the description of the air source heat pump set up
ashp_descriptions = {
"Time and temperature zone control": (
"Install an air source heat pump, and upgrade heating controls to Smart Thermostats, "
"room sensors and smart radiator valves (time & temperature zone control)."
),
"Programmer, TRVs and bypass": (
"Install an air source heat pump, with programmer, TRVs and a Bypass valve."
),
}
new_heating_description = "Air source heat pump, radiators, electric"
new_hot_water_description = "From main system"
ashp_recommendations = []
for controls_rec in controls_recommendations:
ashp_costs_with_controls = ashp_costs.copy()
if controls_rec:
for key in ashp_costs_with_controls:
if ashp_costs_with_controls[key] is not None:
ashp_costs_with_controls[key] += controls_rec[key]
if controls_rec is None:
description = "Install an air source heat pump."
elif already_installed:
description = "The property already has an air source heat pump, no further action needed."
else:
description = ashp_descriptions[controls_rec["description_simulation"]["mainheatcont-description"]]
# If the property does not have existing cavity and loft insulation, we include a note that the cost
# includes the boiler upgrade scheme and that the cavity and loft need to be treated, to ensure access
# to the funding
if has_cavity_or_loft_recommendations:
description = description + (f" The cost includes the £"
f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant. "
f"You must ensure that the property has an insulated cavity and "
f"270mm+ loft insulation to qualify for the grant")
else:
description = description + (f" The cost includes the £"
f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant")
if not non_intrusive_recommendation:
if has_cavity_or_loft_recommendations:
description = description + (
f" The cost includes the £"
f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant. "
f"You must ensure that the property has an insulated cavity and "
f"270mm+ loft insulation to qualify for the grant"
)
else:
description = description + (
f" The cost includes the £{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant"
)
new_heating_description = "Air source heat pump, radiators, electric"
new_hot_water_description = "From main system"
simulation_config = {
"mainheat_energy_eff_ending": "Good",
"hot_water_energy_eff_ending": "Good"
}
description_simulation = {
"mainheat-description": new_heating_description,
"mainheat-energy-eff": simulation_config["mainheat_energy_eff_ending"],
"hot-water-energy-eff": simulation_config["hot_water_energy_eff_ending"],
"hotwater-description": new_hot_water_description,
}
# Installation of a boiler improves the hot water system so we need to reflect this in
# the outcome of the recommendation
heating_ending_config = MainHeatAttributes(new_heating_description).process()
hotwater_ending_config = HotWaterAttributes(new_hot_water_description).process()
# If the property does not currently have electric main fuel, we'll simulate the change
fuel_ending_config = {}
if self.property.main_fuel["fuel_type"] != "electricity":
new_fuel_description = "electricity (not community)"
fuel_ending_config = MainFuelAttributes(new_fuel_description).process()
description_simulation = {
**description_simulation,
"main-fuel": new_fuel_description
simulation_config = {
"mainheat_energy_eff_ending": "Good",
"hot_water_energy_eff_ending": "Good"
}
description_simulation = {
"mainheat-description": new_heating_description,
"mainheat-energy-eff": simulation_config["mainheat_energy_eff_ending"],
"hot-water-energy-eff": simulation_config["hot_water_energy_eff_ending"],
"hotwater-description": new_hot_water_description,
}
# Installation of a boiler improves the hot water system so we need to reflect this in
# the outcome of the recommendation
heating_ending_config = MainHeatAttributes(new_heating_description).process()
hotwater_ending_config = HotWaterAttributes(new_hot_water_description).process()
# Check the simulation differences
heating_simulation_config = check_simulation_difference(
new_config=heating_ending_config, old_config=self.property.main_heating
)
hotwater_simulation_config = check_simulation_difference(
new_config=hotwater_ending_config, old_config=self.property.hotwater
)
fuel_simulation_config = check_simulation_difference(
new_config=fuel_ending_config, old_config=self.property.main_fuel
)
# If the property does not currently have electric main fuel, we'll simulate the change
fuel_ending_config = {}
if self.property.main_fuel["fuel_type"] != "electricity":
new_fuel_description = "electricity (not community)"
fuel_ending_config = MainFuelAttributes(new_fuel_description).process()
description_simulation = {
**description_simulation,
"main-fuel": new_fuel_description
}
simulation_config = {
**simulation_config,
**heating_simulation_config,
**hotwater_simulation_config,
**fuel_simulation_config,
}
# Check the simulation differences
heating_simulation_config = check_simulation_difference(
new_config=heating_ending_config, old_config=self.property.main_heating
)
hotwater_simulation_config = check_simulation_difference(
new_config=hotwater_ending_config, old_config=self.property.hotwater
)
fuel_simulation_config = check_simulation_difference(
new_config=fuel_ending_config, old_config=self.property.main_fuel
)
if controls_recommender.recommendation:
# We should have just the single recommendation for heat controls, which is time
# and temperature zone controls
if len(controls_recommender.recommendation) != 1:
raise NotImplementedError("More than one heat controls recommendation for air source heat pump")
simulation_config = {
**simulation_config,
**controls_recommender.recommendation[0]["simulation_config"]
**heating_simulation_config,
**hotwater_simulation_config,
**fuel_simulation_config,
}
description_simulation = {
**description_simulation,
**controls_recommender.recommendation[0]["description_simulation"]
if controls_rec is not None:
# We should have just the single recommendation for heat controls, which is time
# and temperature zone controls
simulation_config = {
**simulation_config,
**controls_rec["simulation_config"]
}
description_simulation = {
**description_simulation,
**controls_rec["description_simulation"]
}
ashp_recommendation = {
"phase": phase,
"parts": [
# TODO
],
"type": "heating",
"description": description,
"starting_u_value": None,
"new_u_value": None,
"sap_points": None,
"already_installed": already_installed,
"simulation_config": simulation_config,
"description_simulation": description_simulation,
**ashp_costs_with_controls
}
ashp_recommendation = {
"phase": phase,
"parts": [
# TODO
],
"type": "heating",
"description": description,
"starting_u_value": None,
"new_u_value": None,
"sap_points": None,
"already_installed": already_installed,
"simulation_config": simulation_config,
"description_simulation": description_simulation,
**ashp_costs
}
ashp_recommendations.append(ashp_recommendation)
if _return:
return [ashp_recommendation]
self.heating_recommendations.append(ashp_recommendation)
return [ashp_recommendations]
self.heating_recommendations.extend(ashp_recommendations)
@staticmethod
def check_simulation_difference(old_config, new_config):

View file

@ -66,6 +66,11 @@ class LightingRecommendations:
if self.property.lighting["low_energy_proportion"] == 100:
return
leds_recommendation_config = next(
(r for r in self.property.non_invasive_recommendations if r["type"] == "low_energy_lighting"),
{}
)
number_lighting_outlets = self.property.number_lighting_outlets
# Number non lel outlets
@ -79,6 +84,9 @@ class LightingRecommendations:
return
# Get the cost of the fittings
if leds_recommendation_config.get("cost"):
raise NotImplementedError("Costs from for low energy lighting have not been implemented")
cost_result = self.costs.low_energy_lighting(
number_of_lights=number_non_lel_outlets,
number_current_lel_lights=number_lighting_outlets - number_non_lel_outlets,
@ -97,6 +105,12 @@ class LightingRecommendations:
cost_result = override_costs(cost_result)
description = "Low energy lighting has already been installed, no further action required"
if leds_recommendation_config.get("sap_points") is not None:
# This could be zero points
sap_points = leds_recommendation_config["sap_points"]
else:
sap_points = round(2 * (number_non_lel_outlets / number_lighting_outlets), 2)
self.recommendation = [
{
"phase": phase,
@ -108,13 +122,14 @@ class LightingRecommendations:
"already_installed": already_installed,
# For SAP points, we use the fact that lighting is usually worth 2 points and we scale this to
# the proportion of lights that will be set to low energy
"sap_points": round(2 * (number_non_lel_outlets / number_lighting_outlets), 2),
"sap_points": sap_points,
"kwh_savings": heat_demand_change,
"co2_equivalent_savings": carbon_change,
"description_simulation": {
"lighting-energy-eff": "Very Good",
"lighting-description": "Low energy lighting in all fixed outlets",
},
**cost_result
**cost_result,
"survey": leds_recommendation_config.get("survey", False)
}
]

File diff suppressed because it is too large Load diff

View file

@ -1,13 +1,16 @@
import math
import pandas as pd
from backend.Property import Property
from backend.app.plan.schemas import MEASURE_MAP
from typing import List
from datatypes.enums import QuantityUnits
from recommendations.recommendation_utils import (
get_roof_u_value, r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns,
update_lowest_selected_u_value, get_recommended_part, convert_thickness_to_numeric, override_costs
update_lowest_selected_u_value, get_recommended_part, convert_thickness_to_numeric, override_costs,
check_simulation_difference
)
from recommendations.Costs import Costs
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
class RoofRecommendations:
@ -76,22 +79,43 @@ class RoofRecommendations:
return self.recommendations
def is_loft_already_insulated(self):
def is_loft_already_insulated(self, measures):
"""
Check if the loft is already insulated
"""
# If we have a non-invasive recommendation for the loft insulation, we can assume that the loft is not insulated
if "loft_insulation" in self.property.non_invasive_recommendations:
if "loft_insulation" in measures:
return False
return (self.insulation_thickness > self.MINIMUM_LOFT_ISULATION_MM) and self.property.roof["is_pitched"]
def recommend(self, phase):
def is_room_roof_insulated(self):
    """
    Check if the room roof is already insulated.

    The roof counts as insulated when its insulation thickness is "average" or "above_average" and either
    it is a roof room, or it is a pitched roof insulated at the rafters.

    :return: truthy if either condition holds, falsy otherwise (the result of the boolean expressions
        is returned as-is)
    """
    roof = self.property.roof
    insulated_levels = ["average", "above_average"]

    # Roof room with a reasonable level of insulation
    roof_room_insulated = roof["is_roof_room"] and roof["insulation_thickness"] in insulated_levels

    # Pitched roof where the insulation sits at the rafters
    rafters_insulated = (
        roof["is_pitched"] and
        roof["is_at_rafters"] and
        roof["insulation_thickness"] in insulated_levels
    )

    if roof_room_insulated:
        return roof_room_insulated
    return rafters_insulated
def recommend(self, phase, measures=None):
if self.property.roof["has_dwelling_above"]:
return
measures = MEASURE_MAP["roof_insulation"] if measures is None else measures
u_value = self.property.roof["thermal_transmittance"]
# We check if the roof is already insulated and if so, we exit
@ -99,14 +123,14 @@ class RoofRecommendations:
# Building regulations part L recommend installing at least 270mm of insulation, however generally we
# experience diminishing returns in terms of SAP once we go beyond around 150mm of insulation
# This only holds true for pitched roofs.
if self.is_loft_already_insulated():
if self.is_loft_already_insulated(measures):
return
if (self.insulation_thickness >= self.MINIMUM_FLAT_ROOF_ISULATION_MM) and self.property.roof["is_flat"]:
return
if self.property.roof["is_roof_room"]:
raise ValueError("Update convert_thickness_to_numeric for room roof and implement")
if self.is_room_roof_insulated():
return
# If we have a u-value already, need to implement this
if u_value:
@ -118,23 +142,37 @@ class RoofRecommendations:
return
raise NotImplementedError("Implement me")
u_value = get_roof_u_value(**{**self.property.roof, "age_band": self.property.age_band})
u_value = get_roof_u_value(
insulation_thickness=self.property.roof["insulation_thickness"],
has_dwelling_above=self.property.roof["has_dwelling_above"],
is_loft=self.property.roof["is_loft"],
is_roof_room=self.property.roof["is_roof_room"],
is_thatched=self.property.roof["is_thatched"],
age_band=self.property.age_band,
is_flat=self.property.roof["is_flat"],
is_pitched=self.property.roof["is_pitched"],
is_at_rafters=self.property.roof["is_at_rafters"],
)
self.estimated_u_value = u_value
if (u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE) and (
"loft_insulation" not in self.property.non_invasive_recommendations
if (u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE) or (
"loft_insulation" not in measures
):
# The Roof is already compliant
return
if self.property.roof["is_pitched"] or self.property.roof["is_flat"]:
insulation_thickness = (
0 if "loft_insulation" not in self.property.non_invasive_recommendations else self.insulation_thickness
)
if (self.property.roof["is_pitched"] and "loft_insulation" in measures) or (
self.property.roof["is_flat"] and "flat_roof_insulation" in measures
):
insulation_thickness = 0 if "loft_insulation" not in measures else self.insulation_thickness
self.recommend_roof_insulation(u_value, insulation_thickness, self.property.roof, phase)
return
if self.property.roof["is_roof_room"]:
# There are cases where the property might have a room roof as the second roof, but we have a recommendation for
# it, so we allow this override
if self.property.roof["is_roof_room"] and ("room_roof_insulation" in measures) or (
"room_roof_insulation" in [x["type"] for x in self.property.non_invasive_recommendations]
):
self.recommend_room_roof_insulation(u_value, phase)
return
@ -245,6 +283,40 @@ class RoofRecommendations:
if already_installed:
cost_result = override_costs(cost_result)
new_thickness = insulation_thickness + material["depth"]
# This is based on the values we have in the training data
valid_numeric_values = [
12,
25,
50,
75,
100,
150,
200,
250,
270,
300,
350,
400,
]
proposed_depth = new_thickness
if (new_thickness not in valid_numeric_values) and material["type"] == "loft_insulation":
# Take the nearest value for scoring
proposed_depth = min(
valid_numeric_values, key=lambda x: abs(x - proposed_depth)
)
if proposed_depth >= 270:
new_efficiency = "Very Good"
else:
if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
new_efficiency = "Good"
else:
new_efficiency = "Very Good"
new_description = f"Pitched, {int(proposed_depth)}mm loft insulation"
elif material["type"] == "flat_roof_insulation":
cost_result = self.costs.flat_roof_insulation(
floor_area=self.property.insulation_floor_area,
@ -254,38 +326,21 @@ class RoofRecommendations:
already_installed = "flat_roof_insulation" in self.property.already_installed
if already_installed:
cost_result = override_costs(cost_result)
new_thickness = None
new_description = "Flat, insulated"
new_efficiency = "Good"
else:
raise ValueError("Invalid material type")
# This is based on the values we have in the training data
valid_numeric_values = [
12,
25,
50,
75,
100,
150,
200,
250,
270,
300,
350,
400,
]
roof_ending_config = RoofAttributes(new_description).process()
roof_simulation_config = check_simulation_difference(
new_config=roof_ending_config, old_config=self.property.roof, prefix="roof_"
)
proposed_depth = new_thickness
if new_thickness not in valid_numeric_values:
# Take the nearest value for scoring
proposed_depth = min(
valid_numeric_values, key=lambda x: abs(x - proposed_depth)
)
if proposed_depth >= 270:
new_efficiency = "Very Good"
else:
if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
new_efficiency = "Good"
simulation_config = {
**roof_simulation_config,
"roof_thermal_transmittance_ending": new_u_value,
"roof_energy_eff_ending": new_efficiency
}
recommendations.append(
{
@ -304,9 +359,9 @@ class RoofRecommendations:
"new_u_value": new_u_value,
"sap_points": None,
"already_installed": already_installed,
"new_thickness": new_thickness,
"simulation_config": simulation_config,
"description_simulation": {
"roof-description": f"Pitched, {int(proposed_depth)}mm loft insulation",
"roof-description": new_description,
"roof-energy-eff": new_efficiency
},
**cost_result
@ -357,18 +412,27 @@ class RoofRecommendations:
:return:
"""
roof_roof_insulation_materials = [m for m in self.materials if m["type"] == "room_roof_insulation"]
if not roof_roof_insulation_materials:
raise ValueError("No room in roof insulation materials found")
# TODO: We temporarily use costs from SCIS for RIR insulation. The costing was £180/m2 floor
roof_roof_insulation_materials = [
{
"type": "room_roof_insulation",
"description": "Insulating the ceiling of the roof roof and re-decorate",
"depths": [100],
"depth_unit": "mm",
"r_value_per_mm": 0.038,
"thermal_conductivity": 0.022,
"cost": [180],
}
]
if self.property.pitched_roof_area is None:
raise ValueError("pitched_roof_area not included as property attribute")
rir_non_invasive_recommendation = next(
(x for x in self.property.non_invasive_recommendations if x["type"] == "room_roof_insulation"), {}
)
lowest_selected_u_value = None
# lowest_selected_u_value = None
recommendations = []
for material in roof_roof_insulation_materials:
for depth, cost_per_unit in zip(material["depths"], material["cost"]):
part_u_value = r_value_per_mm_to_u_value(depth, material["r_value_per_mm"])
_, new_u_value = calculate_u_value_uplift(u_value, part_u_value)
@ -380,36 +444,69 @@ class RoofRecommendations:
# If I have a lowest U value and my new u value is lower than the lowest value, it's
# further into the diminishing returns threshold and can shouldn't be
if is_diminishing_returns(
recommendations, new_u_value, lowest_selected_u_value, self.DIMINISHING_RETURNS_U_VALUE
):
continue
# if is_diminishing_returns(
# recommendations, new_u_value, lowest_selected_u_value, self.DIMINISHING_RETURNS_U_VALUE
# ):
# continue
# We allow a small tolerance for error so we don't discount the recommendation entirely
if new_u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
lowest_selected_u_value = update_lowest_selected_u_value(lowest_selected_u_value, new_u_value)
# if new_u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
# lowest_selected_u_value = update_lowest_selected_u_value(lowest_selected_u_value, new_u_value)
estimated_cost = cost_per_unit * self.property.pitched_roof_area
estimated_cost = (
cost_per_unit * self.property.insulation_floor_area if
rir_non_invasive_recommendation.get("cost") is None else
rir_non_invasive_recommendation.get("cost")
)
recommendations.append(
{
"phase": phase,
"parts": [
get_recommended_part(
part=material,
selected_depth=depth,
quantity=self.property.pitched_roof_area,
quantity_unit=QuantityUnits.m2.value,
selected_total_cost=estimated_cost
)
],
"type": "room_roof_insulation",
"description": self.make_room_roof_insulation_description(material, depth),
"starting_u_value": u_value,
"new_u_value": new_u_value,
"sap_points": None,
"cost": estimated_cost,
}
)
sap_points = rir_non_invasive_recommendation.get("sap_points", None)
# Could also be Roof room(s), ceiling insulated
new_descriptin = "Pitched, insulated at rafters"
roof_ending_config = RoofAttributes(new_descriptin).process()
roof_simulation_config = check_simulation_difference(
new_config=roof_ending_config, old_config=self.property.roof, prefix="roof_"
)
if self.property.data["roof-energy-eff"] in ["Very Poor", "Poor"]:
new_efficiency = "Average"
else:
new_efficiency = self.property.data["roof-energy-eff"]
simulation_config = {
**roof_simulation_config,
"roof_thermal_transmittance_ending": new_u_value,
"roof_energy_eff_ending": new_efficiency
}
already_installed = "flat_roof_insulation" in self.property.already_installed
cost_result = {
"total": estimated_cost,
"labour_hours": 80,
"labour_days": 5,
}
if already_installed:
cost_result = override_costs(cost_result)
recommendations.append(
{
"phase": phase,
"parts": [
# TODO
],
"type": "room_roof_insulation",
"description": "Insulate room in roof at rafters and re-decorate",
"starting_u_value": u_value,
"new_u_value": None,
"sap_points": sap_points,
"simulation_config": simulation_config,
"description_simulation": {
"roof-description": new_descriptin,
"roof-energy-eff": new_efficiency
},
**cost_result,
"already_installed": already_installed,
"survey": rir_non_invasive_recommendation.get("survey", None)
}
)
self.recommendations = recommendations

View file

@ -60,6 +60,9 @@ class SecondaryHeating:
**costs,
"simulation_config": {
"secondheat_description_ending": "None"
},
"description_simulation": {
"secondheat-description": "None"
}
}
)

View file

@ -1,6 +1,8 @@
import numpy as np
import pandas as pd
from recommendations.Costs import Costs
from recommendations.recommendation_utils import override_costs
from recommendations.recommendation_utils import override_costs, esimtate_pitched_roof_area
class SolarPvRecommendations:
@ -78,23 +80,6 @@ class SolarPvRecommendations:
}
]
def is_solar_pv_valid(self):
# If the property is a flat but we are looking at building solar potential, we can include this
if (self.property.building_id is not None) and (self.property.solar_panel_configuration is not None):
return True
is_valid_property_type = self.property.data["property-type"] in ["House", "Bungalow", "Maisonette"]
is_valid_roof_type = (
self.property.roof["is_flat"] or self.property.roof["is_pitched"] or self.property.roof["is_roof_room"]
)
# If there is no existing solar PV, the photo-supply field will be None or a missing value
has_no_existing_solar_pv = self.property.data["photo-supply"] in [
None, 0, self.property.DATA_ANOMALY_MATCHES
]
return is_valid_property_type and is_valid_roof_type and has_no_existing_solar_pv
def recommend_building_analysis(self, phase):
"""
This recommendation approach handles the case of producing solar PV recommendations at the building level,
@ -114,10 +99,14 @@ class SolarPvRecommendations:
best_configurations = panel_performance.head(1).reset_index(drop=True)
for rank, recommendation_config in best_configurations.iterrows():
roof_coverage_percent = round(recommendation_config["panneled_roof_area"] / total_roof_area * 100)
# If we dont have the panneled_roof_area in the recommendation_config we calculate it
if recommendation_config.get("panneled_roof_area", None):
roof_coverage_percent = round(recommendation_config["panneled_roof_area"] / total_roof_area * 100)
else:
raise Exception("IMPLEMENT ME")
# Spread the cost to the individual units - adding a 20% contingency
total_cost = recommendation_config["total_cost"] / n_units
kw = np.floor(recommendation_config["array_warrage"] / 100) / 10
kw = np.floor(recommendation_config["array_wattage"] / 100) / 10
# Default to a weeks work for a team of 3 people doing 8 hour days
labour_days = 5
labour_hours = 3 * 8 * labour_days
@ -159,7 +148,7 @@ class SolarPvRecommendations:
:return:
"""
if not self.is_solar_pv_valid():
if not self.property.is_solar_pv_valid():
return
# If we have a building level analysis, we implement separate logic
@ -167,84 +156,77 @@ class SolarPvRecommendations:
self.recommend_building_analysis(phase)
return
solar_pv_percentage = self.property.solar_pv_percentage
# We round up to the neaest 10%
solar_pv_percentage = np.ceil(solar_pv_percentage * 10) / 10
non_invasive_recommendation = next(
(r for r in self.property.non_invasive_recommendations if r["type"] == "solar_pv"), {"suitable": True}
)
# For the solar recommendations, we produce the following scenarios:
# 1) Solar panels only, we present a high, medium and low coverage
# 2) With and without battery
roof_coverage_scenarios = [
solar_pv_percentage - 0.1, solar_pv_percentage,
]
if solar_pv_percentage <= 0.4:
roof_coverage_scenarios.append(solar_pv_percentage + 0.1)
# We make sure we haven't gone too low or high - we allow no more than 60% coverage
roof_coverage_scenarios = [v for v in roof_coverage_scenarios if 0 <= v <= 0.6]
# If we only have two scenarios, we add a coverage scenario 10% less than the smallest
if len(roof_coverage_scenarios) == 2:
roof_coverage_scenarios.insert(0, roof_coverage_scenarios[0] - 0.1)
battery_scenarios = [False, True]
# We allow for the non-invasive recommendation to be that solar PV is not suitable
if not non_invasive_recommendation["suitable"]:
return
scenarios_with_wattage = []
for roof_coverage in roof_coverage_scenarios:
# We now have a property which is potentially suitable for solar PV
solar_pv_roof_area = self.property.get_solar_pv_roof_area(roof_coverage)
if non_invasive_recommendation.get("array_wattage") is not None:
number_solar_panels = np.floor(solar_pv_roof_area / self.SOLAR_PANEL_AREA)
solar_panel_wattage = number_solar_panels * self.SOLAR_PANEL_WATTAGE
if solar_panel_wattage < self.MIN_SYSTEM_WATTAGE:
continue
solar_panel_wattage = np.clip(
a=solar_panel_wattage, a_min=self.MIN_SYSTEM_WATTAGE, a_max=self.MAX_SYSTEM_WATTAGE
)
scenarios_with_wattage.append((roof_coverage, solar_panel_wattage))
# We trim the scenarios, so that we don't have duplicate wattages
scenarios_with_wattage = self.trim_solar_wattage_options(scenarios_with_wattage)
# Produce the cross product of the scenarios
scenarios = [
(roof, wattage, battery) for roof, wattage in scenarios_with_wattage for battery in battery_scenarios
]
# We deduce the wattage of the solar panels based on the roof coverage
for roof_coverage, solar_panel_wattage, has_battery in scenarios:
# We now have a property which is potentially suitable for solar PV
roof_coverage_percent = round(roof_coverage * 100)
# Given the wattage, we estimate the cost of the solar PV system. This is based on the MCS database
# of solar PV installations
cost_result = self.costs.solar_pv(wattage=solar_panel_wattage, has_battery=has_battery)
kw = np.floor(solar_panel_wattage / 100) / 10
if has_battery:
description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) panel system on "
f"{round(roof_coverage_percent)}% the roof, with a battery storage system.")
if self.property.roof["is_flat"]:
roof_area = self.property.insulation_floor_area
else:
description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) p"
f"anel system on {round(roof_coverage_percent)}% the roof.")
already_installed = "solar_pv" in self.property.already_installed
if already_installed:
cost_result = override_costs(cost_result)
self.recommendation.append(
{
"phase": phase,
"parts": [],
"type": "solar_pv",
"description": description,
"starting_u_value": None,
"new_u_value": None,
"sap_points": None,
"already_installed": already_installed,
**cost_result,
# This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale
# back up here
"photo_supply": 100 * roof_coverage,
"has_battery": has_battery,
"description_simulation": {"photo-supply": 100 * roof_coverage},
}
roof_area = esimtate_pitched_roof_area(
floor_area=self.property.insulation_floor_area, floor_height=self.property.data["floor-height"]
)
solar_configurations = pd.DataFrame(
[
{
"array_wattage": non_invasive_recommendation["array_wattage"],
"initial_ac_kwh_per_year": non_invasive_recommendation["initial_ac_kwh_per_year"],
"panneled_roof_area": non_invasive_recommendation["panneled_roof_area"]
}
]
)
else:
# TODO: There may be some instances where we don't want to use the solar API so we should cover for them
panel_performance = self.property.solar_panel_configuration["panel_performance"]
roof_area = self.property.roof_area
solar_configurations = panel_performance.head(3).reset_index(drop=True)
# We combine each of these configurations with estimates with and without a battery
for rank, recommendation_config in solar_configurations.iterrows():
roof_coverage_percent = round(recommendation_config["panneled_roof_area"] / roof_area * 100)
# We round up to the nearest 10
roof_coverage_percent = np.ceil(roof_coverage_percent / 10) * 10
for has_battery in [False, True]:
cost_result = self.costs.solar_pv(
wattage=recommendation_config["array_wattage"],
has_battery=has_battery,
array_cost=non_invasive_recommendation.get("cost", None)
)
kw = np.floor(recommendation_config["array_wattage"] / 100) / 10
if has_battery:
description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) panel system on "
f"{round(roof_coverage_percent)}% the roof, with a battery storage system.")
else:
description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) p"
f"anel system on {round(roof_coverage_percent)}% the roof.")
already_installed = "solar_pv" in self.property.already_installed
if already_installed:
cost_result = override_costs(cost_result)
self.recommendation.append(
{
"phase": phase,
"parts": [],
"type": "solar_pv",
"description": description,
"starting_u_value": None,
"new_u_value": None,
"sap_points": None,
"already_installed": already_installed,
**cost_result,
# This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we
# scale
# back up here
"photo_supply": roof_coverage_percent,
"has_battery": has_battery,
"initial_ac_kwh_per_year": recommendation_config["initial_ac_kwh_per_year"],
"description_simulation": {"photo-supply": roof_coverage_percent},
}
)

View file

@ -81,3 +81,44 @@ class VentilationRecommendations(Definitions):
"labour_days": labour_days # Assume 8 hour day
}
]
def recommend_trickle_vents(self, phase=None):
    """
    Build the trickle vents recommendation requested by an energy assessment.

    This is not something that we can identify completely non-invasively, so a recommendation is only produced
    when the property's non_invasive_recommendations contains an entry whose "type" is "trickle_vents".

    :param phase: The phase of the recommendation. Defaults to None, which is what this method always reported
        before the parameter existed
    :return: A single-element list holding the recommendation, or None when there is no trickle vents entry
    :raises KeyError: If the trickle vents entry does not carry a "cost"
    """
    trickle_vents_recommendation_config = next(
        (r for r in self.property.non_invasive_recommendations if r["type"] == "trickle_vents"), {}
    )

    if not trickle_vents_recommendation_config:
        return None

    # A missing or empty description falls back to the generic wording
    description = (
        trickle_vents_recommendation_config.get("description") or "Install trickle vents on your windows"
    )

    return [
        {
            "phase": phase,
            "parts": [],
            "type": "trickle_vents",
            "description": description,
            "starting_u_value": None,
            "new_u_value": None,
            "already_installed": False,
            # Every impact figure for this measure is reported as zero
            "sap_points": 0,
            "heat_demand": 0,
            "kwh_savings": 0,
            "co2_equivalent_savings": 0,
            "energy_cost_savings": 0,
            # The cost is mandatory on the assessment entry
            "total": trickle_vents_recommendation_config["cost"],
            # When the entry carries no labour figures we fall back to a single 8 hour day
            "labour_hours": trickle_vents_recommendation_config.get("labour_hours", 8),
            "labour_days": trickle_vents_recommendation_config.get("labour_days", 1),
            "survey": True
        }
    ]

View file

@ -5,6 +5,7 @@ import pandas as pd
from datatypes.enums import QuantityUnits
from backend.Property import Property
from backend.app.plan.schemas import MEASURE_MAP
from BaseUtility import Definitions
from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
from recommendations.recommendation_utils import (
@ -13,6 +14,7 @@ from recommendations.recommendation_utils import (
)
from recommendations.config import PARTIALLY_FILLED_PERCENTAGE_ASSUMPTION
from recommendations.Costs import Costs
from recommendations.wall_energy_efficiency_values import cavity_wall_energy_eff, iwi_energy_eff, ewi_energy_eff
from utils.logger import setup_logger
logger = setup_logger()
@ -60,10 +62,13 @@ class WallRecommendations(Definitions):
"Cavity wall, as built, insulated": "Cavity wall, filled cavity and external insulation",
"Solid brick, as built, no insulation": "Solid brick, with external insulation",
"Solid brick, as built, insulated": "Solid brick, with external insulation",
"Solid brick, as built, partial insulation": "Solid brick, with external insulation",
"Cob, as built": "Cob, with external insulation",
"System built, as built, no insulation": "System built, with external insulation",
"Granite or whinstone, as built, no insulation": 'Granite or whinstone, with external insulation',
"Timber frame, as built, no insulation": "Timber frame, with external insulation",
'Timber frame, as built, partial insulation': 'Timber frame, with external insulation',
"Sandstone or limestone, as built, no insulation": "Sandstone or limestone, with external insulation",
}
# These are the ending descriptions we consider for walls with internal insulation
@ -71,10 +76,13 @@ class WallRecommendations(Definitions):
"Cavity wall, as built, insulated": "Cavity wall, filled cavity and internal insulation",
"Solid brick, as built, no insulation": "Solid brick, with internal insulation",
"Solid brick, as built, insulated": "Solid brick, with internal insulation",
"Solid brick, as built, partial insulation": "Solid brick, with internal insulation",
"Cob, as built": "Cob, with internal insulation",
"System built, as built, no insulation": "System built, with internal insulation",
"Granite or whinstone, as built, no insulation": 'Granite or whinstone, with internal insulation',
"Timber frame, as built, no insulation": "Timber frame, with internal insulation",
'Timber frame, as built, partial insulation': 'Timber frame, with internal insulation',
"Sandstone or limestone, as built, no insulation": "Sandstone or limestone, with internal insulation",
}
def __init__(
@ -155,7 +163,7 @@ class WallRecommendations(Definitions):
)
# Test filling cavity
self.find_cavity_insulation(u_value, insulation_thickness, phase)
self.find_cavity_insulation(u_value, insulation_thickness, phase, measures)
return self.recommendations
@ -183,11 +191,15 @@ class WallRecommendations(Definitions):
return ewi_recommendations
def recommend(self, phase=0):
def recommend(self, phase=0, measures=None):
# if building built after 1990 + we're able to identify U-value +
# U-value less than 0.18 and if in or close to a conversation area,
# recommend internal wall insulation as a possible measure
measures = MEASURE_MAP["wall_insulation"] if measures is None else measures
if not measures:
return
u_value = self.property.walls["thermal_transmittance"]
u_value = None if pd.isnull(u_value) else u_value
@ -200,7 +212,7 @@ class WallRecommendations(Definitions):
or self.property.walls["is_filled_cavity"]
) and (
"cavity_extract_and_refill"
not in self.property.non_invasive_recommendations
not in measures
):
return
@ -228,15 +240,15 @@ class WallRecommendations(Definitions):
and (u_value >= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE)
):
# Recommend insulation
self.find_insulation(u_value, phase)
self.find_insulation(u_value, phase, measures)
return
# We can't detect it's a cavity wall, but it was built after 1990 so likely built with insulation already
# + it already has a U-value better than the building regulations, so we don't need to recommend anything
if (
(not is_cavity_wall)
and (self.property.year_built >= self.YEAR_WALLS_BUILT_WITH_INSULATION)
and (u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE)
and ((self.property.year_built >= self.YEAR_WALLS_BUILT_WITH_INSULATION)
or (u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE))
):
# Recommend nothing
return
@ -252,22 +264,22 @@ class WallRecommendations(Definitions):
self.estimated_u_value = u_value
if is_cavity_wall or "cavity_extract_and_refill" in self.property.non_invasive_recommendations:
if (is_cavity_wall and "cavity_wall_insulation" in measures) or "cavity_extract_and_refill" in measures:
if u_value >= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
# Test filling cavity
self.find_cavity_insulation(u_value, insulation_thickness, phase)
self.find_cavity_insulation(u_value, insulation_thickness, phase, measures)
return
# Remaining wall types are treated with IWI or EWI
if (u_value >= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE) and self.is_suitable_for_solid_insulation():
self.find_insulation(u_value, phase)
self.find_insulation(u_value, phase, measures=measures)
return
# If the u-value is within regulations, we don't do anything
return
def find_cavity_insulation(self, u_value, insulation_thickness, phase):
def find_cavity_insulation(self, u_value, insulation_thickness, phase, measures):
"""
This method tests different materials to fill the cavity wall, determining which
material will give us the best U-value.
@ -287,6 +299,8 @@ class WallRecommendations(Definitions):
:param u_value: u_value of the starting wall
:param insulation_thickness: describes the insulation level of the wall. If "below average", we have a partially
filled cavity wall
:param phase: The phase of the recommendation
:param measures: The measures we're considering
"""
insulation_materials = pd.DataFrame(self.cavity_wall_insulation_materials)
@ -321,7 +335,7 @@ class WallRecommendations(Definitions):
is_extraction_and_refill = (
"cavity_extract_and_refill"
in self.property.non_invasive_recommendations
in measures
)
cost_result = self.costs.cavity_wall_insulation(
@ -404,11 +418,28 @@ class WallRecommendations(Definitions):
simulation_config = {}
if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
if wall_ending_config["is_cavity_wall"]:
efficiency_data = [
x for x in cavity_wall_energy_eff if
x["construction-age-band"] == self.property.construction_age_band
][0]
elif wall_ending_config["internal_insulation"]:
efficiency_data = [
x for x in iwi_energy_eff if
x["construction-age-band"] == self.property.construction_age_band
][0]
else:
efficiency_data = [
x for x in ewi_energy_eff if
x["construction-age-band"] == self.property.construction_age_band
][0]
simulation_config = {
"walls_energy_eff_ending": "Good"
"walls_energy_eff_ending": efficiency_data["walls-energy-eff"]
}
# We check if we have double insulation in any instances
# TODO: We should pull the energy efficiency categories on double insulation instances, though it's quite rare
double_insulation = (
(wall_ending_config["is_filled_cavity"] and wall_ending_config["external_insulation"]) or
(wall_ending_config["is_filled_cavity"] and wall_ending_config["internal_insulation"]) or
@ -423,6 +454,16 @@ class WallRecommendations(Definitions):
lowest_selected_u_value = None
recommendations = []
iwi_non_invasive_recommendations = next(
(r for r in self.property.non_invasive_recommendations if r["type"] == "internal_wall_insulation"), {}
)
ewi_non_invasive_recommendations = next(
(r for r in self.property.non_invasive_recommendations if r["type"] == "external_wall_insulation"), {}
)
if ewi_non_invasive_recommendations:
raise NotImplementedError("Implement ewi non-invasive recommendations")
for _, insulation_material_group in insulation_materials.groupby("description"):
for _, material in insulation_material_group.iterrows():
@ -455,6 +496,15 @@ class WallRecommendations(Definitions):
)
if material["type"] == "internal_wall_insulation":
if iwi_non_invasive_recommendations.get("cost") is not None:
raise NotImplementedError(
"Not handled passing costs from non-invasive recommendations for iwi"
)
sap_points = iwi_non_invasive_recommendations.get("sap_points", None)
survey = iwi_non_invasive_recommendations.get("survey", False)
cost_result = self.costs.internal_wall_insulation(
wall_area=self.property.insulation_wall_area,
material=material.to_dict(),
@ -472,6 +522,10 @@ class WallRecommendations(Definitions):
)
elif material["type"] == "external_wall_insulation":
sap_points = ewi_non_invasive_recommendations.get("sap_points", None)
survey = ewi_non_invasive_recommendations.get("survey", False)
cost_result = self.costs.external_wall_insulation(
wall_area=self.property.insulation_wall_area,
material=material.to_dict(),
@ -522,19 +576,20 @@ class WallRecommendations(Definitions):
"starting_u_value": u_value,
"new_u_value": new_u_value,
"already_installed": already_installed,
"sap_points": None,
"sap_points": sap_points,
"simulation_config": simulation_config,
"description_simulation": {
"walls-description": new_description,
"walls-energy-eff": simulation_config["walls_energy_eff_ending"]
},
**cost_result
**cost_result,
"survey": survey
}
)
return recommendations
def find_insulation(self, u_value, phase):
def find_insulation(self, u_value, phase, measures):
"""
This function contains the logic for finding potential insulation measures for a property, depending
on the parts available and whether the property can have external wall insulation installed
@ -547,7 +602,7 @@ class WallRecommendations(Definitions):
# consider diminishing returns between the two as they are considered to be separate measures
ewi_recommendations = []
if self.ewi_valid():
if self.ewi_valid() and "external_wall_insulation" in measures:
ewi_recommendations = self._find_insulation(
u_value=u_value,
insulation_materials=pd.DataFrame(
@ -557,12 +612,14 @@ class WallRecommendations(Definitions):
phase=phase,
)
iwi_recommendations = self._find_insulation(
u_value=u_value,
insulation_materials=pd.DataFrame(self.internal_wall_insulation_materials),
non_insulation_materials=self.internal_wall_non_insulation_materials,
phase=phase,
)
iwi_recommendations = []
if "internal_wall_insulation" in measures:
iwi_recommendations = self._find_insulation(
u_value=u_value,
insulation_materials=pd.DataFrame(self.internal_wall_insulation_materials),
non_insulation_materials=self.internal_wall_non_insulation_materials,
phase=phase,
)
self.recommendations += ewi_recommendations + iwi_recommendations

View file

@ -3,8 +3,9 @@ from typing import List
import numpy as np
from backend.Property import Property
from etl.epc_clean.epc_attributes.WindowAttributes import WindowAttributes
from recommendations.Costs import Costs
from recommendations.recommendation_utils import override_costs
from recommendations.recommendation_utils import override_costs, check_simulation_difference
class WindowsRecommendations:
@ -48,6 +49,7 @@ class WindowsRecommendations:
is_secondary_glazing = self.property.restricted_measures or (
self.property.windows["glazing_type"] == "secondary"
)
windows_area = self.property.windows_area
if not number_of_windows:
raise ValueError("Number of windows not specified")
@ -57,6 +59,9 @@ class WindowsRecommendations:
):
return
if windows_area is not None:
raise Exception("We have windows area, we should use this data for our recommendations!!!")
# We scale the number of windows based on the proportion of existing glazing
if self.property.data["multi-glaze-proportion"] != "":
n_windows_scalar = 1 - (
@ -124,3 +129,64 @@ class WindowsRecommendations:
}
}
]
def recommend_mixed_glazing(self, phase):
    """
    Build the mixed glazing recommendation requested by an energy assessment.

    This is a more specific recommendation than the general windows recommendation, and is only produced when
    the property's non_invasive_recommendations contains an entry whose "type" is "mixed_glazing".

    :param phase: The phase of the recommendation
    :return: A single-element list holding the recommendation, or None when there is no mixed glazing entry
    :raises KeyError: If the mixed glazing entry does not carry a "cost"
    """
    mixed_glazing_recommendation_config = next(
        (r for r in self.property.non_invasive_recommendations if r["type"] == "mixed_glazing"), {}
    )

    if not mixed_glazing_recommendation_config:
        return None

    # A missing or empty description falls back to the generic wording
    description = (
        mixed_glazing_recommendation_config.get("description")
        or "Install a combination of secondary and double glazing to single glazed windows"
    )

    windows_ending_config = WindowAttributes("Full secondary glazing").process()
    windows_simulation_config = check_simulation_difference(
        new_config=windows_ending_config, old_config=self.property.windows, prefix="windows_"
    )

    # The fixed ending values are merged last, so they win over any clashing key computed above
    windows_simulation_config = {
        **windows_simulation_config,
        "windows_energy_eff_ending": "Average",
        "glazed_type_ending": "secondary glazing",
        "multi_glaze_proportion_ending": 100,
    }

    return [
        {
            "phase": phase,
            "parts": [],
            "type": "mixed_glazing",
            "description": description,
            "starting_u_value": None,
            "new_u_value": None,
            "already_installed": False,
            # Optional on the assessment entry, as it is for the other assessment-driven measures
            "sap_points": mixed_glazing_recommendation_config.get("sap_points"),
            "heat_demand": None,  # We will predict this
            "kwh_savings": None,  # We will predict this
            "co2_equivalent_savings": None,  # We will predict this
            "energy_cost_savings": None,  # We will predict this
            # The cost is mandatory on the assessment entry
            "total": mixed_glazing_recommendation_config["cost"],
            # When the entry carries no labour figures we fall back to a single 8 hour day
            "labour_hours": mixed_glazing_recommendation_config.get("labour_hours", 8),
            "labour_days": mixed_glazing_recommendation_config.get("labour_days", 1),
            # Optional on the assessment entry; treated as not survey-derived when absent
            "survey": mixed_glazing_recommendation_config.get("survey", False),
            "simulation_config": windows_simulation_config,
            "description_simulation": {
                "multi-glaze-proportion": 100,
                "windows-energy-eff": "Average",
                "windows-description": "Multiple glazing throughout",
                "glazed-type": "secondary glazing",
            },
        }
    ]

View file

@ -161,6 +161,9 @@ county_to_region_map = {
# Additional mappings required, based on what we find in the EPC database
'Greater London Authority': 'Inner London',
'Herefordshire, County of': 'West Midlands',
"North Northamptonshire": 'East Midlands',
"West Northamptonshire": 'East Midlands',
# We have a bunch of inner London local authority mappings, which can be used if the county is not found
'Barking and Dagenham': 'Inner London', 'Barnet': 'Inner London', 'Bexley': 'Inner London',
'Brent': 'Inner London', 'Bromley': 'Inner London', 'Camden': 'Inner London', 'City of London': 'Inner London',

View file

@ -9,7 +9,7 @@ def prepare_input_measures(property_recommendations, goal):
"""
goal_map = {
"Increase EPC": "sap_points"
"Increasing EPC": "sap_points"
}
goal_key = goal_map[goal]

View file

@ -514,8 +514,8 @@ FLOOR_LEVEL_MAP = {
"top floor": 5,
"20+": 20,
"21st or above": 21,
**{str(i).zfill(2): i for i in range(0, 21)},
**{ordinal(i): i for i in range(-1, 21)},
**{str(i): i for i in range(-1, 21)},
**{i: i for i in range(-1, 21)},
**{str(i).zfill(2): i for i in range(0, 51)},
**{ordinal(i): i for i in range(-1, 51)},
**{str(i): i for i in range(-1, 51)},
**{i: i for i in range(-1, 51)},
}

View file

@ -205,10 +205,22 @@ def get_wall_u_value(
return float(mapped_value)
def get_u_value_from_s9(thickness, s9, is_loft, is_roof_room, is_thatched):
def get_u_value_from_s9(thickness, s9, is_loft, is_roof_room, is_thatched, is_at_rafters):
"""Get the U-value from table S9 based on the insulation thickness."""
# If the roof is pitched & insulated at the rafters, it's a room roof
if is_roof_room or is_at_rafters:
# We re-map the thickness
thickness_map = {
"below average": "50",
"average": "100",
"above average": "270",
"none": "0",
}
thickness = thickness_map[thickness]
if thickness in ["below average", "average", "above average", "none", None] or (
not is_loft and not is_roof_room
not is_loft and not is_roof_room and not is_at_rafters
):
return None
elif thickness.endswith("+"):
@ -280,6 +292,7 @@ def get_roof_u_value(
is_loft=is_loft,
is_roof_room=is_roof_room,
is_thatched=is_thatched,
is_at_rafters=is_at_rafters
)
if u_value is not None:
@ -676,7 +689,7 @@ def estimate_windows(
property_type, built_form, construction_age_band, floor_area, number_habitable_rooms
):
# If there is an extension, that will boost the number of habitable rooms
# Base window count based on habitable rooms
window_count = number_habitable_rooms

View file

@ -0,0 +1,391 @@
# Hand-labelled cases for the heating recommender tests. Each entry holds:
#   "epc": a raw EPC record using the hyphenated keys of the EPC API / bulk download
#   "heating_recommendation_descriptions": the heating recommendation descriptions we expect
#   "heating_controls_recommendation_descriptions": the heating controls recommendation descriptions we expect
#   "notes": free-text reasoning for the expectations (documentation only)
testing_examples = [
    {
        "epc": {
            'lmk-key': '948324269042014090409224502942098', 'address1': '15, Ringwood Crescent', 'address2': None,
            'address3': None, 'postcode': 'TS19 9DN', 'building-reference-number': 1016769078,
            'current-energy-rating': 'C', 'potential-energy-rating': 'B', 'current-energy-efficiency': 79,
            'potential-energy-efficiency': 85, 'property-type': 'House', 'built-form': 'Semi-Detached',
            'inspection-date': '2014-08-21', 'local-authority': 'E06000004', 'constituency': 'E14000970',
            'county': None,
            'lodgement-date': '2014-09-04', 'transaction-type': 'none of the above', 'environment-impact-current': 77,
            'environment-impact-potential': 85, 'energy-consumption-current': 152,
            'energy-consumption-potential': 103.0, 'co2-emissions-current': 2.2, 'co2-emiss-curr-per-floor-area': 30,
            'co2-emissions-potential': 1.5, 'lighting-cost-current': 61.0, 'lighting-cost-potential': 47.0,
            'heating-cost-current': 625.0, 'heating-cost-potential': 522.0, 'hot-water-cost-current': 100.0,
            'hot-water-cost-potential': 71.0, 'total-floor-area': 74.0, 'energy-tariff': 'Single',
            'mains-gas-flag': 'Y', 'floor-level': 'NODATA!', 'flat-top-storey': None, 'flat-storey-count': None,
            'main-heating-controls': 2106.0, 'multi-glaze-proportion': 100.0,
            'glazed-type': 'double glazing installed before 2002', 'glazed-area': 'Normal', 'extension-count': 0.0,
            'number-habitable-rooms': 3.0, 'number-heated-rooms': 3.0, 'low-energy-lighting': 70.0,
            'number-open-fireplaces': 0.0, 'hotwater-description': 'From main system', 'hot-water-energy-eff': 'Good',
            'hot-water-env-eff': 'Good', 'floor-description': 'Solid, no insulation (assumed)',
            'floor-energy-eff': None,
            'floor-env-eff': None, 'windows-description': 'Fully double glazed', 'windows-energy-eff': 'Average',
            'windows-env-eff': 'Average', 'walls-description': 'Cavity wall, filled cavity', 'walls-energy-eff': 'Good',
            'walls-env-eff': 'Good', 'secondheat-description': 'Room heaters, mains gas', 'sheating-energy-eff': None,
            'sheating-env-eff': None, 'roof-description': 'Pitched, 50 mm loft insulation', 'roof-energy-eff': 'Poor',
            'roof-env-eff': 'Poor', 'mainheat-description': 'Boiler and radiators, mains gas',
            'mainheat-energy-eff': 'Good', 'mainheat-env-eff': 'Good',
            'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'mainheatc-energy-eff': 'Good',
            'mainheatc-env-eff': 'Good', 'lighting-description': 'Low energy lighting in 70% of fixed outlets',
            'lighting-energy-eff': 'Very Good', 'lighting-env-eff': 'Very Good',
            'main-fuel': 'mains gas (not community)', 'wind-turbine-count': 0.0, 'heat-loss-corridor': 'NO DATA!',
            'unheated-corridor-length': None, 'floor-height': 2.5, 'photo-supply': 50.0,
            'solar-water-heating-flag': None,
            'mechanical-ventilation': 'natural', 'address': '15, Ringwood Crescent',
            'local-authority-label': 'Stockton-on-Tees', 'constituency-label': 'Stockton North',
            'posttown': 'STOCKTON-ON-TEES', 'construction-age-band': 'England and Wales: 1950-1966',
            'lodgement-datetime': '2014-09-04 09:22:45', 'tenure': 'owner-occupied',
            'fixed-lighting-outlets-count': 10.0, 'low-energy-fixed-light-count': 7.0, 'uprn': 100110195416.0,
            'uprn-source': 'Address Matched'
        },
        "heating_recommendation_descriptions": [
            "Install an air source heat pump, and upgrade heating controls to Smart Thermostats, room sensors and "
            "smart radiator valves (time & temperature zone control). The cost includes the £7500 boiler upgrade "
            "scheme grant",
        ],
        "heating_controls_recommendation_descriptions": [
            "Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & "
            "temperature zone control)"
        ],
        "notes": "This property has a boiler, radiators & mains gas with good efficiency so the only recommendation "
                 "we expect here is for an air source heat pump. The heating controls are a programmer, room thermostat "
                 "and TRVs and so we should expect a TTZC recommendation"
    },
    {
        "epc": {
            'lmk-key': '153995620832008100717310934068296', 'address1': 'Apartment 13 The Quays',
            'address2': 'Burscough', 'address3': None, 'postcode': 'L40 5TW',
            'building-reference-number': 2604281568, 'current-energy-rating': 'C', 'potential-energy-rating': 'B',
            'current-energy-efficiency': 69, 'potential-energy-efficiency': 84, 'property-type': 'Flat',
            'built-form': 'Detached', 'inspection-date': '2008-10-06', 'local-authority': 'E07000127',
            'constituency': 'E14001033', 'county': 'Lancashire', 'lodgement-date': '2008-10-07',
            'transaction-type': 'marketed sale', 'environment-impact-current': 78,
            'environment-impact-potential': 78, 'energy-consumption-current': 195,
            'energy-consumption-potential': 192.0, 'co2-emissions-current': 1.7,
            'co2-emiss-curr-per-floor-area': 29, 'co2-emissions-potential': 1.7, 'lighting-cost-current': 35,
            'lighting-cost-potential': 38, 'heating-cost-current': 108, 'heating-cost-potential': 89,
            'hot-water-cost-current': 256, 'hot-water-cost-potential': 104, 'total-floor-area': 57.2,
            'energy-tariff': 'Single', 'mains-gas-flag': 'N', 'floor-level': '1st', 'flat-top-storey': 'Y',
            'flat-storey-count': 2.0, 'main-heating-controls': 2603.0, 'multi-glaze-proportion': 100.0,
            'glazed-type': 'double glazing installed during or after 2002', 'glazed-area': 'Normal',
            'extension-count': 0.0, 'number-habitable-rooms': 3.0, 'number-heated-rooms': 3.0,
            'low-energy-lighting': 77.0, 'number-open-fireplaces': 0.0,
            'hotwater-description': 'Electric immersion, standard tariff', 'hot-water-energy-eff': 'Very Poor',
            'hot-water-env-eff': 'Poor', 'floor-description': '(other premises below)', 'floor-energy-eff': None,
            'floor-env-eff': None, 'windows-description': 'Fully double glazed', 'windows-energy-eff': 'Good',
            'windows-env-eff': 'Good', 'walls-description': 'Cavity wall, as built, insulated (assumed)',
            'walls-energy-eff': 'Good', 'walls-env-eff': 'Good',
            'secondheat-description': 'Portable electric heaters', 'sheating-energy-eff': None,
            'sheating-env-eff': None, 'roof-description': '(another dwelling above)', 'roof-energy-eff': None,
            'roof-env-eff': None, 'mainheat-description': 'Room heaters, electric',
            'mainheat-energy-eff': 'Very Poor', 'mainheat-env-eff': 'Poor',
            'mainheatcont-description': 'Programmer and appliance thermostats', 'mainheatc-energy-eff': 'Good',
            'mainheatc-env-eff': 'Good', 'lighting-description': 'Low energy lighting in 77% of fixed outlets',
            'lighting-energy-eff': 'Very Good', 'lighting-env-eff': 'Very Good',
            'main-fuel': 'electricity - this is for backwards compatibility only and should not be used',
            'wind-turbine-count': 0.0, 'heat-loss-corridor': 'heated corridor', 'unheated-corridor-length': None,
            'floor-height': 2.3, 'photo-supply': 0.0, 'solar-water-heating-flag': 'N',
            'mechanical-ventilation': 'natural', 'address': 'Apartment 13 The Quays, Burscough',
            'local-authority-label': 'West Lancashire', 'constituency-label': 'West Lancashire',
            'posttown': 'ORMSKIRK', 'construction-age-band': 'England and Wales: 2003-2006',
            'lodgement-datetime': '2008-10-07 17:31:09', 'tenure': 'owner-occupied',
            'fixed-lighting-outlets-count': None, 'low-energy-fixed-light-count': None, 'uprn': 10012342725.0,
            'uprn-source': 'Address Matched',
        },
        "heating_recommendation_descriptions": [
            "Install high heat retention electric storage heaters and upgrade heating controls to High Heat Retention "
            "Storage Heater Controls"
        ],
        "heating_controls_recommendation_descriptions": [],
        "notes": "This property has electric room heaters and is off gas so a boiler recommendation is not appropriate. "
                 "We would expect a high heat retention storage recommendation. The property is a flat and therefore "
                 "we don't expect an air source heat pump recommendation. We also wouldn't expect a specific heating "
                 "control recommendation here"
    },
    {
        "epc": {
            'lmk-key': '751851300152012022010205497220090', 'address1': '21, Fullers Close', 'address2': 'Kelvedon',
            'address3': None, 'postcode': 'CO5 9JX', 'building-reference-number': 8075968, 'current-energy-rating': 'D',
            'potential-energy-rating': 'D', 'current-energy-efficiency': 55, 'potential-energy-efficiency': 56,
            'property-type': 'Bungalow', 'built-form': 'Detached', 'inspection-date': '2012-02-20',
            'local-authority': 'E07000067', 'constituency': 'E14001045', 'county': 'Essex',
            'lodgement-date': '2012-02-20',
            'transaction-type': 'non marketed sale', 'environment-impact-current': 39,
            'environment-impact-potential': 39,
            'energy-consumption-current': 475, 'energy-consumption-potential': 472.0, 'co2-emissions-current': 5.4,
            'co2-emiss-curr-per-floor-area': 84, 'co2-emissions-potential': 5.4, 'lighting-cost-current': 53.0,
            'lighting-cost-potential': 40.0, 'heating-cost-current': 674.0, 'heating-cost-potential': 678.0,
            'hot-water-cost-current': 110.0, 'hot-water-cost-potential': 110.0, 'total-floor-area': 64.45,
            'energy-tariff': 'dual', 'mains-gas-flag': 'N', 'floor-level': 'NODATA!', 'flat-top-storey': None,
            'flat-storey-count': None, 'main-heating-controls': '2402', 'multi-glaze-proportion': 100.0,
            'glazed-type': 'double glazing installed before 2002', 'glazed-area': 'Normal', 'extension-count': 0.0,
            'number-habitable-rooms': 3.0, 'number-heated-rooms': 3.0, 'low-energy-lighting': 67.0,
            'number-open-fireplaces': 0.0, 'hotwater-description': 'Electric immersion, off-peak',
            'hot-water-energy-eff': 'Average', 'hot-water-env-eff': 'Very Poor',
            'floor-description': 'Suspended, no insulation (assumed)', 'floor-energy-eff': None, 'floor-env-eff': None,
            'windows-description': 'Fully double glazed', 'windows-energy-eff': 'Average', 'windows-env-eff': 'Average',
            'walls-description': 'Cavity wall, as built, insulated (assumed)', 'walls-energy-eff': 'Good',
            'walls-env-eff': 'Good', 'secondheat-description': 'Room heaters, electric', 'sheating-energy-eff': None,
            'sheating-env-eff': None, 'roof-description': 'Pitched, 300+ mm loft insulation',
            'roof-energy-eff': 'Very Good',
            'roof-env-eff': 'Very Good', 'mainheat-description': 'Electric storage heaters',
            'mainheat-energy-eff': 'Poor',
            'mainheat-env-eff': 'Very Poor', 'mainheatcont-description': 'Automatic charge control',
            'mainheatc-energy-eff': 'Average', 'mainheatc-env-eff': 'Average',
            'lighting-description': 'Low energy lighting in 67% of fixed outlets', 'lighting-energy-eff': 'Good',
            'lighting-env-eff': 'Good', 'main-fuel': 'electricity (not community)', 'wind-turbine-count': 0.0,
            'heat-loss-corridor': 'NO DATA!', 'unheated-corridor-length': None, 'floor-height': 2.38,
            'photo-supply': 0.0,
            'solar-water-heating-flag': None, 'mechanical-ventilation': 'natural',
            'address': '21, Fullers Close, Kelvedon',
            'local-authority-label': 'Braintree', 'constituency-label': 'Witham', 'posttown': 'COLCHESTER',
            'construction-age-band': 'England and Wales: 1983-1990', 'lodgement-datetime': '2012-02-20 10:20:54',
            'tenure': 'owner-occupied', 'fixed-lighting-outlets-count': 6.0, 'low-energy-fixed-light-count': 4.0,
            'uprn': 100090311351.0, 'uprn-source': 'Address Matched', 'property-type_y': None, 'built-form_y': None,
        },
        "heating_recommendation_descriptions": [],
        "heating_controls_recommendation_descriptions": [],
        "notes": "This test has electric storage heaters with automatic charge control - this case should be researched "
                 "and checked that a high heat retention storage recommendation is actually sensible. If it's not, "
                 "we should adjust accordingly or perhaps have just a control recommendation"
    },
    {
        "epc": {
            'lmk-key': '1356416458532015082116515621278108', 'address1': '19a, St. Stephens Road', 'address2': None,
            'address3': None, 'postcode': 'TW3 2BH', 'building-reference-number': 5821158378,
            'current-energy-rating': 'E', 'potential-energy-rating': 'C', 'current-energy-efficiency': 54,
            'potential-energy-efficiency': 76, 'property-type': 'Maisonette', 'built-form': 'Semi-Detached',
            'inspection-date': '2015-08-21', 'local-authority': 'E09000018', 'constituency': 'E14000593',
            'county': 'Greater London Authority', 'lodgement-date': '2015-08-21', 'transaction-type': 'marketed sale',
            'environment-impact-current': 48, 'environment-impact-potential': 78, 'energy-consumption-current': 383,
            'energy-consumption-potential': 155, 'co2-emissions-current': 3.4, 'co2-emiss-curr-per-floor-area': 68,
            'co2-emissions-potential': 1.4, 'lighting-cost-current': 52, 'lighting-cost-potential': 34,
            'heating-cost-current': 560, 'heating-cost-potential': 255, 'hot-water-cost-current': 166,
            'hot-water-cost-potential': 102, 'total-floor-area': 51.0, 'energy-tariff': 'Single', 'mains-gas-flag': 'Y',
            'floor-level': '1st', 'flat-top-storey': 'Y', 'flat-storey-count': None, 'main-heating-controls': '2104',
            'multi-glaze-proportion': 100.0, 'glazed-type': 'double glazing, unknown install date',
            'glazed-area': 'Normal', 'extension-count': 0.0, 'number-habitable-rooms': 3.0, 'number-heated-rooms': 3.0,
            'low-energy-lighting': 50.0, 'number-open-fireplaces': 0.0, 'hotwater-description': 'From main system',
            'hot-water-energy-eff': 'Average', 'hot-water-env-eff': 'Average',
            'floor-description': '(another dwelling below)', 'floor-energy-eff': 'NO DATA!', 'floor-env-eff': None,
            'windows-description': 'Fully double glazed', 'windows-energy-eff': 'Average', 'windows-env-eff': 'Average',
            'walls-description': 'Solid brick, as built, no insulation (assumed)', 'walls-energy-eff': 'Very Poor',
            'walls-env-eff': 'Very Poor', 'secondheat-description': 'Room heaters, mains gas',
            'sheating-energy-eff': None, 'sheating-env-eff': None,
            'roof-description': 'Pitched, 100 mm loft insulation',
            'roof-energy-eff': 'Average', 'roof-env-eff': 'Average',
            'mainheat-description': 'Boiler and radiators, mains gas', 'mainheat-energy-eff': 'Good',
            'mainheat-env-eff': 'Good', 'mainheatcont-description': 'Programmer and room thermostat',
            'mainheatc-energy-eff': 'Average', 'mainheatc-env-eff': 'Average',
            'lighting-description': 'Low energy lighting in 50% of fixed outlets', 'lighting-energy-eff': 'Good',
            'lighting-env-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'wind-turbine-count': 0.0,
            'heat-loss-corridor': 'no corridor', 'unheated-corridor-length': None, 'floor-height': 2.5,
            'photo-supply': None, 'solar-water-heating-flag': 'N', 'mechanical-ventilation': 'natural',
            'address': '19a, St. Stephens Road', 'local-authority-label': 'Hounslow',
            'constituency-label': 'Brentford and Isleworth', 'posttown': 'HOUNSLOW',
            'construction-age-band': 'England and Wales: 1930-1949', 'lodgement-datetime': '2015-08-21 16:51:56',
            'tenure': 'owner-occupied', 'fixed-lighting-outlets-count': None, 'low-energy-fixed-light-count': None,
            'uprn': 100021560521.0, 'uprn-source': 'Address Matched',
        },
        "heating_recommendation_descriptions": [],
        "heating_controls_recommendation_descriptions": [],
        "notes": ""
    },
    {
        "epc": {
            'lmk-key': '1164410099442014062611405027442168', 'address1': '31, Brightside Road', 'address2': None,
            'address3': None, 'postcode': 'SE13 6EP', 'building-reference-number': 5481394278,
            'current-energy-rating': 'E', 'potential-energy-rating': 'C', 'current-energy-efficiency': 48,
            'potential-energy-efficiency': 79, 'property-type': 'House', 'built-form': 'Mid-Terrace',
            'inspection-date': '2014-06-26', 'local-authority': 'E09000023', 'constituency': 'E14000789',
            'county': 'Greater London Authority', 'lodgement-date': '2014-06-26',
            'transaction-type': 'assessment for green deal', 'environment-impact-current': 44,
            'environment-impact-potential': 77, 'energy-consumption-current': 334,
            'energy-consumption-potential': 121.0, 'co2-emissions-current': 5.1, 'co2-emiss-curr-per-floor-area': 64,
            'co2-emissions-potential': 1.9, 'lighting-cost-current': 70.0, 'lighting-cost-potential': 49.0,
            'heating-cost-current': 964.0, 'heating-cost-potential': 571.0, 'hot-water-cost-current': 107.0,
            'hot-water-cost-potential': 72.0, 'total-floor-area': 80.0, 'energy-tariff': 'Single',
            'mains-gas-flag': 'Y', 'floor-level': 'NODATA!', 'flat-top-storey': None, 'flat-storey-count': None,
            'main-heating-controls': '2102', 'multi-glaze-proportion': 100.0,
            'glazed-type': 'double glazing installed before 2002', 'glazed-area': 'Normal', 'extension-count': 1.0,
            'number-habitable-rooms': 3.0, 'number-heated-rooms': 3.0, 'low-energy-lighting': 56.0,
            'number-open-fireplaces': 0.0, 'hotwater-description': 'From main system', 'hot-water-energy-eff': 'Good',
            'hot-water-env-eff': 'Good', 'floor-description': 'Suspended, no insulation (assumed)',
            'floor-energy-eff': None, 'floor-env-eff': None, 'windows-description': 'Fully double glazed',
            'windows-energy-eff': 'Average', 'windows-env-eff': 'Average',
            'walls-description': 'Solid brick, as built, no insulation (assumed)', 'walls-energy-eff': 'Very Poor',
            'walls-env-eff': 'Very Poor', 'secondheat-description': 'Room heaters, mains gas',
            'sheating-energy-eff': None, 'sheating-env-eff': None,
            'roof-description': 'Pitched, no insulation (assumed)',
            'roof-energy-eff': 'Very Poor', 'roof-env-eff': 'Very Poor',
            'mainheat-description': 'Boiler and radiators, mains gas', 'mainheat-energy-eff': 'Good',
            'mainheat-env-eff': 'Good', 'mainheatcont-description': 'Programmer, no room thermostat',
            'mainheatc-energy-eff': 'Very Poor', 'mainheatc-env-eff': 'Very Poor',
            'lighting-description': 'Low energy lighting in 56% of fixed outlets', 'lighting-energy-eff': 'Good',
            'lighting-env-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'wind-turbine-count': 0.0,
            'heat-loss-corridor': 'NO DATA!', 'unheated-corridor-length': None, 'floor-height': 2.5,
            'photo-supply': 0.0,
            'solar-water-heating-flag': None, 'mechanical-ventilation': 'natural', 'address': '31, Brightside Road',
            'local-authority-label': 'Lewisham', 'constituency-label': 'Lewisham, Deptford', 'posttown': 'LONDON',
            'construction-age-band': 'England and Wales: before 1900', 'lodgement-datetime': '2014-06-26 11:40:50',
            'tenure': 'owner-occupied', 'fixed-lighting-outlets-count': 9.0, 'low-energy-fixed-light-count': 5.0,
            'uprn': 100021936225.0, 'uprn-source': 'Address Matched',
        },
        "heating_recommendation_descriptions": [
            'Install an air source heat pump, and upgrade heating controls to Smart Thermostats, room sensors and '
            'smart radiator valves (time & temperature zone control). The cost includes the £7500 boiler upgrade '
            'scheme grant',
        ],
        "heating_controls_recommendation_descriptions": [
            'upgrade heating controls to Room thermostat, programmer and TRVs',
            'Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & '
            'temperature zone control)'
        ],
        "notes": "Because this property already has a boiler, we don't recommend HHR. We only have a "
                 "heating recommendation for an air source heat pump. Because the heating controls are "
                 "Programmer, no room thermostat, we have a programmer, room thermostat and trvs recommendation "
                 "for heating controls and for TTZC."
    },
    {
        "epc": {
            'lmk-key': '1139584119102014052116014126342698', 'address1': '13, Starbuck Street', 'address2': 'Rudry',
            'address3': None, 'postcode': 'CF83 3DP', 'building-reference-number': 2187913278,
            'current-energy-rating': 'E', 'potential-energy-rating': 'D', 'current-energy-efficiency': 44,
            'potential-energy-efficiency': 61, 'property-type': 'Flat', 'built-form': 'Semi-Detached',
            'inspection-date': '2014-05-21', 'local-authority': 'W06000018', 'constituency': 'W07000076',
            'county': None,
            'lodgement-date': '2014-05-21', 'transaction-type': 'rental (private)', 'environment-impact-current': 49,
            'environment-impact-potential': 64, 'energy-consumption-current': 343,
            'energy-consumption-potential': 240.0, 'co2-emissions-current': 4.0, 'co2-emiss-curr-per-floor-area': 61,
            'co2-emissions-potential': 2.8, 'lighting-cost-current': 49.0, 'lighting-cost-potential': 49.0,
            'heating-cost-current': 752.0, 'heating-cost-potential': 429.0, 'hot-water-cost-current': 281.0,
            'hot-water-cost-potential': 281.0, 'total-floor-area': 66.0, 'energy-tariff': 'Single',
            'mains-gas-flag': 'N', 'floor-level': '1st', 'flat-top-storey': 'Y', 'flat-storey-count': None,
            'main-heating-controls': 2602.0, 'multi-glaze-proportion': 100.0,
            'glazed-type': 'double glazing installed during or after 2002', 'glazed-area': 'Normal',
            'extension-count': 0.0, 'number-habitable-rooms': 4.0, 'number-heated-rooms': 4.0,
            'low-energy-lighting': 86.0, 'number-open-fireplaces': 0.0,
            'hotwater-description': 'Electric immersion, standard tariff', 'hot-water-energy-eff': 'Very Poor',
            'hot-water-env-eff': 'Very Poor', 'floor-description': '(other premises below)', 'floor-energy-eff': None,
            'floor-env-eff': None, 'windows-description': 'Fully double glazed', 'windows-energy-eff': 'Good',
            'windows-env-eff': 'Good', 'walls-description': 'Cavity wall, as built, no insulation (assumed)',
            'walls-energy-eff': 'Poor', 'walls-env-eff': 'Poor', 'secondheat-description': 'None',
            'sheating-energy-eff': None, 'sheating-env-eff': None,
            'roof-description': 'Pitched, 200 mm loft insulation',
            'roof-energy-eff': 'Good', 'roof-env-eff': 'Good', 'mainheat-description': 'Room heaters, electric',
            'mainheat-energy-eff': 'Very Poor', 'mainheat-env-eff': 'Very Poor',
            'mainheatcont-description': 'Appliance thermostats', 'mainheatc-energy-eff': 'Good',
            'mainheatc-env-eff': 'Good', 'lighting-description': 'Low energy lighting in 86% of fixed outlets',
            'lighting-energy-eff': 'Very Good', 'lighting-env-eff': 'Very Good',
            'main-fuel': 'electricity (not community)', 'wind-turbine-count': 0.0, 'heat-loss-corridor': 'no corridor',
            'unheated-corridor-length': None, 'floor-height': 2.5, 'photo-supply': 0.0,
            'solar-water-heating-flag': None,
            'mechanical-ventilation': 'natural', 'address': '13, Starbuck Street, Rudry',
            'local-authority-label': 'Caerphilly', 'constituency-label': 'Caerphilly', 'posttown': 'CAERPHILLY',
            'construction-age-band': 'England and Wales: 1950-1966', 'lodgement-datetime': '2014-05-21 16:01:41',
            'tenure': 'rental (private)', 'fixed-lighting-outlets-count': 7.0, 'low-energy-fixed-light-count': 6.0,
            'uprn': 43088770.0, 'uprn-source': 'Address Matched',
        },
        "heating_recommendation_descriptions": [
            'Install high heat retention electric storage heaters and upgrade heating controls to High Heat Retention '
            'Storage Heater Controls'
        ],
        "heating_controls_recommendation_descriptions": [],
        "notes": "This property is a flat so we don't have an ASHP recommendation. It also doesn't have access to the "
                 "mains and so it can't have a gas boiler. We don't expect any controls recommendations"
    },
    {
        "epc": {
            'lmk-key': '492646189022010060208143796198410', 'address1': '67, Ridgeway Road', 'address2': None,
            'address3': None, 'postcode': 'HP5 2EW', 'building-reference-number': 1976846768,
            'current-energy-rating': 'D', 'potential-energy-rating': 'D', 'current-energy-efficiency': 64,
            'potential-energy-efficiency': 68, 'property-type': 'Bungalow', 'built-form': 'Detached',
            'inspection-date': '2010-06-01', 'local-authority': 'E07000005', 'constituency': 'E14000631',
            'county': 'Buckinghamshire', 'lodgement-date': '2010-06-02', 'transaction-type': 'marketed sale',
            'environment-impact-current': 67, 'environment-impact-potential': 70, 'energy-consumption-current': 249,
            'energy-consumption-potential': 231.0, 'co2-emissions-current': 3.5, 'co2-emiss-curr-per-floor-area': 35,
            'co2-emissions-potential': 3.2, 'lighting-cost-current': 89.0, 'lighting-cost-potential': 51.0,
            'heating-cost-current': 627.0, 'heating-cost-potential': 603.0, 'hot-water-cost-current': 105.0,
            'hot-water-cost-potential': 105.0, 'total-floor-area': 76.0, 'energy-tariff': 'Single',
            'mains-gas-flag': 'Y', 'floor-level': 'NO DATA!', 'flat-top-storey': None, 'flat-storey-count': None,
            'main-heating-controls': 2104.0, 'multi-glaze-proportion': 100.0,
            'glazed-type': 'double glazing installed during or after 2002', 'glazed-area': 'Normal',
            'extension-count': 0.0, 'number-habitable-rooms': 7.0, 'number-heated-rooms': 7.0,
            'low-energy-lighting': 25.0, 'number-open-fireplaces': 1.0, 'hotwater-description': 'From main system',
            'hot-water-energy-eff': 'Very Good', 'hot-water-env-eff': 'Very Good',
            'floor-description': 'Suspended, no insulation (assumed)', 'floor-energy-eff': None, 'floor-env-eff': None,
            'windows-description': 'Fully double glazed', 'windows-energy-eff': 'Good', 'windows-env-eff': 'Good',
            'walls-description': 'Cavity wall, filled cavity', 'walls-energy-eff': 'Good', 'walls-env-eff': 'Good',
            'secondheat-description': 'Room heaters, wood logs', 'sheating-energy-eff': None, 'sheating-env-eff': None,
            'roof-description': 'Pitched, 150 mm loft insulation', 'roof-energy-eff': 'Good', 'roof-env-eff': 'Good',
            'mainheat-description': 'Boiler and radiators, mains gas', 'mainheat-energy-eff': 'Very Good',
            'mainheat-env-eff': 'Very Good', 'mainheatcont-description': 'Programmer and room thermostat',
            'mainheatc-energy-eff': 'Average', 'mainheatc-env-eff': 'Average',
            'lighting-description': 'Low energy lighting in 25% of fixed outlets', 'lighting-energy-eff': 'Average',
            'lighting-env-eff': 'Average',
            'main-fuel': 'mains gas - this is for backwards compatibility only and should not be used',
            'wind-turbine-count': 0.0, 'heat-loss-corridor': 'NO DATA!', 'unheated-corridor-length': None,
            'floor-height': 2.4, 'photo-supply': 0.0, 'solar-water-heating-flag': 'N',
            'mechanical-ventilation': 'natural', 'address': '67, Ridgeway Road', 'local-authority-label': 'Chiltern',
            'constituency-label': 'Chesham and Amersham', 'posttown': 'CHESHAM',
            'construction-age-band': 'England and Wales: 1930-1949', 'lodgement-datetime': '2010-06-02 08:14:37',
            'tenure': 'owner-occupied', 'fixed-lighting-outlets-count': None, 'low-energy-fixed-light-count': None,
            'uprn': 100080513604.0, 'uprn-source': 'Address Matched'
        },
        "heating_recommendation_descriptions": [
            'Install an air source heat pump, and upgrade heating controls to Smart Thermostats, room sensors and '
            'smart radiator valves (time & temperature zone control). The cost includes the £7500 boiler upgrade '
            'scheme grant'
        ],
        "heating_controls_recommendation_descriptions": [
            'upgrade heating controls to Room thermostat, programmer and TRVs',
            'Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & '
            'temperature zone control)'
        ],
        "notes": "This has a very efficient boiler and is a detached bungalow, but only has "
                 "Programmer and room thermostat for heating controls so we'd expect an ASHP heating recommendation "
                 "as the only option, and heating controls recommendations for programmer, room thermostats and trvs "
                 "as well as ttzc"
    }
]
import random
from pathlib import Path
import inspect
import pandas as pd
# This can be used to get example data to build the test cases.
# It is guarded so that importing `testing_examples` (e.g. from the test suite) does not
# list the local EPC extracts, read a CSV or print anything as an import side effect.
if __name__ == "__main__":
    # inspect.getfile on a lambda resolves to the file that defines it, i.e. this module
    src_file_path = inspect.getfile(lambda: None)
    EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"

    # Pick one of the extract directories at random and load its certificates
    epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
    directory = random.choice(epc_directories)
    data = pd.read_csv(directory / "certificates.csv", low_memory=False)

    # Rename the columns to the same format as the api returns
    data.columns = [c.replace("_", "-").lower() for c in data.columns]
    data["floor-height"] = data["floor-height"].fillna(2.45)

    # Combinations of heating system / property shape that an existing test case already uses
    match_columns = ["mainheat-description", "mainheat-energy-eff", "built-form", "property-type"]
    used_examples = pd.DataFrame(
        [
            {
                "mainheat-description": x["epc"]["mainheat-description"],
                "mainheat-energy-eff": x["epc"]["mainheat-energy-eff"],
                "property-type": x["epc"]["property-type"],
                "built-form": x["epc"]["built-form"],
                "used": True
            } for x in testing_examples
        ]
    )

    # Left-join so rows without a match get a null "used" flag, then keep only those rows
    data = data.merge(used_examples, how="left", on=match_columns)
    data = data[pd.isnull(data["used"])].drop(columns=["used"])

    # Sample a single certificate and show the fields that drive the heating recommendations
    eg = data.sample(1).to_dict("records")[0]
    for field in (
        "mainheat-description",
        "mainheat-energy-eff",
        "property-type",
        "built-form",
        "mainheatcont-description",
    ):
        print(eg[field])

View file

@ -0,0 +1,124 @@
from datetime import datetime
import pandas as pd
import msgpack
from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3
import pytest
from backend.Property import Property
from etl.epc.Record import EPCRecord
from etl.bill_savings.KwhData import KwhData
from recommendations.HeatingRecommender import HeatingRecommender
from recommendations.tests.test_data.heating_recommendations_data import testing_examples
class TestHeatingRecommendations:
    """Checks the HeatingRecommender output against the hand-labelled `testing_examples`."""

    @pytest.fixture
    def cleaning_data(self):
        """Load the cleaning dataset parquet from the dev data bucket."""
        return read_dataframe_from_s3_parquet(
            bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
        )

    @pytest.fixture
    def cleaned(self):
        """Load the cleaned EPC data blob from the dev data bucket and unpack it with msgpack."""
        df = read_from_s3(
            s3_file_name="cleaned_epc_data/cleaned.bson",
            bucket_name="retrofit-data-dev"
        )
        df = msgpack.unpackb(df, raw=False)
        return df

    @pytest.fixture
    def kwh_client(self):
        """Build a KwhData client whose retail price table is replaced by a single fixed row."""
        client = KwhData(bucket="retrofit-data-dev", read_consumption_data=False)
        # We fix this pricing table for these tests
        client.retail_price_comparison = pd.DataFrame(
            [
                {
                    "Date": datetime.today().strftime("%Y-%m-%d"),
                    'Average standard variable tariff (Large legacy suppliers)': 1
                }
            ]
        )
        client.retail_price_comparison["Date"] = pd.to_datetime(client.retail_price_comparison["Date"])
        return client

    @pytest.mark.parametrize(
        "test_case",
        testing_examples
    )
    def test_recommend(self, test_case, cleaning_data, cleaned, kwh_client):
        """
        With this function, we test out multiple heating descriptions and check which recommendations
        we retrieve alongside them

        :param test_case: one entry of `testing_examples` (raw EPC plus the expected descriptions)
        :param cleaning_data: cleaning dataset passed to EPCRecord
        :param cleaned: cleaned EPC data passed to Property.set_features
        :param kwh_client: KwhData client with a fixed pricing table
        :return:
        """
        # Cases whose expected output has not been decided yet are reported as expected failures
        # (with the reason) rather than aborting with a bare Exception, so they stay visible in the
        # test report without being confused with genuine regressions.
        if test_case["epc"]["uprn"] == 100090311351:
            pytest.xfail(
                "This test has electric storage heaters with automatic charge control - this case should be "
                "researched and checked that a high heat retention storage recommendation is actually sensible. "
                "If it's not, we should adjust accordingly or perhaps have just a control recommendation"
            )

        if test_case["epc"]["uprn"] == 100021560521:
            pytest.xfail("Finish this test - could do so while on the train")

        # Copy the EPC so the shared test data is not modified by the record preparation
        epc_records = {"original_epc": test_case["epc"].copy(), "full_sap_epc": {}, "old_data": []}

        epc_record = EPCRecord(
            epc_records=epc_records,
            run_mode="newdata",
            cleaning_data=cleaning_data
        )

        p = Property(
            id=0,
            postcode=test_case["epc"]["postcode"],
            address=test_case["epc"]["address"],
            epc_record=epc_record,
            energy_assessment={
                "condition": {},
                "energy_assessment_is_newer": False
            }
        )

        # For these tests, this can be fixed
        kwh_predictions = {
            "heating_kwh_predictions": pd.DataFrame(
                [
                    {"id": p.uprn, "predictions": 12000}
                ]
            ),
            "hotwater_kwh_predictions": pd.DataFrame(
                [
                    {"id": p.uprn, "predictions": 3000}
                ]
            ),
        }

        p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=kwh_predictions)

        recommender = HeatingRecommender(property_instance=p)

        # Check they're empty
        assert not recommender.heating_recommendations
        assert not recommender.heating_control_recommendations

        recommender.recommend(has_cavity_or_loft_recommendations=False)

        # Check the counts first so duplicated recommendations are not hidden by the set comparison
        assert len(recommender.heating_recommendations) == len(test_case["heating_recommendation_descriptions"])
        assert (
            len(recommender.heating_control_recommendations) ==
            len(test_case["heating_controls_recommendation_descriptions"])
        )

        # Check the exact descriptions
        assert (
            {x["description"] for x in recommender.heating_recommendations} ==
            set(test_case["heating_recommendation_descriptions"])
        )
        assert (
            {x["description"] for x in recommender.heating_control_recommendations} ==
            set(test_case["heating_controls_recommendation_descriptions"])
        )

View file

@ -0,0 +1,56 @@
# One row per construction age band: the band label, a walls-energy-eff rating and a record count,
# listed in descending order of count.
# NOTE(review): presumably the most frequently observed wall efficiency rating per age band for
# properties with cavity wall insulation - confirm against the query that produced these figures.
cavity_wall_energy_eff = [
    {'construction-age-band': _band, 'walls-energy-eff': _rating, 'count': _count}
    for _band, _rating, _count in (
        ('England and Wales: 1950-1966', 'Average', 605820),
        ('England and Wales: 1967-1975', 'Average', 410998),
        ('England and Wales: 1930-1949', 'Average', 263575),
        ('England and Wales: 1976-1982', 'Good', 206654),
        ('England and Wales: 1983-1990', 'Good', 106489),
        ('England and Wales: 1900-1929', 'Average', 58399),
        ('England and Wales: 1991-1995', 'Good', 58252),
        ('England and Wales: 1996-2002', 'Good', 35141),
        ('England and Wales: 2003-2006', 'Good', 7194),
        ('England and Wales: 2007-2011', 'Good', 2639),
        ('England and Wales: before 1900', 'Average', 2495),
        ('England and Wales: 2012 onwards', 'Very Good', 1158),
        ('England and Wales: 2007 onwards', 'Good', 357),
        ('INVALID!', 'Very Good', 88),
    )
]
# One row per construction age band: the band label, a walls-energy-eff rating and a record count,
# listed in descending order of count.
# NOTE(review): "iwi" presumably stands for internal wall insulation, with each row holding the most
# frequently observed wall efficiency rating for that age band - confirm against the source query.
iwi_energy_eff = [
    {'construction-age-band': _band, 'walls-energy-eff': _rating, 'count': _count}
    for _band, _rating, _count in (
        ('England and Wales: 1900-1929', 'Good', 22415),
        ('England and Wales: before 1900', 'Good', 13422),
        ('England and Wales: 1930-1949', 'Good', 6640),
        ('England and Wales: 1950-1966', 'Good', 1391),
        ('England and Wales: 1967-1975', 'Good', 663),
        ('England and Wales: 2003-2006', 'Very Good', 516),
        ('England and Wales: 2007-2011', 'Very Good', 463),
        ('England and Wales: 2012 onwards', 'Very Good', 353),
        ('England and Wales: 1996-2002', 'Good', 218),
        ('England and Wales: 1983-1990', 'Very Good', 166),
        ('England and Wales: 1976-1982', 'Very Good', 121),
        ('England and Wales: 1991-1995', 'Good', 104),
        ('England and Wales: 2007 onwards', 'Very Good', 74),
        ('INVALID!', 'Very Good', 26),
    )
]
# One row per construction age band: the band label, a walls-energy-eff rating and a record count,
# listed in descending order of count.
# NOTE(review): "ewi" presumably stands for external wall insulation, with each row holding the most
# frequently observed wall efficiency rating for that age band - confirm against the source query.
ewi_energy_eff = [
    {'construction-age-band': _band, 'walls-energy-eff': _rating, 'count': _count}
    for _band, _rating, _count in (
        ('England and Wales: 1900-1929', 'Good', 18427),
        ('England and Wales: 1930-1949', 'Good', 17803),
        ('England and Wales: 1950-1966', 'Good', 4306),
        ('England and Wales: before 1900', 'Good', 2955),
        ('England and Wales: 1967-1975', 'Good', 647),
        ('England and Wales: 1976-1982', 'Very Good', 188),
        ('England and Wales: 2007-2011', 'Very Good', 73),
        ('England and Wales: 2003-2006', 'Very Good', 49),
        ('England and Wales: 2012 onwards', 'Very Good', 37),
        ('England and Wales: 1983-1990', 'Good', 31),
        ('England and Wales: 1996-2002', 'Very Good', 21),
        ('England and Wales: 1991-1995', 'Good', 14),
        ('England and Wales: 2007 onwards', 'Very Good', 8),
        ('INVALID!', 'Very Good', 4),
    )
]

View file

@ -229,6 +229,39 @@ def read_excel_from_s3(bucket_name, file_key, header_row, drop_all_na=True):
return df
def save_excel_to_s3(df, bucket_name, file_key):
    """
    Write a pandas DataFrame to S3 as an Excel workbook.

    The workbook is built in an in-memory buffer and uploaded with a single put_object call.
    The DataFrame index is not written to the sheet.

    :param df: DataFrame to save. Must not be empty.
    :param bucket_name: S3 bucket name.
    :param file_key: S3 file key, i.e. the path and file name. Must end with ".xls" or ".xlsx".
    :raises ValueError: If the DataFrame is empty or the file key has no Excel extension.
    """
    # Validate inputs up front so nothing is uploaded for bad arguments
    if df.empty:
        raise ValueError("The DataFrame is empty. Nothing to save to Excel.")

    has_excel_extension = file_key.endswith((".xls", ".xlsx"))
    if not has_excel_extension:
        raise ValueError("The specified file key does not appear to be an Excel file.")

    # Serialise into memory, then rewind so the upload reads from the first byte
    excel_buffer = BytesIO()
    df.to_excel(excel_buffer, index=False)
    excel_buffer.seek(0)

    # Upload the buffer through a boto3 session resource
    target_bucket = boto3.session.Session().resource('s3').Bucket(bucket_name)
    target_bucket.put_object(Body=excel_buffer, Key=file_key)

    logger.info(f"Excel file saved to S3 bucket '{bucket_name}' with key '{file_key}'")
def read_csv_from_s3(bucket_name, filepath):
s3 = boto3.client('s3')
@ -276,3 +309,86 @@ def list_files_in_s3_folder(bucket_name, folder_name):
except Exception as e:
logger.error(f'Failed to list files in folder {folder_name} in bucket {bucket_name}: {str(e)}')
return []
def list_files_and_subfolders_in_s3_folder(bucket_name, folder_name):
    """
    List all files and immediate subfolders in a given folder in an S3 bucket.

    E.g. if we have a folder structure in S3 like this:
        - folder1/
            - file1.csv
            - file2.csv
            - subfolder1/
                - file3.csv

    Then calling list_files_and_subfolders_in_s3_folder(bucket_name='my-bucket', folder_name='folder1/')
    would return ['folder1/file1.csv', 'folder1/file2.csv', 'folder1/subfolder1/'].
    Namely, the nested files are not included in the list, only the immediate files and subfolders.

    Results are gathered across every page of the listing, so folders holding more entries than a
    single list_objects_v2 response can carry are returned in full. Any failure (including missing
    or partial credentials) is logged and an empty list is returned rather than raising.

    :param bucket_name: The name of the S3 bucket.
    :param folder_name: The folder name within the S3 bucket. A trailing '/' is appended if missing.
    :return: A list of file keys followed by subfolder prefixes in the specified S3 folder.
    """
    # For this function, folder_name should end with a forward slash
    if not folder_name.endswith('/'):
        folder_name += '/'

    try:
        s3 = boto3.client('s3')
        # A single list_objects_v2 call is capped (1,000 entries), so walk every page
        paginator = s3.get_paginator('list_objects_v2')

        files = []
        subfolders = []
        for page in paginator.paginate(Bucket=bucket_name, Prefix=folder_name, Delimiter='/'):
            # Skip the zero-byte placeholder object that represents the folder itself
            files.extend(
                content['Key'] for content in page.get('Contents', []) if content['Key'] != folder_name
            )
            # With a delimiter, immediate subfolders are reported as common prefixes
            subfolders.extend(prefix['Prefix'] for prefix in page.get('CommonPrefixes', []))

        # Keep the established ordering: all files first, then all subfolders
        return files + subfolders
    except NoCredentialsError:
        logger.error("Credentials not available.")
        return []
    except PartialCredentialsError:
        logger.error("Incomplete credentials provided.")
        return []
    except Exception as e:
        logger.error(f'Failed to list files and subfolders in folder {folder_name} in bucket {bucket_name}: {str(e)}')
        return []
def list_xmls_in_s3_folder(bucket_name, folder_name):
    """
    List all XML files in a given folder in an S3 bucket.

    The listing is not delimited, so keys in nested subfolders under the prefix are included.
    Results are gathered across every page of the listing, so folders holding more keys than a
    single list_objects_v2 response can carry are searched in full. Any failure (including missing
    or partial credentials) is logged and an empty list is returned rather than raising.

    :param bucket_name: The name of the S3 bucket.
    :param folder_name: The folder name within the S3 bucket, used as the key prefix.
    :return: A list of XML file keys (keys ending in '.xml') in the specified S3 folder.
    """
    try:
        s3 = boto3.client('s3')
        # A single list_objects_v2 call is capped (1,000 entries), so walk every page
        paginator = s3.get_paginator('list_objects_v2')

        keys = []
        for page in paginator.paginate(Bucket=bucket_name, Prefix=folder_name):
            keys.extend(content['Key'] for content in page.get('Contents', []))

        if not keys:
            logger.info(f"No files found in folder {folder_name} in bucket {bucket_name}.")
            return []

        # Filter XML files
        return [key for key in keys if key.endswith('.xml')]
    except NoCredentialsError:
        logger.error("Credentials not available.")
        return []
    except PartialCredentialsError:
        logger.error("Incomplete credentials provided.")
        return []
    except Exception as e:
        logger.error(f'Failed to list XML files in folder {folder_name} in bucket {bucket_name}: {str(e)}')
        return []