Merge pull request #334 from Hestia-Homes/kwh-bills-model-fixes

Kwh bills model fixes
This commit is contained in:
KhalimCK 2024-09-03 17:23:58 +01:00 committed by GitHub
commit 6c8beed5e5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
43 changed files with 5662 additions and 1272 deletions

View file

@ -2,13 +2,13 @@ import os
import ast
from itertools import groupby
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from etl.epc.Dataset import TrainingDataset
from etl.epc.Record import EPCRecord
from etl.epc.settings import LATEST_FIELD, MANDATORY_FIXED_FEATURES
from etl.epc_clean.epc_attributes.all_cleaners import all_cleaner_map
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from utils.logger import setup_logger
from utils.s3 import read_dataframe_from_s3_parquet
from etl.epc.settings import DATA_ANOMALY_MATCHES
@ -17,10 +17,11 @@ from recommendations.recommendation_utils import (
estimate_perimeter,
get_wall_type,
estimate_external_wall_area,
esimtate_pitched_roof_area,
estimate_windows,
)
from backend.ml_models.AnnualBillSavings import AnnualBillSavings
from backend.app.utils import sap_to_epc
import backend.app.assumptions as assumptions
ENVIRONMENT = os.environ.get("ENVIRONMENT", "dev")
DATA_BUCKET = os.environ.get(
@ -91,6 +92,7 @@ class Property:
self.data = {
k.replace("_", "-"): v for k, v in epc_record.get("prepared_epc").items()
}
self.old_data = epc_record.get("old_data")
self.property_dimensions = None
# This is a list of measures that have already been installed in the property, typically found as a result
@ -171,8 +173,8 @@ class Property:
self.windows_area = None
self.solar_pv_percentage = None
self.current_adjusted_energy = None
self.expected_adjusted_energy = None
self.current_energy_consumption = None
self.current_energy_consumption_heating_hotwater = None
self.current_energy_bill = None
self.expected_energy_bill = None
@ -181,6 +183,7 @@ class Property:
self.recommendations_scoring_data = []
self.simulation_epcs = {}
self.updated_simulation_epcs = []
# This additional condition data should change how we pass kwargs to this. We should no longer need to pass
# kwargs to this class, but instead, we should pass the energy assessment condition data
@ -211,17 +214,32 @@ class Property:
if n_bedrooms not in [None, ""]:
n_bedrooms = int(round(float(n_bedrooms) + 1e-5))
number_of_floors = kwargs.get("number_of_floors", None)
if number_of_floors not in [None, ""]:
number_of_floors = int(round(float(number_of_floors) + 1e-5))
insulation_floor_area = kwargs.get("insulation_floor_area", None)
if insulation_floor_area not in [None, ""]:
insulation_floor_area = float(insulation_floor_area)
insulation_wall_area = kwargs.get("insulation_wall_area", None)
if insulation_wall_area not in [None, ""]:
insulation_wall_area = float(insulation_wall_area)
return {
"n_bathrooms": n_bathrooms,
"n_bedrooms": n_bedrooms,
"number_of_floors": number_of_floors,
"insulation_floor_area": insulation_floor_area,
"insulation_wall_area": insulation_wall_area,
"building_id": kwargs.get("building_id", None),
}
def parse_kwargs(self, kwargs):
    """
    Copy the supplied keyword overrides onto the instance.

    ``n_bathrooms``, ``n_bedrooms`` and ``building_id`` are always assigned, falling back to ``None`` when
    they are absent from ``kwargs``. After that, every entry in ``kwargs`` whose value is not ``None`` is
    stored as an attribute under its own key - note that this includes keys other than the three above.
    :param kwargs: dictionary mapping attribute names to the values that should be stored
    :return:
    """
    # These three attributes must always exist on the instance, even when no value was supplied
    for always_set in ("n_bathrooms", "n_bedrooms", "building_id"):
        setattr(self, always_set, kwargs.get(always_set))

    # Any entry with a real value is stored as-is; None values are skipped so they do not
    # overwrite an attribute that has already been set elsewhere
    supplied = {name: value for name, value in kwargs.items() if value is not None}
    for name, value in supplied.items():
        setattr(self, name, value)
def create_base_difference_epc_record(self, cleaned_lookup: dict):
"""
@ -359,68 +377,88 @@ class Property:
)
self.recommendations_scoring_data.append(scoring_dict)
# We also use the representative recommendations to produce transformed EPCs
represenative_recs_to_this_phase = [
r for r in property_representative_recommendations
if r["phase"] <= phase
]
simulation_epc = self.epc_record.prepared_epc.copy()
# Insert static values
simulation_epc["lodgement_date"] = simulation_lodgment_date
simulation_epc = {k.replace("_", "-"): v for k, v in simulation_epc.items()}
# TODO: This is placeholder, but it's to handle the case of having both internal and external wall
# insulation as options. This will cause the process below to fall over, so we take just
# external wall insulation in epc_transformations, if we have both
types = [
x["type"] for x in represenative_recs_to_this_phase
]
if "external_wall_insulation" in types and "internal_wall_insulation" in types:
types = [x["type"] for x in previous_phase_representatives]
if "external_wall_insulation" in types and "internal_wall_insulation" in types:
raise Exception("We shouldn't have this in the representative recommendations")
# We include previous phases + the recommendation itself in the EPC transformations
epc_transformations = [
x["description_simulation"] for x in represenative_recs_to_this_phase if
x["type"] != "internal_wall_insulation"
]
else:
epc_transformations = [
x["description_simulation"] for x in represenative_recs_to_this_phase
x["description_simulation"] for x in previous_phase_representatives + [rec]
]
# It is possible that we could have two simulations applied to the same descriptions
# We extract these out
phase_epc_transformation = {}
for config in epc_transformations:
for k, v in config.items():
if k in phase_epc_transformation:
if "-energy-eff" in k:
# We take the highest value
if phase_epc_transformation[k] == "Very Good":
# It is possible that we could have two simulations applied to the same descriptions
# We extract these out
phase_epc_transformation = {}
for config in epc_transformations:
for k, v in config.items():
if k in phase_epc_transformation:
if "-energy-eff" in k:
# We take the highest value
if phase_epc_transformation[k] == "Very Good":
continue
elif phase_epc_transformation[k] == "Good":
if v == "Very Good":
phase_epc_transformation[k] = v
elif phase_epc_transformation[k] == "Average":
if v in ["Good", "Very Good"]:
phase_epc_transformation[k] = v
elif phase_epc_transformation[k] == "Poor":
if v in ["Average", "Good", "Very Good"]:
phase_epc_transformation[k] = v
else:
phase_epc_transformation[k] = v
continue
elif phase_epc_transformation[k] == "Good":
if v == "Very Good":
phase_epc_transformation[k] = v
elif phase_epc_transformation[k] == "Average":
if v in ["Good", "Very Good"]:
phase_epc_transformation[k] = v
elif phase_epc_transformation[k] == "Poor":
if v in ["Average", "Good", "Very Good"]:
phase_epc_transformation[k] = v
else:
phase_epc_transformation[k] = v
continue
if phase_epc_transformation[k] == v:
continue
if phase_epc_transformation[k] == v:
continue
raise NotImplementedError(
"Already have this key in the phase_epc_transformation - implement me"
)
phase_epc_transformation[k] = v
simulation_epc.update(phase_epc_transformation)
self.simulation_epcs[rec["recommendation_id"]] = simulation_epc
raise NotImplementedError(
"Already have this key in the phase_epc_transformation - implement me"
)
phase_epc_transformation[k] = v
def update_simulation_epcs(self, impact_summary):
"""
This method will insert the high level measures, such as SAP, heat demand, carbon, etc
:return:
"""
if self.simulation_epcs is None:
raise ValueError("Simulation EPCs have not been created")
simulation_epc = self.epc_record.prepared_epc.copy()
# Insert static values
simulation_epc["lodgement_date"] = simulation_lodgment_date
rec_ids = sorted(list(self.simulation_epcs.keys()))
updated_simulation_epcs = []
for rec_id in rec_ids:
sim_epc = self.simulation_epcs[rec_id].copy()
rec_impact = [x for x in impact_summary if x["recommendation_id"] == rec_id][0]
# We update all of the features that should have an impact on the kwh model
# Replace the underscores with hyphens
simulation_epc = {k.replace("_", "-"): v for k, v in simulation_epc.items()}
simulation_epc.update(phase_epc_transformation)
self.simulation_epcs[phase] = simulation_epc
sim_epc.update(
{
# CO₂ emissions per square metre floor area per year in kg/m². Since CO₂ emissions are in tonnes
# per year, we multiply by 1000 to get kg/m²
"co2-emiss-curr-per-floor-area": round(
1000 * (rec_impact["carbon"] / self.data["total-floor-area"])
),
"co2-emissions-current": rec_impact["carbon"],
"current-energy-rating": sap_to_epc(rec_impact["sap"]),
"current-energy-efficiency": int(np.floor(rec_impact["sap"])),
"energy-consumption-current": rec_impact["heat_demand"],
"id": "+".join([str(self.id), rec_id])
}
)
updated_simulation_epcs.append(sim_epc)
# Keep the updated simulation EPCs on the instance as well as returning them
self.updated_simulation_epcs = updated_simulation_epcs
return updated_simulation_epcs
@staticmethod
def create_recommendation_scoring_data(
@ -455,81 +493,6 @@ class Property:
for recommendation in recommendations:
# For the list of recommendations we have, we iteratively update the output
# Update description to indicate it's insulate
if recommendation["type"] in [
"solid_floor_insulation",
"suspended_floor_insulation",
"exposed_floor_insulation",
]:
if len(recommendation["parts"]) > 1:
raise NotImplementedError(
"Have more than 1 floor insulation part - handle this case"
)
# We don't really see above average for this in the training data
output["floor_insulation_thickness_ending"] = "average"
else:
if output["floor_thermal_transmittance_ending"] is None:
raise ValueError("We should not have a None value for the u value")
if output["floor_insulation_thickness_ending"] is None:
output["floor_insulation_thickness_ending"] = "none"
if recommendation["type"] in [
"loft_insulation",
"room_roof_insulation",
"flat_roof_insulation",
]:
output["roof_thermal_transmittance_ending"] = recommendation[
"new_u_value"
]
parts = recommendation["parts"]
if len(parts) != 1:
raise ValueError(
"More than one part for roof insulation - investiage me"
)
# This is based on the values we have in the training data
valid_numeric_values = [
12,
25,
50,
75,
100,
150,
200,
250,
270,
300,
350,
400,
]
proposed_depth = recommendation["new_thickness"]
if proposed_depth not in valid_numeric_values:
# Take the nearest value for scoring
proposed_depth = min(
valid_numeric_values, key=lambda x: abs(x - proposed_depth)
)
output["roof_insulation_thickness_ending"] = str(int(proposed_depth))
if recommendation["type"] == "loft_insulation":
if proposed_depth >= 270:
output["roof_energy_eff_ending"] = "Very Good"
else:
if output["roof_energy_eff_ending"] not in ["Good", "Very Good"]:
output["roof_energy_eff_ending"] = "Good"
else:
output["roof_energy_eff_ending"] = "Very Good"
else:
# Fill missing roof u-values - this fill is not based on recommended upgrades
if output["roof_thermal_transmittance_ending"] is None:
raise ValueError("We should not have a None value for the u value")
if output["roof_insulation_thickness_ending"] is None:
output["roof_insulation_thickness_ending"] = "none"
if recommendation["type"] == "sealing_open_fireplace":
output["number_open_fireplaces_ending"] = 0
@ -573,12 +536,28 @@ class Property:
if recommendation["type"] in [
"heating", "hot_water_tank_insulation", "heating_control", "secondary_heating",
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
"cylinder_thermostat"
"cylinder_thermostat", "loft_insulation", "room_roof_insulation", "flat_roof_insulation",
"solid_floor_insulation", "suspended_floor_insulation",
]:
# We update the data, as defined in the recommendation
if output["walls_insulation_thickness_ending"] is None:
output["walls_insulation_thickness_ending"] = "none"
if output["walls_thermal_transmittance_ending"] is None:
raise ValueError("We should not have a None value for the u value")
if output["roof_insulation_thickness_ending"] is None:
output["roof_insulation_thickness_ending"] = "none"
if output["roof_thermal_transmittance_ending"] is None:
raise ValueError("We should not have a None value for the u value")
if output["floor_thermal_transmittance_ending"] is None:
raise ValueError("We should not have a None value for the u value")
if output["floor_insulation_thickness_ending"] is None:
output["floor_insulation_thickness_ending"] = "none"
simulation_config = recommendation["simulation_config"]
# If any entries in simulation_config are None, we will set them to "Unknown" which is the cleaning
# value
@ -595,7 +574,7 @@ class Property:
"sealing_open_fireplace", "low_energy_lighting",
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
"loft_insulation", "room_roof_insulation", "flat_roof_insulation",
"solid_floor_insulation", "suspended_floor_insulation", "exposed_floor_insulation",
"solid_floor_insulation", "suspended_floor_insulation",
"windows_glazing", "solar_pv", "heating", "hot_water_tank_insulation",
"heating_control", "secondary_heating", "cylinder_thermostat"
]:
@ -607,17 +586,18 @@ class Property:
return output
def get_components(
def set_features(
self,
cleaned,
energy_consumption_client
kwh_client,
kwh_predictions
):
"""
Given the cleaning that has been performed, we'll use this to identify the property
components, from roof to walls to windows, heating and hot water
:param cleaned: This is the dictionary of components found in cleaner.cleaned
:param energy_consumption_client: Contains the heating and hot water kwh models - used to predict current
energy annual consumption in kWh
:param kwh_client: The client that will be used to convert the energy costs to today's costs
:param kwh_predictions: Contains the kwh predictions for heating and hot water
:return:
"""
@ -682,7 +662,7 @@ class Property:
self.set_windows_count()
self.set_energy_source()
self.find_energy_sources()
self.set_current_energy_bill(energy_consumption_client)
self.set_current_energy_bill(kwh_client, kwh_predictions)
def set_solar_panel_configuration(
self, solar_panel_configuration, roof_area
@ -695,7 +675,7 @@ class Property:
# We also set the roof area
self.roof_area = roof_area
def set_current_energy_bill(self, energy_consumption_client):
def set_current_energy_bill(self, kwh_client, kwh_predictions):
"""
Given what we know about the property now, estimates the current energy consumption using the UCL paper
https://www.sciencedirect.com/science/article/pii/S0378778823002542
@ -707,15 +687,7 @@ class Property:
# 2) Predicted KwH
# Today's costs
todays_heating_cost = energy_consumption_client.convert_cost_to_today(
original_cost=float(self.data["heating-cost-current"]),
lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"]).tz_localize(None)
)
todays_hot_water_cost = energy_consumption_client.convert_cost_to_today(
original_cost=float(self.data["hot-water-cost-current"]),
lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"]).tz_localize(None)
)
todays_lighting_cost = energy_consumption_client.convert_cost_to_today(
todays_lighting_cost = kwh_client.convert_cost_to_today(
original_cost=float(self.data["lighting-cost-current"]),
lodgement_date=pd.Timestamp(self.epc_record.prepared_epc["lodgement_date"]).tz_localize(None)
)
@ -723,97 +695,50 @@ class Property:
# If we have the kwh figures, we don't need to predict them
condition_data = self.energy_assessment_condition_data.copy()
scoring_df = pd.DataFrame([self.epc_record.prepared_epc])
# Change columns from underscores to hyphens
scoring_df.columns = [
x.lower().replace("_", "-") for x in scoring_df.columns
]
for col in ["heating_kwh", "hot_water_kwh"]:
scoring_df[col] = None
energy_consumption_client.data = None
heating_kwh_predictions = kwh_predictions["heating_kwh_predictions"]
hotwater_kwh_predictions = kwh_predictions["hotwater_kwh_predictions"]
heating_prediction = (
float(condition_data["space_heating_kwh"]) if condition_data.get("space_heating_kwh") is not None
else energy_consumption_client.score_new_data(
new_data=scoring_df, target="heating_kwh"
)[0]
condition_data.get("space_heating_kwh") if condition_data.get("space_heating_kwh") is not None else
heating_kwh_predictions[
heating_kwh_predictions["id"].astype(int) == self.uprn
]["predictions"].values[0]
)
hot_water_prediction = (
float(condition_data["water_heating_kwh"]) if condition_data.get("water_heating_kwh") is not None
else energy_consumption_client.score_new_data(
new_data=scoring_df, target="hot_water_kwh"
)[0]
condition_data.get("water_heating_kwh") if condition_data.get("water_heating_kwh") is not None else
hotwater_kwh_predictions[
hotwater_kwh_predictions["id"].astype(int) == self.uprn
]["predictions"].values[0]
)
# We convert the lighting cost into kwh, just using the price cap
lighting_kwh = float(self.data["lighting-cost-current"]) / AnnualBillSavings.ELECTRICITY_PRICE_CAP
lighting_kwh = todays_lighting_cost / AnnualBillSavings.ELECTRICITY_PRICE_CAP
appliances_kwh = AnnualBillSavings.estimate_appliances_energy_use(total_floor_area=self.floor_area)
adjusted_heating_kwh = AnnualBillSavings.adjust_energy_to_metered(
epc_energy=heating_prediction,
current_epc_rating=self.data["current-energy-rating"],
)
unadjusted_kwh_estimates = {
"heating": float(heating_prediction),
"hot_water": float(hot_water_prediction),
"lighting": float(lighting_kwh),
"appliances": float(appliances_kwh)
}
adjusted_hot_water_kwh = AnnualBillSavings.adjust_energy_to_metered(
epc_energy=hot_water_prediction,
current_epc_rating=self.data["current-energy-rating"],
)
adjusted_lighting_kwh = AnnualBillSavings.adjust_energy_to_metered(
epc_energy=lighting_kwh,
current_epc_rating=self.data["current-energy-rating"],
)
adjusted_applicances_kwh = AnnualBillSavings.adjust_energy_to_metered(
epc_energy=appliances_kwh,
current_epc_rating=self.data["current-energy-rating"],
)
# Adjust today's cost figures with the UCL model
adjusted_heating_cost = AnnualBillSavings.adjust_energy_to_metered(
epc_energy=todays_heating_cost,
current_epc_rating=self.data["current-energy-rating"],
)
adjusted_hot_water_cost = AnnualBillSavings.adjust_energy_to_metered(
epc_energy=todays_hot_water_cost,
current_epc_rating=self.data["current-energy-rating"],
)
adjusted_lighting_cost = AnnualBillSavings.adjust_energy_to_metered(
epc_energy=todays_lighting_cost,
current_epc_rating=self.data["current-energy-rating"],
)
adjusted_appliances_cost = AnnualBillSavings.adjust_energy_to_metered(
epc_energy=appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP,
current_epc_rating=self.data["current-energy-rating"],
)
unadjusted_heating_costs = {
"heating": None,
"hot_water": None,
"lighting": float(todays_lighting_cost),
"appliances": float(appliances_kwh) * AnnualBillSavings.ELECTRICITY_PRICE_CAP
}
# Sum up the adjusted kwh figures
self.current_adjusted_energy = (
adjusted_heating_kwh + adjusted_hot_water_kwh + adjusted_lighting_kwh + adjusted_applicances_kwh
)
self.current_energy_bill = (
adjusted_heating_cost + adjusted_hot_water_cost + adjusted_lighting_cost + adjusted_appliances_cost
self.current_energy_consumption = sum(list(unadjusted_kwh_estimates.values()))
self.current_energy_consumption_heating_hotwater = (
unadjusted_kwh_estimates["heating"] + unadjusted_kwh_estimates["hot_water"]
)
self.energy_cost_estimates = {
"adjusted": {
"heating": adjusted_heating_cost,
"hot_water": adjusted_hot_water_cost,
"lighting": adjusted_lighting_cost,
"appliances": adjusted_appliances_cost
},
"unadjusted": {
"heating": todays_heating_cost,
"hot_water": todays_hot_water_cost,
"lighting": todays_lighting_cost,
"appliances": appliances_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
},
"unadjusted": unadjusted_heating_costs,
"epc": {
"heating": float(self.data["heating-cost-current"]),
"hot_water": float(self.data["hot-water-cost-current"]),
@ -822,18 +747,7 @@ class Property:
}
self.energy_consumption_estimates = {
"adjusted": {
"heating": adjusted_heating_kwh,
"hot_water": adjusted_hot_water_kwh,
"lighting": adjusted_lighting_kwh,
"appliances": adjusted_applicances_kwh
},
"unadjusted": {
"heating": heating_prediction,
"hot_water": hot_water_prediction,
"lighting": lighting_kwh,
"appliances": appliances_kwh
}
"unadjusted": unadjusted_kwh_estimates
}
def set_spatial(self, spatial: pd.DataFrame):
@ -972,7 +886,8 @@ class Property:
"energy_tariff": self.data["energy-tariff"],
"primary_energy_consumption": self.energy["primary_energy_consumption"],
"co2_emissions": self.energy["co2_emissions"],
"adjusted_energy_consumption": self.current_adjusted_energy,
"current_energy_demand": self.current_energy_consumption,
"current_energy_demand_heating_hotwater": self.current_energy_consumption_heating_hotwater,
"estimated": self.data.get("estimated", False),
}
@ -1060,18 +975,22 @@ class Property:
# We can update the number of floors if we have this information in the condition data
self.number_of_floors = int(self.energy_assessment_condition_data["number_of_floors"]) \
if condition_data.get("number_of_floors") is not None \
if (condition_data.get("number_of_floors") is not None) and (self.number_of_floors is not None) \
else self.number_of_floors
self.perimeter = float(self.energy_assessment_condition_data["perimeter"]) \
if condition_data.get("perimeter") is not None \
else estimate_perimeter(
floor_area=self.floor_area / self.number_of_floors,
num_rooms=self.number_of_rooms / self.number_of_floors
)
# If we already have this, we re-engineer the perimeter
if self.insulation_floor_area is not None:
self.perimeter = np.sqrt(self.insulation_floor_area) * 4
else:
self.perimeter = float(self.energy_assessment_condition_data["perimeter"]) \
if condition_data.get("perimeter") is not None \
else estimate_perimeter(
floor_area=self.floor_area / self.number_of_floors,
num_rooms=self.number_of_rooms / self.number_of_floors
)
self.insulation_wall_area = float(self.energy_assessment_condition_data["insulation_wall_area"]) \
if condition_data.get("insulation_wall_area") is not None \
if (condition_data.get("insulation_wall_area") is not None) and (self.insulation_wall_area is not None) \
else estimate_external_wall_area(
num_floors=self.number_of_floors,
floor_height=self.floor_height,
@ -1079,9 +998,12 @@ class Property:
built_form=self.data["built-form"],
)
self.insulation_floor_area = float(self.energy_assessment_condition_data["main_dwelling_ground_floor_area"]) \
if condition_data.get("main_dwelling_ground_floor_area") is not None \
else self.floor_area / self.number_of_floors
if self.insulation_floor_area is None:
self.insulation_floor_area = float(
self.energy_assessment_condition_data["main_dwelling_ground_floor_area"]
) if (condition_data.get("main_dwelling_ground_floor_area") is not None) else (
self.floor_area / self.number_of_floors
)
def set_floor_level(self):
self.floor_level = (
@ -1163,16 +1085,6 @@ class Property:
return component_data
def set_adjusted_energy(
self, expected_adjusted_energy, expected_energy_bill
):
"""
Stores these values for usage later
"""
self.expected_adjusted_energy = expected_adjusted_energy
self.expected_energy_bill = expected_energy_bill
def set_windows_count(self):
"""
Using the estimate_windows function, this method will set the number of windows in the property
@ -1237,7 +1149,9 @@ class Property:
'has_exhaust_source_heat_pump': 'Electricity',
'has_community_heat_pump': 'Electricity',
'has_wood_pellets': 'Wood Pellets',
'has_community_scheme': 'Varied (Community Scheme)'
'has_community_scheme': 'Varied (Community Scheme)',
"has_dual_fuel_mineral_and_wood": 'Wood Logs',
"has_electricaire": 'Electricity',
}
# Hot water
@ -1263,20 +1177,57 @@ class Property:
'community scheme': 'Community Scheme'
}
self.heating_energy_source = [
self.heating_energy_source = list({
fuel for key, fuel in heating_fuel_mapping.items() if self.main_heating.get(key, False)
]
})
if set(self.heating_energy_source) == {'Electricity', 'Natural Gas'}:
# It means they have mixed heating so we take the primary one, based on main fuel
# This will probably happen in the case of an extension
if self.main_fuel["clean_description"] in ["Mains gas not community", "Mains gas community"]:
self.heating_energy_source = ['Natural Gas']
else:
self.heating_energy_source = ['Electricity']
if set(self.heating_energy_source) == {'Natural Gas', 'Wood Logs'}:
# It means they have mixed heating so we take the primary one, based on main fuel
# This will probably happen in the case of an extension
if self.main_fuel["clean_description"] in ["Mains gas not community", "Mains gas community"]:
self.heating_energy_source = ['Natural Gas']
else:
self.heating_energy_source = ['Wood Logs']
if len(self.heating_energy_source) == 0 or len(self.heating_energy_source) > 1:
raise Exception("Investigate em")
raise Exception("Investigate me")
self.heating_energy_source = self.heating_energy_source[0]
if self.heating_energy_source == "Varied (Community Scheme)":
if self.main_fuel["fuel_type"] == "mains gas":
self.heating_energy_source = "Natural Gas (Community Scheme)"
else:
raise Exception("Implement me")
if self.hotwater["heater_type"] is not None:
self.hot_water_energy_source = heater_type_to_fuel[self.hotwater["heater_type"]]
if self.hotwater["extra_features"] == "plus solar":
self.hot_water_energy_source = self.heating_energy_source + " + Solar Thermal"
return
else:
fuel = system_type_modification[self.hotwater["system_type"]]
if fuel == 'Main System':
if self.hotwater["extra_features"] == "plus solar":
self.hot_water_energy_source = self.heating_energy_source + " + Solar Thermal"
return
if fuel in ['Main System', "Community Scheme"]:
self.hot_water_energy_source = self.heating_energy_source
elif fuel in ['Secondary System']:
# Check the secondary heating system
secondary_heating = self.data["secondheat-description"]
self.hot_water_energy_source = assumptions.DESCRIPTIONS_TO_FUEL_TYPES[secondary_heating]["fuel"]
else:
raise Exception("Investiage me")
@ -1329,29 +1280,39 @@ class Property:
exclusions = [] if exclusions is None else exclusions
if (self.main_fuel["fuel_type"] == "electricity") or (
self.main_fuel["fuel_type"] == "mains gas" and not self.is_ashp_valid(exclusions=exclusions)
# If the property currently has an ASHP, we don't gain from any efficiency improvements
if not self.is_ashp_valid(exclusions=exclusions):
return self.current_energy_consumption
# If the property currently has an electric boiler, it will still benefit from the ASHP efficiency gain
remap_fuel_sources = [
"Natural Gas", "LPG", "Wood Logs", "Oil", "Electricity", "Coal", "Smokeless Fuel",
"Natural Gas + Solar Thermal", "Anthracite", "Wood Pellets", "LPG + Solar Thermal"
]
heating_energy_source = self.heating_energy_source
hot_water_energy_source = self.hot_water_energy_source
heating_consumption = self.energy_consumption_estimates["unadjusted"]["heating"]
hotwater_consumption = self.energy_consumption_estimates["unadjusted"]["hot_water"]
if (heating_energy_source not in remap_fuel_sources) or (
hot_water_energy_source not in remap_fuel_sources + ["Electricity + Solar Thermal"]
):
# if the primary fuel is already electricity, we don't need to adjust the consumption
return self.current_adjusted_energy
raise NotImplementedError("Have not implemented estimating electrical consumption for this fuel type")
if self.main_fuel["fuel_type"] == "mains gas" and self.is_ashp_valid(exclusions=exclusions):
# if the primary fuel is gas, we need to adjust the consumption to reflect the expected
# efficiency of an ASHP.
# We should adjust the energy consumption to reflect the 200-400% efficiency of an ASHP with
# electrified heating, so that the solar panel can cover heating generation.
heating_consumption = self.energy_consumption_estimates["adjusted"]["heating"]
hot_water_consumption = self.energy_consumption_estimates["adjusted"]["hot_water"]
if heating_energy_source in remap_fuel_sources:
# Adjust the heating consumption to reflect the expected efficiency of an ASHP
heating_consumption = heating_consumption / (assumed_ashp_efficiency / 100)
systems_consumptions = heating_consumption + hot_water_consumption
if hot_water_energy_source in remap_fuel_sources:
# Adjust the hot water consumption to reflect the expected efficiency of an ASHP
hotwater_consumption = hotwater_consumption / (assumed_ashp_efficiency / 100)
adjusted_consumption = systems_consumptions / (assumed_ashp_efficiency / 100)
electric_consumption = (
adjusted_consumption +
self.energy_consumption_estimates["adjusted"]["lighting"] +
self.energy_consumption_estimates["adjusted"]["appliances"]
)
electric_consumption = (
heating_consumption +
hotwater_consumption +
self.energy_consumption_estimates["unadjusted"]["lighting"] +
self.energy_consumption_estimates["unadjusted"]["appliances"]
)
return electric_consumption
raise NotImplementedError("Have not implemented estimating electrical consumption for this fuel type")
return electric_consumption

View file

@ -292,8 +292,7 @@ class SearchEpc:
"error": str(e)
}
@staticmethod
def filter_rows(rows, property_type=None, address=None):
def filter_rows(self, rows, property_type=None, address=None):
"""
This method should not be used when property_type and address are both not None
:param rows:
@ -321,8 +320,21 @@ class SearchEpc:
if address is not None:
# We can do a filter on the address
best_match = process.extractOne(address, [r["address"] for r in rows], score_cutoff=0)
rows_filtered = [r for r in rows if r["address"] == best_match[0]]
# We check if the full address contains the postcode and if it does, remove
if self.postcode in address:
address = address.replace(self.postcode, "").strip().rstrip(",")
# We check if post town is included in the address
if any([r["posttown"].lower() in address.lower() for r in rows]):
best_match = process.extractOne(
address, [", ".join([r["address"], r["posttown"]]) for r in rows], score_cutoff=0
)
# Get all of the scores
rows_filtered = [r for r in rows if ", ".join([r["address"], r["posttown"]]) == best_match[0]]
else:
best_match = process.extractOne(address, [r["address"] for r in rows], score_cutoff=0)
# Get all of the scores
rows_filtered = [r for r in rows if r["address"] == best_match[0]]
if rows_filtered:
return rows_filtered

View file

@ -17,10 +17,6 @@ logger = setup_logger()
class GoogleSolarApi:
NORTH_FACING_AZIMUTH_RANGE = (-30, 30)
# Conservative estimate of the proportion of electricity that will be consumed, whereas the rest will
# be exported
SOLAR_CONSUMPTION_PROPORTION = 0.5
# These are variables, described in the documentation for cost analysis for non-us locations, seen here
# https://developers.google.com/maps/documentation/solar/calculate-costs-non-us
# We use the default figures that the API uses for US locations
@ -152,7 +148,7 @@ class GoogleSolarApi:
# Extract key data from the insights response
self.roof_segments = self.insights_data["solarPotential"].get('roofSegmentStats', [])
# Automatically exclude north-facing segments
self.exclude_north_facing_segments()
self.exclude_north_facing_segments(property_instance=property_instance)
# If a property is semi-detached, it's possible for us to include segments from an attached unit
if (property_instance.data["built-form"] == "Semi-Detached") and (
property_instance.data["extension-count"] == 0
@ -262,7 +258,7 @@ class GoogleSolarApi:
# Remove any north facing roof segments
panel_performance = []
for config in self.insights_data["solarPotential"]["solarPanelConfigs"]:
for config in self.insights_data["solarPotential"].get("solarPanelConfigs", []):
roof_segment_summaries = config["roofSegmentSummaries"]
# Filter on just the segments in self.roof_segment_indexes
roof_segment_summaries = [
@ -295,6 +291,8 @@ class GoogleSolarApi:
)
roi_summary = pd.DataFrame(roi_summary)
if roi_summary.empty:
continue
weighted_ratio = np.average(
roi_summary["ratio"].values, weights=roi_summary["generated_dc_energy"].values
@ -314,13 +312,49 @@ class GoogleSolarApi:
)
panel_performance = pd.DataFrame(panel_performance)
# We can have duplicate configurations
if panel_performance.empty:
self.panel_performance = pd.DataFrame(
columns=[
"n_panels",
"yearly_dc_energy",
"total_cost",
"panneled_roof_area",
"array_wattage",
"initial_ac_kwh_per_year",
"lifetime_ac_kwh",
"roi",
"expected_payback_years",
"lifetime_dc_kwh"
]
)
return
# We can have duplicate configurations
panel_performance = panel_performance.drop_duplicates()
# If we look at the building level, we don't include any projects fewer than 10 panels, otherwise the
# minimum is 4
min_panels = 10 if is_building else 4
panel_performance = panel_performance[panel_performance["n_panels"] >= min_panels]
if panel_performance.empty:
self.panel_performance = pd.DataFrame(
columns=[
"n_panels",
"yearly_dc_energy",
"total_cost",
"panneled_roof_area",
"array_wattage",
"initial_ac_kwh_per_year",
"lifetime_ac_kwh",
"roi",
"expected_payback_years",
"lifetime_dc_kwh"
]
)
return
panel_performance["initial_ac_kwh_per_year"] = panel_performance["yearly_dc_energy"] * self.dc_to_ac_rate
# Remove anything where the total ac energy is less than half of the array wattage
@ -455,7 +489,7 @@ class GoogleSolarApi:
self.panel_performance = panel_performance
def exclude_north_facing_segments(self):
def exclude_north_facing_segments(self, property_instance):
"""
Filter out any north-facing roof segments from the roof_segments attribute.
@ -466,7 +500,9 @@ class GoogleSolarApi:
for segment_index, segment in enumerate(self.roof_segments):
segment["segmentIndex"] = segment_index
# Check if the segment is north-facing
if self.NORTH_FACING_AZIMUTH_RANGE[0] <= segment['azimuthDegrees'] <= self.NORTH_FACING_AZIMUTH_RANGE[1]:
if (
self.NORTH_FACING_AZIMUTH_RANGE[0] <= segment['azimuthDegrees'] <= self.NORTH_FACING_AZIMUTH_RANGE[1]
) and not property_instance.roof["is_flat"]:
continue
filtered_segments.append(segment)

View file

@ -1,3 +1,44 @@
# Assumes that the average efficiency of an air source heat pump is 300%, taking the median of the 200-400% range,
# Assumes that the average efficiency of an air source heat pump is 300%, taking the median of the 200-400% range,
# which is often quoted as a sensible efficiency range for air source heat pumps.
PESSIMISTIC_ASHP_EFFICIENCY = 200
AVERAGE_ASHP_EFFICIENCY = 300
# Conservative estimate of the proportion of electricity that will be consumed, whereas the rest will
# be exported
SOLAR_CONSUMPTION_PROPORTION = 0.5
# Lookup from a heating / hot water system description string to the fuel it runs on ("fuel") and an
# efficiency ratio ("cop", where 1 == 100%). Heat pump entries take their ratio from AVERAGE_ASHP_EFFICIENCY,
# which is held as a percentage and so is divided by 100 here.
# NOTE(review): "cop" is presumably a coefficient of performance that a kWh demand is divided by to get the
# fuel energy purchased - confirm against the code that consumes this mapping.
DESCRIPTIONS_TO_FUEL_TYPES = {
"Air source heat pump, radiators, electric": {
"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100
},
"Boiler and radiators, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
'Electric storage heaters': {"fuel": 'Electricity', "cop": 1},
"Electric immersion, off-peak": {"fuel": 'Electricity', "cop": 1},
"Electric storage heaters, radiators": {"fuel": 'Electricity', "cop": 1},
"Room heaters, electric": {"fuel": 'Electricity', "cop": 1},
"Electric immersion, standard tariff": {"fuel": 'Electricity', "cop": 1},
"Portable electric heaters assumed for most rooms": {"fuel": 'Electricity', "cop": 1},
"Boiler and radiators, LPG": {"fuel": 'LPG', "cop": 0.9},
"Room heaters, dual fuel (mineral and wood)": {"fuel": 'Wood Logs', "cop": 1},
"Room heaters, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
"Warm air, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
"Boiler, mains gas": {"fuel": 'Natural Gas', "cop": 0.9},
"Gas multipoint": {"fuel": "Natural Gas", "cop": 0.9},
"Warm air, Electricaire": {"fuel": "Electricity", "cop": 1},
"Gas boiler/circulator": {"fuel": "Natural Gas", "cop": 0.9},
"Boiler and underfloor heating, mains gas": {"fuel": "Natural Gas", "cop": 0.9},
"No system present: electric heaters assumed": {"fuel": "Electricity", "cop": 1},
"Electric instantaneous at point of use": {"fuel": "Electricity", "cop": 1},
"Boiler and radiators, oil": {"fuel": "Oil", "cop": 0.9},
"Electric storage heaters, Electric storage heaters": {"fuel": "Electricity", "cop": 1},
"Boiler and radiators, electric": {"fuel": "Electricity", "cop": 0.9},
"Gas boiler/circulator, no cylinder thermostat": {"fuel": "Natural Gas", "cop": 0.9},
"Boiler and radiators, dual fuel (mineral and wood)": {"fuel": "Wood Logs", "cop": 0.9},
"Electric immersion, standard tariff, plus solar": {"fuel": "Electricity + Solar Thermal", "cop": 1},
"From main system, flue gas heat recovery": {"fuel": "Natural Gas", "cop": 0.9},
"Electric underfloor heating": {"fuel": "Electricity", "cop": 1},
"No system present: electric immersion assumed": {"fuel": "Electricity", "cop": 1},
"Air source heat pump, underfloor, electric": {
"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100
},
}

View file

@ -30,6 +30,8 @@ class Settings(BaseSettings):
LIGHTING_COST_PREDICTIONS_BUCKET: str
HEATING_COST_PREDICTIONS_BUCKET: str
HOT_WATER_COST_PREDICTIONS_BUCKET: str
HEATING_KWH_PREDICTIONS_BUCKET: str
HOTWATER_KWH_PREDICTIONS_BUCKET: str
class Config:
env_file = "backend/.env"
@ -48,5 +50,7 @@ def get_prediction_buckets():
"carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET,
"lighting_cost_predictions": get_settings().LIGHTING_COST_PREDICTIONS_BUCKET,
"heating_cost_predictions": get_settings().HEATING_COST_PREDICTIONS_BUCKET,
"hot_water_cost_predictions": get_settings().HOT_WATER_COST_PREDICTIONS_BUCKET
"hot_water_cost_predictions": get_settings().HOT_WATER_COST_PREDICTIONS_BUCKET,
"heating_kwh_predictions": get_settings().HEATING_KWH_PREDICTIONS_BUCKET,
"hotwater_kwh_predictions": get_settings().HOTWATER_KWH_PREDICTIONS_BUCKET,
}

View file

@ -3,6 +3,7 @@ import pytz
import datetime
from sqlalchemy import Column, Integer, Text, Boolean, Float, DateTime, Enum, ForeignKey, CheckConstraint
from sqlalchemy.ext.declarative import declarative_base
from backend.app.db.models.users import UserModel # noqa
Base = declarative_base()
@ -168,7 +169,8 @@ class PropertyDetailsEpcModel(Base):
energy_tariff = Column(Text)
primary_energy_consumption = Column(Float)
co2_emissions = Column(Float)
adjusted_energy_consumption = Column(Float)
current_energy_demand = Column(Float)
current_energy_demand_heating_hotwater = Column(Float)
estimated = Column(Boolean, default=False)
@ -204,3 +206,13 @@ class PropertyTargetsModel(Base):
created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
epc = Column(Enum(Epc))
heat_demand = Column(Text)
class PortfolioUsers(Base):
    """Link between a user and a portfolio, recording the role the user holds on that portfolio."""

    __tablename__ = "portfolioUsers"

    id = Column(Integer, primary_key=True, autoincrement=True)
    user_id = Column(Integer, ForeignKey('user.id'), nullable=False)
    portfolioId = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
    role = Column(Text, nullable=False)
    # The defaults are callables so the timestamp is taken when each row is inserted. Passing
    # datetime.datetime.now(pytz.utc) directly would be evaluated once, when this module is imported, and every
    # row would then share that single import-time value.
    created_at = Column(DateTime, nullable=False, default=lambda: datetime.datetime.now(pytz.utc))
    updated_at = Column(DateTime, nullable=False, default=lambda: datetime.datetime.now(pytz.utc))

View file

@ -20,7 +20,7 @@ from backend.app.db.functions.property_functions import (
update_or_create_property_spatial_details
)
from backend.app.db.functions.recommendations_functions import (
create_plan, create_plan_recommendations, upload_recommendations, create_scenario
create_plan, upload_recommendations, create_scenario
)
from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn
from backend.app.db.models.portfolio import rating_lookup
@ -30,9 +30,9 @@ from backend.app.plan.utils import get_cleaned
from backend.app.utils import epc_to_sap_lower_bound, sap_to_epc
from backend.ml_models.api import ModelApi
from backend.ml_models.AnnualBillSavings import AnnualBillSavings
from backend.Property import Property
from backend.apis.GoogleSolarApi import GoogleSolarApi
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from recommendations.optimiser.CostOptimiser import CostOptimiser
from recommendations.optimiser.GainOptimiser import GainOptimiser
@ -42,7 +42,11 @@ from recommendations.Mds import Mds
from utils.logger import setup_logger
from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3
from backend.ml_models.Valuation import PropertyValuation
from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel
from etl.bill_savings.KwhData import KwhData
from etl.spatial.OpenUprnClient import OpenUprnClient
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
logger = setup_logger()
@ -129,8 +133,8 @@ def extract_portfolio_aggregation_data(
[r["energy_cost_savings"] for r in default_recommendations]
)
pre_retrofit_energy_consumption = p.current_adjusted_energy
post_retrofit_energy_consumption = p.current_adjusted_energy - sum(
pre_retrofit_energy_consumption = p.current_energy_consumption
post_retrofit_energy_consumption = p.current_energy_consumption - sum(
[r["kwh_savings"] for r in default_recommendations]
)
@ -244,8 +248,8 @@ def create_epc_records(epc_searcher: SearchEpc, energy_assessment: dict):
# We insert county into the epc, since right now this isn't something that we pull out from the energy
# assessment
epc["county"] = epc_searcher.newest_epc["county"]
epc["constituency"] = epc_searcher.newest_epc["constituency"]
for col in ["county", "constituency", "constituency-label", "local-authority", "local-authority-label"]:
epc[col] = epc_searcher.newest_epc[col]
# We check if the energy assessment is newer than the newest EPC
if pd.to_datetime(energy_assessment_date) > pd.to_datetime(epc_searcher.newest_epc["inspection-date"]):
@ -283,6 +287,80 @@ def create_epc_records(epc_searcher: SearchEpc, energy_assessment: dict):
}, energy_assessment_is_newer
def get_on_site_data(body: PlanTriggerRequest):
    """
    Load the optional on-site data files named in the request from the plan trigger S3 bucket.

    :param body: The request body. Its patches_file_path, already_installed_file_path and
        non_invasive_recommendations_file_path attributes are each read only when they are truthy
    :return: A tuple of (patches, already_installed, non_invasive_recommendations). An entry is an empty list
        when the corresponding file path has not been provided
    """

    def _read_if_provided(filepath):
        # Nothing to fetch (and no need to resolve the settings) when the request did not supply a path
        if not filepath:
            return []
        return read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=filepath)

    patches = _read_if_provided(body.patches_file_path)
    already_installed = _read_if_provided(body.already_installed_file_path)
    non_invasive_recommendations = _read_if_provided(body.non_invasive_recommendations_file_path)

    return patches, already_installed, non_invasive_recommendations
def extract_property_on_site_recommendations(config, patches, already_installed, non_invasive_recommendations, uprn):
"""
Pick out the on-site records that belong to a single property.

Each of the three results is the first matching record, or an empty dict when nothing matches.

:param config: The plan input record for the property; "address" and "postcode" are read, and "uprn" is read
when the patches are matched on uprn
:param patches: Patch records, matched on uprn when the first record has a "uprn" key, otherwise on address
and postcode
:param already_installed: Already-installed records, always matched on address and postcode
:param non_invasive_recommendations: Non-invasive recommendation records, matched on the uprn argument when
the first record has a "uprn" key, otherwise on address and postcode
:param uprn: The uprn used to match the non-invasive recommendations
:return: Tuple of (patch, property_already_installed, property_non_invasive_recommendations)
"""
# With no patches at all this defaults to True; the search below then runs over an empty list and yields {}
patch_has_uprn = "uprn" in patches[0] if patches else True
if patch_has_uprn:
# NOTE(review): patches are compared against config["uprn"], whereas the non-invasive recommendations below
# are compared against the uprn argument - confirm the two are meant to differ
patch = next((
x for x in patches if str(x["uprn"]) == str(config["uprn"])
), {})
else:
patch = next((
x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
), {})
property_already_installed = next((
x for x in already_installed if
(x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
), {})
# Because we have some non-invasive recommendations that match on address and postcode, but not UPRN
# we need to check existence of uprn
has_uprn = "uprn" in non_invasive_recommendations[0] if non_invasive_recommendations else True
if has_uprn:
property_non_invasive_recommendations = next((
x for x in non_invasive_recommendations if
(str(x["uprn"]) == str(uprn))
), {})
# We patch the non-invasive recs that are ['cavity_extract_and_refill']
else:
property_non_invasive_recommendations = next((
x for x in non_invasive_recommendations if
(x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
), {})
# A "recommendations" value held as a string is parsed into a Python literal, any bare string entry is wrapped
# as {"type": <entry>}, and the result is written back as a string. A matched record is the same dict object
# that sits in non_invasive_recommendations, so that list entry is modified in place
if isinstance(property_non_invasive_recommendations.get("recommendations"), str):
import ast
property_non_invasive_recommendations["recommendations"] = ast.literal_eval(
property_non_invasive_recommendations["recommendations"]
)
transformed = []
for rec in property_non_invasive_recommendations["recommendations"]:
if isinstance(rec, str):
transformed.append({"type": rec, })
else:
transformed.append(rec)
property_non_invasive_recommendations["recommendations"] = str(transformed)
return patch, property_already_installed, property_non_invasive_recommendations
router = APIRouter(
prefix="/plan",
tags=["plan"],
@ -304,21 +382,7 @@ async def trigger_plan(body: PlanTriggerRequest):
logger.info("Getting the inputs")
plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
# If we have patches or overrides, we should read them in here
patches = []
if body.patches_file_path:
patches = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.patches_file_path)
already_installed = []
if body.already_installed_file_path:
already_installed = read_csv_from_s3(
bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.already_installed_file_path
)
non_invasive_recommendations = []
if body.non_invasive_recommendations_file_path:
non_invasive_recommendations = read_csv_from_s3(
bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.non_invasive_recommendations_file_path
)
patches, already_installed, non_invasive_recommendations = get_on_site_data(body)
cleaning_data = read_dataframe_from_s3_parquet(
bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet",
@ -326,7 +390,6 @@ async def trigger_plan(body: PlanTriggerRequest):
input_properties = []
for config in tqdm(plan_input):
# We validate each record in the file. If the record is NOT valid, we need to handle this accordingly
uprn = config.get("uprn", None)
if uprn:
@ -370,9 +433,13 @@ async def trigger_plan(body: PlanTriggerRequest):
epc_records, energy_assessment["energy_assessment_is_newer"] = create_epc_records(
epc_searcher, energy_assessment
)
patch = next((
x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
), {})
patch, property_already_installed, property_non_invasive_recommendations = (
extract_property_on_site_recommendations(
config, patches, already_installed, non_invasive_recommendations, uprn
)
)
epc_records = patch_epc(patch, epc_records)
prepared_epc = EPCRecord(
@ -381,16 +448,6 @@ async def trigger_plan(body: PlanTriggerRequest):
cleaning_data=cleaning_data
)
property_already_installed = next((
x for x in already_installed if
(x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
), {})
property_non_invasive_recommendations = next((
x for x in non_invasive_recommendations if
(x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
), {})
input_properties.append(
Property(
id=property_id,
@ -415,11 +472,6 @@ async def trigger_plan(body: PlanTriggerRequest):
materials = get_materials(session)
cleaned = get_cleaned()
uprn_filenames = read_dataframe_from_s3_parquet(
bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
)
solar_api_client = GoogleSolarApi(api_key=get_settings().GOOGLE_SOLAR_API_KEY)
dataset_version = "2024-07-08"
energy_consumption_client = EnergyConsumptionModel(
model_paths={
@ -432,18 +484,38 @@ async def trigger_plan(body: PlanTriggerRequest):
environment=get_settings().ENVIRONMENT
)
logger.info("Getting spatial data")
for p in input_properties:
p.get_components(cleaned=cleaned, energy_consumption_client=energy_consumption_client)
p.get_spatial_data(uprn_filenames)
kwh_client = KwhData(bucket=get_settings().DATA_BUCKET, read_consumption_data=True)
model_api = ModelApi(
portfolio_id=body.portfolio_id,
timestamp=created_at,
prediction_buckets=get_prediction_buckets()
)
epcs_for_scoring = kwh_client.transform(data=kwh_client.prepare_epc(input_properties), cleaned=cleaned)
kwh_preds = model_api.paginated_predictions(
data=epcs_for_scoring,
bucket=get_settings().DATA_BUCKET,
model_prefixes=["heating_kwh_predictions", "hotwater_kwh_predictions"],
extract_ids=False,
batch_size=SCORING_BATCH_SIZE
)
# Insert the spatial data
logger.info("Getting spatial data")
input_properties = OpenUprnClient.set_spatial_data(input_properties, bucket_name=get_settings().DATA_BUCKET)
[p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=kwh_preds) for p in input_properties]
logger.info("Performing solar analysis")
# TODO: Tidy this up
# TODO: If a property is semi-detached, we might get roof surfaces for the main building + the neighbour
# TODO: If we can't get high image quality, should we use the solar API? Maybe just for semi-detached units with
# extensions, since it doesn't seem to do a great job
# TODO: For simple properties, we should do a comparison/check between the solar API's roof area and the
# basic estimate of roof area
building_ids = [
{
"building_id": p.building_id,
@ -514,6 +586,7 @@ async def trigger_plan(body: PlanTriggerRequest):
energy_consumption = sum(
[entry['energy_consumption'] for entry in building_ids if entry['building_id'] == building_id]
)
solar_api_client = GoogleSolarApi(api_key=get_settings().GOOGLE_SOLAR_API_KEY)
solar_api_client.get(
longitude=coordinates["longitude"],
latitude=coordinates["latitude"],
@ -528,7 +601,8 @@ async def trigger_plan(body: PlanTriggerRequest):
}
# Store the data in the database
# TODO: Rather than just doing a straight insert, we should overwrite what's already there if it exists
# TODO: Rather than just doing a straight insert, we should overwrite what's already there if it
# exists
solar_api_client.save_to_db(
session=session, uprns_to_location=building_uprns[building_id], scenario_type="building"
)
@ -543,15 +617,18 @@ async def trigger_plan(body: PlanTriggerRequest):
energy_consumption
)
p.set_solar_panel_configuration(unit_solar_panel_configuration)
if individual_units:
# Model the solar potential at the property level
for unit in individual_units:
for unit in tqdm(individual_units):
property_instance = [p for p in input_properties if p.id == unit["property_id"]][0]
# At this level, we check if the property is suitable for solar and if not, skip
if not property_instance.is_solar_pv_valid():
continue
# We check if we have a solar non-invasive recommendation
if [r for r in property_instance.non_invasive_recommendations if r["type"] == "solar_pv"]:
continue
solar_api_client = GoogleSolarApi(api_key=get_settings().GOOGLE_SOLAR_API_KEY)
solar_api_client.get(
longitude=unit["longitude"],
latitude=unit["latitude"],
@ -563,7 +640,8 @@ async def trigger_plan(body: PlanTriggerRequest):
)
# Store the data in the database
# TODO: Rather than just doing a straight insert, we should overwrite what's already there if it exists
# TODO: Rather than just doing a straight insert, we should overwrite what's already there if it
# exists
solar_api_client.save_to_db(
session=session,
uprns_to_location=[
@ -585,12 +663,11 @@ async def trigger_plan(body: PlanTriggerRequest):
roof_area=solar_api_client.roof_area
)
logger.info("Getting components and epc recommendations")
logger.info("Identifying property recommendations")
recommendations = {}
recommendations_scoring_data = []
representative_recommendations = {}
for p in tqdm(input_properties):
recommender = Recommendations(property_instance=p, materials=materials, exclusions=body.exclusions)
property_recommendations, property_representative_recommendations = recommender.recommend()
@ -608,7 +685,6 @@ async def trigger_plan(body: PlanTriggerRequest):
recommendations_scoring_data.extend(p.recommendations_scoring_data)
# TODO: Make sure that number_habitable_rooms has been dropped
logger.info("Preparing data for scoring in sap change api")
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
@ -617,54 +693,69 @@ async def trigger_plan(body: PlanTriggerRequest):
"carbon_ending"]
)
model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
all_predictions = model_api.paginated_predictions(
data=recommendations_scoring_data,
bucket=get_settings().DATA_BUCKET,
batch_size=SCORING_BATCH_SIZE
)
all_predictions = model_api.predictions_template()
to_loop_over = range(0, recommendations_scoring_data.shape[0], SCORING_BATCH_SIZE)
for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
predictions_dict = model_api.predict_all(
df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE],
bucket=get_settings().DATA_BUCKET,
prediction_buckets=get_prediction_buckets()
# Insert the predictions into the recommendations, and get the impact summary
scoring_epcs = [] # For scoring the kwh models
for property_id in recommendations.keys():
property_instance = [p for p in input_properties if p.id == property_id][0]
recommendations_with_impact, impact_summary = (
Recommendations.calculate_recommendation_impact(
property_instance=property_instance,
all_predictions=all_predictions,
recommendations=recommendations,
)
)
# Append the predictions to the predictions dictionary
for key, scored in predictions_dict.items():
all_predictions[key] = pd.concat([all_predictions[key], scored])
# We use the impact_summary to update the simulation_epcs with the new SAP, heat demand, carbon, cost etc
# at each phase
property_instance.update_simulation_epcs(impact_summary)
scoring_epcs.extend(property_instance.updated_simulation_epcs)
recommendations[property_id] = recommendations_with_impact
# We call the API with the scoring epcs
scoring_epcs = pd.DataFrame(scoring_epcs)
scoring_epcs = kwh_client.transform(data=scoring_epcs, cleaned=cleaned)
kwh_simulation_predictions = model_api.paginated_predictions(
data=scoring_epcs,
bucket=get_settings().DATA_BUCKET,
model_prefixes=["heating_kwh_predictions", "hotwater_kwh_predictions"],
batch_size=SCORING_BATCH_SIZE
)
# We now insert kwh estimates and costs into the recommendations
# TODO: We should join the methodology which maps the heating and hot water descriptions to the fuel types in
# Recommendations, but also the Property class
logger.info("Calculating tenant savings - kwh and bills")
for property_id in tqdm([p.id for p in input_properties]):
property_recommendations = recommendations.get(property_id, [])
property_instance = [p for p in input_properties if p.id == property_id][0]
property_current_energy_bill = Recommendations.calculate_recommendation_tenant_savings(
property_instance=property_instance,
kwh_simulation_predictions=kwh_simulation_predictions,
property_recommendations=property_recommendations
)
property_instance.current_energy_bill = property_current_energy_bill
# Insert the predictions into the recommendations and run the optimiser
# TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a
# possibility with heating system
# TODO: After optimising, if there are any cheap, quick win measures (e.g. insulate water tank with hot water
# cylinder jacket), we should add these to the recommendations as default
logger.info("Optimising recommendations")
for property_id in recommendations.keys():
property_instance = [p for p in input_properties if p.id == property_id][0]
for p in input_properties:
if not recommendations.get(p.id):
continue
input_measures = prepare_input_measures(recommendations[p.id], body.goal)
(
recommendations_with_impact,
expected_adjusted_energy,
expected_energy_bill
) = (
Recommendations.calculate_recommendation_impact(
property_instance=property_instance,
all_predictions=all_predictions,
recommendations=recommendations,
representative_recommendations=representative_recommendations,
energy_consumption_client=energy_consumption_client
)
)
# Store the resulting adjusted energy in the property instance
property_instance.set_adjusted_energy(
expected_adjusted_energy=expected_adjusted_energy,
expected_energy_bill=expected_energy_bill
)
input_measures = prepare_input_measures(recommendations_with_impact, body.goal)
current_sap_points = int(property_instance.data["current-energy-efficiency"])
current_sap_points = int(p.data["current-energy-efficiency"])
target_sap_points = epc_to_sap_lower_bound(body.goal_value)
sap_gain = CostOptimiser.calculate_sap_gain_with_slack(target_sap_points - current_sap_points)
@ -691,7 +782,7 @@ async def trigger_plan(body: PlanTriggerRequest):
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"
]):
ventilation_rec = next(
(r[0] for r in recommendations_with_impact if r[0]["type"] == "mechanical_ventilation"),
(r[0] for r in recommendations[p.id] if r[0]["type"] == "mechanical_ventilation"),
None
)
@ -705,29 +796,14 @@ async def trigger_plan(body: PlanTriggerRequest):
{**rec, "default": True if rec["recommendation_id"] in selected_recommendations else False}
for rec in recommendations_by_type
]
for recommendations_by_type in recommendations_with_impact
for recommendations_by_type in recommendations[p.id]
]
# We'll also unlist the recommendations so they're a bit easier to handle from here onwards
final_recommendations = [
rec for recommendations_by_type in final_recommendations for rec in recommendations_by_type
]
recommendations[property_id] = final_recommendations
# df = []
# for rec in recommendations[list(recommendations.keys())[0]]:
# df.append(
# {
# "id": rec["recommendation_id"],
# "description": rec["description"],
# "sap": rec["sap_points"],
# }
# )
# df = pd.DataFrame(df)
# 1) the property data
# 2) the property details (epc)
# 3) the recommendations
recommendations[p.id] = final_recommendations
logger.info("Uploading recommendations to the database")
# If we have any work to do, we create a new scenario
@ -1001,7 +1077,7 @@ async def build_mds(body: MdsRequest):
recommendations = {}
for p in tqdm(input_properties):
p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
p.set_features(cleaned, photo_supply_lookup, floor_area_decile_thresholds)
mds = Mds(property_instance=p, materials=materials, optimise_measures=optimise_measures)
mds_recommendations, property_representative_recommendations, errors = mds.build()
@ -1050,7 +1126,9 @@ async def build_mds(body: MdsRequest):
"carbon_ending"]
)
model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
model_api = ModelApi(
portfolio_id=body.portfolio_id, timestamp=created_at, prediction_buckets=get_prediction_buckets()
)
all_predictions = {
"sap_change_predictions": pd.DataFrame(),
@ -1061,12 +1139,6 @@ async def build_mds(body: MdsRequest):
for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
predictions_dict = model_api.predict_all(
df=recommendations_scoring_data.iloc[chunk:chunk + SCORING_BATCH_SIZE],
bucket=get_settings().DATA_BUCKET,
prediction_buckets={
"sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET,
"heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET,
"carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET
}
)
# Append the predictions to the predictions dictionary

View file

@ -33,6 +33,11 @@ class PlanTriggerRequest(BaseModel):
"solar_pv",
# Specific measures
"air_source_heat_pump",
"internal_wall_insulation",
"external_wall_insulation",
"secondary_heating",
"boiler_upgrade",
"high_heat_retention_storage_heater",
}
_allowed_goals = {"Increasing EPC"}

View file

@ -1,4 +1,6 @@
import numpy as np
import pandas as pd
import backend.app.assumptions as assumptions
QUARTERLY_ENERGY_PRICES = [
# 2024 Q1
@ -40,6 +42,53 @@ class AnnualBillSavings:
DAILY_STANDARD_CHARGE_GAS = 0.3143
DAILY_STANDARD_CHARGE_ELECTRICITY = 0.601
# Based on https://www.nottenergy.com/advice-and-tools/project-energy-cost-comparison
# For July 2024. These quotes are based on the East Midlands region.
FUEL_DATA = pd.DataFrame([
{"Fuel": "Electricity Standard", "Price (p)": 28.58, "Unit": "kWh", "Boiler Efficiency (%)": 100,
"Energy Content, Net Calorific value (kWh/unit)": 1.00, "Price per kWh (p) (inc boiler efficiency)": 28.58,
"CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.275},
{"Fuel": "Mains Gas Standard", "Price (p)": 6.31, "Unit": "kWh", "Boiler Efficiency (%)": 90,
"Energy Content, Net Calorific value (kWh/unit)": 1.00, "Price per kWh (p) (inc boiler efficiency)": 7.01,
"CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.213},
{"Fuel": "Kerosene", "Price (p)": 62.49, "Unit": "Litre", "Boiler Efficiency (%)": 90,
"Energy Content, Net Calorific value (kWh/unit)": 9.79, "Price per kWh (p) (inc boiler efficiency)": 7.09,
"CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.298},
{"Fuel": "Gas oil", "Price (p)": 94.50, "Unit": "Litre", "Boiler Efficiency (%)": 90,
"Energy Content, Net Calorific value (kWh/unit)": 9.96, "Price per kWh (p) (inc boiler efficiency)": 10.54,
"CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.316},
{"Fuel": "LPG", "Price (p)": 55.00, "Unit": "Litre", "Boiler Efficiency (%)": 90,
"Energy Content, Net Calorific value (kWh/unit)": 6.78, "Price per kWh (p) (inc boiler efficiency)": 9.01,
"CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.240},
{"Fuel": "Butane", "Price (p)": 216.58, "Unit": "Litre", "Boiler Efficiency (%)": 90,
"Energy Content, Net Calorific value (kWh/unit)": 6.64, "Price per kWh (p) (inc boiler efficiency)": 36.24,
"CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.248},
{"Fuel": "Propane", "Price (p)": 157.67, "Unit": "Litre", "Boiler Efficiency (%)": 90,
"Energy Content, Net Calorific value (kWh/unit)": 7.22, "Price per kWh (p) (inc boiler efficiency)": 24.25,
"CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.239},
{"Fuel": "Kiln Dried (logs)", "Price (p)": 36.52, "Unit": "kg", "Boiler Efficiency (%)": 85,
"Energy Content, Net Calorific value (kWh/unit)": 4.09, "Price per kWh (p) (inc boiler efficiency)": 10.51,
"CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.024},
{"Fuel": "Pellets (Bagged)", "Price (p)": 39.62, "Unit": "kg", "Boiler Efficiency (%)": 90,
"Energy Content, Net Calorific value (kWh/unit)": 4.80, "Price per kWh (p) (inc boiler efficiency)": 9.17,
"CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.049},
{"Fuel": "Pellets (Blown bulk)", "Price (p)": 33.92, "Unit": "kg", "Boiler Efficiency (%)": 90,
"Energy Content, Net Calorific value (kWh/unit)": 4.80, "Price per kWh (p) (inc boiler efficiency)": 7.85,
"CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.049},
{"Fuel": "Smokeless fuel", "Price (p)": 67.26, "Unit": "kg", "Boiler Efficiency (%)": 75,
"Energy Content, Net Calorific value (kWh/unit)": 6.70, "Price per kWh (p) (inc boiler efficiency)": 13.38,
"CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.404},
{"Fuel": "Coal", "Price (p)": 48.50, "Unit": "kg", "Boiler Efficiency (%)": 75,
"Energy Content, Net Calorific value (kWh/unit)": 7.95, "Price per kWh (p) (inc boiler efficiency)": 8.13,
"CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.404},
{"Fuel": "GSHP", "Price (p)": 28.58, "Unit": "kWh", "Boiler Efficiency (%)": 350,
"Energy Content, Net Calorific value (kWh/unit)": 1.00, "Price per kWh (p) (inc boiler efficiency)": 8.17,
"CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.079},
{"Fuel": "ASHP", "Price (p)": 28.58, "Unit": "kWh", "Boiler Efficiency (%)": 294,
"Energy Content, Net Calorific value (kWh/unit)": 1.00, "Price per kWh (p) (inc boiler efficiency)": 9.72,
"CO2eq emission factor kgCO2eq/kWh (Gross CV)": 0.094}
])
EPC_BANDS = ["G", "F", "E", "D", "C", "B", "A"]
@classmethod
@ -199,3 +248,75 @@ class AnnualBillSavings:
return current_epc_rating
return cls.EPC_BANDS[expected_index - 1]
@staticmethod
def cost_per_kwh(price_per_unit, energy_content_per_unit):
    """
    Convert the price of one purchase unit of fuel into a cost per kWh, expressed in pounds.

    :param price_per_unit: Price of one unit of the fuel (for example a litre or a kg), in pence
    :param energy_content_per_unit: Energy contained in one unit of the fuel, in kWh
    :return: The cost of one kWh of the fuel, in pounds
    """
    pence_per_kwh = price_per_unit / energy_content_per_unit
    # Prices are quoted in pence, so scale the result down to pounds
    pounds_per_kwh = pence_per_kwh / 100
    return pounds_per_kwh
@classmethod
def calculate_recommendation_fuel_cost(cls, kwh, fuel, cop):
    """
    Estimate the cost of supplying ``kwh`` of energy with the given fuel.

    The energy is divided by ``cop`` to get the amount of fuel energy that has to be bought, which is then
    priced per kWh. Electricity and natural gas use the class price caps; every other supported fuel is priced
    from a row of FUEL_DATA via cost_per_kwh. For the "+ Solar Thermal" fuels only
    assumptions.SOLAR_CONSUMPTION_PROPORTION of the purchased energy is charged.

    :param kwh: Energy required, in kWh
    :param fuel: Name of the fuel, e.g. "Electricity", "Natural Gas", "LPG", "Oil" or "Coal"
    :param cop: Efficiency of the system as a ratio (1 == 100%); ``kwh`` is divided by this value
    :return: The estimated fuel cost. FUEL_DATA based prices are in pounds (see cost_per_kwh); this assumes the
        price caps are also held in pounds per kWh - TODO confirm
    :raises ValueError: If ``fuel`` is not one of the supported fuels
    """
    solar_thermal_fuels = ("Natural Gas + Solar Thermal", "Electricity + Solar Thermal", "LPG + Solar Thermal")

    # Fuels priced from FUEL_DATA, mapped to the "Fuel" value of the row used as their price source. Several
    # fuels have no row of their own and borrow the price of a similar fuel
    fuel_data_rows = {
        "LPG": "LPG",
        "LPG + Solar Thermal": "LPG",
        "Wood Logs": "Pellets (Bagged)",
        "Wood Pellets": "Pellets (Bagged)",
        "Oil": "Kerosene",
        "Smokeless Fuel": "Smokeless fuel",
        "Anthracite": "Smokeless fuel",
        "Coal": "Coal",
    }

    def fuel_data_price(row_name):
        # Price per kWh for the FUEL_DATA row whose "Fuel" column equals row_name
        price_data = cls.FUEL_DATA[cls.FUEL_DATA["Fuel"] == row_name].squeeze()
        return cls.cost_per_kwh(
            price_data["Price (p)"], price_data["Energy Content, Net Calorific value (kWh/unit)"]
        )

    # Resolve the price first so that an unsupported fuel is always reported as such
    if fuel in ("Electricity", "Electricity + Solar Thermal"):
        price_per_kwh = cls.ELECTRICITY_PRICE_CAP
    elif fuel in ("Natural Gas", "Natural Gas (Community Scheme)", "Natural Gas + Solar Thermal"):
        price_per_kwh = cls.GAS_PRICE_CAP
    elif fuel in fuel_data_rows:
        price_per_kwh = fuel_data_price(fuel_data_rows[fuel])
    else:
        raise ValueError(f"Fuel not recognised: {fuel!r}")

    purchased_kwh = kwh / cop
    if fuel in solar_thermal_fuels:
        # The solar thermal covers a % of the heating kwh, so we need to adjust the cost
        purchased_kwh = purchased_kwh * assumptions.SOLAR_CONSUMPTION_PROPORTION

    return purchased_kwh * price_per_kwh

View file

@ -1,4 +1,5 @@
import pandas as pd
from tqdm import tqdm
import requests
from requests.exceptions import RequestException
from utils.logger import setup_logger
@ -12,24 +13,27 @@ class ModelApi:
"sap_change_predictions",
"heat_demand_predictions",
"carbon_change_predictions",
"lighting_cost_predictions",
"heating_cost_predictions",
"hot_water_cost_predictions",
# "lighting_cost_predictions",
# "heating_cost_predictions",
# "hot_water_cost_predictions",
]
MODEL_URLS = {
"sap_change_predictions": "sapmodel",
"heat_demand_predictions": "heatmodel",
"carbon_change_predictions": "carbonmodel",
"lighting_cost_predictions": "lightingmodel",
"heating_cost_predictions": "heatingmodel",
"hot_water_cost_predictions": "hotwatermodel",
"hotwater_kwh_predictions": "hotwaterkwhmodel",
"heating_kwh_predictions": "heatingkwhmodel",
# "lighting_cost_predictions": "lightingmodel",
# "heating_cost_predictions": "heatingmodel",
# "hot_water_cost_predictions": "hotwatermodel",
}
def __init__(
self,
portfolio_id,
timestamp,
prediction_buckets,
base_url="https://api.dev.hestia.homes",
):
"""
@ -44,6 +48,7 @@ class ModelApi:
self.base_url = base_url
self.portfolio_id = portfolio_id
self.timestamp = timestamp
self.prediction_buckets = prediction_buckets
@staticmethod
def predictions_template():
@ -51,9 +56,8 @@ class ModelApi:
"sap_change_predictions": pd.DataFrame(),
"heat_demand_predictions": pd.DataFrame(),
"carbon_change_predictions": pd.DataFrame(),
"lighting_cost_predictions": pd.DataFrame(),
"heating_cost_predictions": pd.DataFrame(),
"hot_water_cost_predictions": pd.DataFrame(),
"hotwater_kwh_predictions": pd.DataFrame(),
"heating_kwh_predictions": pd.DataFrame(),
}
def upload_scoring_data(self, df: pd.DataFrame, bucket: str, model_prefix: str) -> str:
@ -68,8 +72,8 @@ class ModelApi:
:return:
"""
if model_prefix not in self.MODEL_PREFIXES:
raise ValueError(f"Model prefix specified is not in {self.MODEL_PREFIXES}")
# if model_prefix not in self.MODEL_PREFIXES:
# raise ValueError(f"Model prefix specified is not in {self.MODEL_PREFIXES}")
# Store parquet file in s3 for scoring
file_location = f"{model_prefix}/{self.portfolio_id}/{self.timestamp}.parquet"
@ -123,7 +127,7 @@ class ModelApi:
else:
return None
def predict_all(self, df, bucket, prediction_buckets) -> dict:
def predict_all(self, df, bucket, model_prefixes=None, extract_ids=True) -> dict:
"""
For each model prefix, this method will upload the scoring data to s3 and then make a request to the
@ -132,19 +136,24 @@ class ModelApi:
a dictionary of pandas dataframes
:param df: Pandas dataframe with scoring data to be uploaded to s3
:param bucket: Name of the bucket in s3 to upload to
:param prediction_buckets: Dictionary containing the prediction buckets for each model prefix
:param model_prefixes: List of model prefixes to generate predictions for. If None, all model prefixes will be
used
:param extract_ids: Boolean to determine if the property_id and recommendation_id should be extracted from the
id column
:return:
"""
model_prefixes = self.MODEL_PREFIXES if model_prefixes is None else model_prefixes
predictions = {}
for model_prefix in self.MODEL_PREFIXES:
for model_prefix in model_prefixes:
logger.info(f"Scoring for model prefix: {model_prefix}")
file_location = self.upload_scoring_data(df, bucket, model_prefix)
response = self.predict(
"s3://{DATA_BUCKET}/".format(DATA_BUCKET=bucket) + file_location, model_prefix
)
predictions_bucket = prediction_buckets[model_prefix]
predictions_bucket = self.prediction_buckets[model_prefix]
# Retrieve the predictions
predictions_df = pd.DataFrame(
@ -155,16 +164,35 @@ class ModelApi:
)
predictions_df['predictions'] = predictions_df["predictions"].astype(float).round(1)
predictions_df[['property_id', 'recommendation_id']] = predictions_df['id'].str.split('+', expand=True)
# To grab the phase, we pull the integer after "phase=" in the recommendation_id. We can do this with a
# string split on phase= and then grab the second element of the resulting list. We could also use a
# regular expression to do this but we use the string split method here, for safety.
# We may not always have a phase to split on, so we need to handle this case. We can do this by using the
# str[1] method to grab the second element of the resulting list. We then grab the first character of this
# string to get the phase. We then convert this to an integer.
# Convert back to int
predictions_df['phase'] = predictions_df['recommendation_id'].apply(self.extract_phase)
if extract_ids:
predictions_df[['property_id', 'recommendation_id']] = predictions_df['id'].str.split('+', expand=True)
# To grab the phase, we pull the integer after "phase=" in the recommendation_id. We can do this with a
# string split on phase= and then grab the second element of the resulting list. We could also use a
# regular expression to do this but we use the string split method here, for safety.
# We may not always have a phase to split on, so we need to handle this case. We can do this by using
# the str[1] method to grab the second element of the resulting list. We then grab the first
# character of this
# string to get the phase. We then convert this to an integer.
# Convert back to int
predictions_df['phase'] = predictions_df['recommendation_id'].apply(self.extract_phase)
predictions[model_prefix] = predictions_df
return predictions
def paginated_predictions(self, data, bucket, batch_size, model_prefixes=None, extract_ids=True):
    """
    Score ``data`` in batches of ``batch_size`` rows and stitch the per-batch predictions together.

    Each batch is sent through ``predict_all``; the frames returned for every model prefix are collected and
    concatenated once at the end, rather than re-concatenating the growing result on every batch.

    :param data: Pandas dataframe with the scoring data
    :param bucket: Name of the bucket in s3 the scoring data is uploaded to
    :param batch_size: Number of rows to score per request
    :param model_prefixes: List of model prefixes to generate predictions for, passed through to predict_all
    :param extract_ids: Passed through to predict_all
    :return: Dictionary of model prefix -> dataframe of predictions. Every key of predictions_template() is
        present (as an empty dataframe when nothing was scored for it); prefixes returned by predict_all that
        are not part of the template are included as well instead of raising a KeyError.
    """
    collected = {key: [] for key in self.predictions_template()}

    batch_starts = range(0, data.shape[0], batch_size)
    for start in tqdm(batch_starts, total=len(batch_starts)):
        batch_predictions = self.predict_all(
            df=data.iloc[start:start + batch_size],
            bucket=bucket,
            model_prefixes=model_prefixes,
            extract_ids=extract_ids
        )
        for key, scored in batch_predictions.items():
            # setdefault so that a prefix outside the template does not blow up
            collected.setdefault(key, []).append(scored)

    # Concatenate once per prefix; row indexes are kept as returned by predict_all
    return {
        key: pd.concat(frames) if frames else pd.DataFrame()
        for key, frames in collected.items()
    }

View file

@ -6,6 +6,7 @@ from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percenta
from sklearn.feature_selection import RFECV
from utils.s3 import save_pickle_to_s3, read_pickle_from_s3, read_dataframe_from_s3_parquet, read_csv_from_s3
from utils.logger import setup_logger
from backend.Property import Property
logger = setup_logger()
@ -125,37 +126,6 @@ class EnergyConsumptionModel:
self.retail_price_comparison = pd.DataFrame(data_rows, columns=header)
self.retail_price_comparison['Date'] = pd.to_datetime(self.retail_price_comparison['Date'], errors='coerce')
def convert_cost_to_today(self, original_cost, lodgement_date):
    """
    Given energy costs in an EPC, this function converts that energy cost to a figure based on today's energy costs
    (or as close to today as possible)

    The cost is scaled by the ratio between the last tariff in the retail price comparison data and the tariff
    on the date closest to the lodgement date.

    :param original_cost: The original energy cost
    :param lodgement_date: The date the EPC was lodged (anything pd.Timestamp accepts)
    :return: The original cost scaled to the latest available tariff
    """
    tariff_column = 'Average standard variable tariff (Large legacy suppliers)'
    prices = self.retail_price_comparison

    # Dates are parsed with errors='coerce', so NaT rows can exist. idxmin skips them, whereas argsort on a
    # series containing NaT returns -1 / shifted positions and can select the wrong row.
    gaps = (prices['Date'] - pd.Timestamp(lodgement_date)).abs()
    closest_position = gaps.reset_index(drop=True).idxmin()

    # Tariff on the date closest to the lodgement date
    tariff_at_lodgement = prices[tariff_column].iloc[closest_position]

    # Latest available tariff (the last row of the dataset)
    latest_tariff = prices[tariff_column].iloc[-1]

    ratio = float(latest_tariff) / float(tariff_at_lodgement)
    return original_cost * ratio
def read_dataset(self, file_path):
"""Reads the dataset from the specified file path."""
logger.info(f"Reading dataset from {file_path}")
@ -525,7 +495,7 @@ class EnergyConsumptionModel:
def estimate_new_consumption(self, current_energy_efficiency, target_efficiency, current_consumption):
"""
Given then consumption_averages dataset, which is produced as a result of the data_combining.py script,
Given the consumption_averages dataset, which is produced as a result of the training_data.py script,
for the energy kwh models, this function will estimate the new consumption based on the current consumption,
based on the expected reduction in consumption from the current rating to the target rating.
:param current_energy_efficiency:

363
etl/bill_savings/KwhData.py Normal file
View file

@ -0,0 +1,363 @@
import re
import pandas as pd
import numpy as np
from datetime import datetime
from tqdm import tqdm
from utils.logger import setup_logger
from utils.s3 import (
list_files_in_s3_folder, read_pickle_from_s3, save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet,
read_csv_from_s3
)
from backend.Property import Property
logger = setup_logger()
class KwhData:
# Data preparation for the kwh (heating / hot water) energy consumption models: combining the collected
# consumption data, transforming EPCs into model features and preparing EPCs for scoring.

# Columns cast to str in combine() before the dataset is written to parquet
COLS_TO_STRINGIFY = ["main-heating-controls", "floor-level"]
# Columns cast to str in transform()
CATEGORICAL_COLUMNS = [
"lodgement-year", "lodgement-month", "main-fuel", "mainheat-description", "number-heated-rooms",
"number-habitable-rooms", "mainheat-energy-eff", "mainheatcont-description", "property-type",
"built-form",
"construction-age-band", "secondheat-description", "hotwater-description", "hot-water-energy-eff",
"walls-description", "walls-energy-eff", "roof-description", "roof-energy-eff", "floor-description",
"county",
"windows-description", "windows-energy-eff", "flat-top-storey",
"flat-storey-count", "unheated-corridor-length", "solar-water-heating-flag", "mechanical-ventilation",
"low-energy-lighting", "environment-impact-current", "energy-tariff", "current-energy-rating",
"floor-level"
]
# Columns converted with pd.to_numeric in transform()
NUMERICAL_COLUMNS = [
'heating-cost-current', 'total-floor-area', 'co2-emissions-current', 'energy-consumption-current',
'heating-cost-potential', 'hot-water-cost-current', 'current-energy-efficiency'
]
def __init__(self, bucket=None, read_consumption_data=False):
    """
    :param bucket: Name of the s3 bucket used when reading and saving data
    :param read_consumption_data: When True, the consumption averages and the retail price comparison data
        are loaded from s3 straight away
    """
    self.bucket = bucket
    # Used as the folder name for anything this instance stores
    self.run_date = datetime.now().strftime("%Y-%m-%d")

    # Datasets held in memory
    self.data = None
    self.consumption_averages = None
    self.retail_price_comparison = None

    # s3 keys of the files written by this instance
    self.consumption_data_filepath = None
    self.consumption_averages_filepath = None
    self.model_training_data_filepath = None

    if not read_consumption_data:
        return

    self.get_consumption_data()
    self.read_retail_price_comparison()
def get_consumption_data(self):
    """
    Load the most recent consumption averages file from s3 into self.consumption_averages.

    The run date is taken from the second path segment of each key found under "energy_consumption/".

    :raises ValueError: If no consumption averages file is found
    """
    candidates = []
    for key in list_files_in_s3_folder(bucket_name=self.bucket, folder_name="energy_consumption/"):
        if "consumption_averages.parquet" not in key:
            continue
        candidates.append({"run_date": pd.to_datetime(key.split("/")[1]), "filepath": key})

    if not candidates:
        raise ValueError("No consumption averages data found, something went wrong")

    # Oldest first, so the most recent run is the last entry
    candidates.sort(key=lambda entry: entry["run_date"])
    newest = candidates[-1]

    self.consumption_averages = read_dataframe_from_s3_parquet(
        bucket_name=self.bucket,
        file_key=newest["filepath"]
    )
def read_retail_price_comparison(self):
    """
    Read the retail price comparison csv from s3 and store it on self.retail_price_comparison, with the
    Date column parsed to datetimes (values that cannot be parsed become NaT).
    """
    raw_rows = read_csv_from_s3(
        bucket_name=self.bucket,
        filepath="energy_consumption/retail-price-comparison.csv"
    )

    column_names = [
        'Date',
        'Average standard variable tariff (Large legacy suppliers)',
        'Average standard variable tariff (Other suppliers)',
        'Average fixed tariff',
        'Cheapest tariff (Large legacy suppliers)',
        'Cheapest tariff (All suppliers)',
        'Cheapest tariff (Basket)',
        'Default tariff cap level',
    ]

    # The first row is skipped. In the remaining rows the date sits under the '\ufeff"' key and the rest of
    # the values are a list under the None key
    records = [[entry['\ufeff"']] + entry[None] for entry in raw_rows[1:]]

    self.retail_price_comparison = pd.DataFrame(records, columns=column_names)
    parsed_dates = pd.to_datetime(self.retail_price_comparison['Date'], errors='coerce')
    self.retail_price_comparison['Date'] = parsed_dates
@staticmethod
def extract_kwh_value(text: str):
    """
    Extract the numerical kWh value from a given string.

    :param text: The input string containing the kWh value.
    :return: The extracted numerical kWh value as an integer, or None when the text is not a string or does not
        contain a "<number> kWh per year" figure.
    """
    # Missing text (e.g. None) cannot contain a value
    if not isinstance(text, str):
        return None

    # The number must start with a digit, so a stray "," before "kWh per year" is not treated as a value
    match = re.search(r'(\d[\d,]*) kWh per year', text)
    if match is None:
        return None

    # Remove the thousands separators before converting to integer
    return int(match.group(1).replace(',', ''))
def combine(self):
    """
    Combine the collected heating and hot water kwh data into a single dataset and save it.

    Stores the combined dataset and the average total consumption per current-energy-efficiency value in s3,
    and keeps the combined dataset on self.data.

    :return:
    """
    # NOTE(review): the source bucket is hard coded rather than taken from self.bucket
    source_bucket = "retrofit-datalake-dev"
    pickle_keys = list_files_in_s3_folder(bucket_name=source_bucket, folder_name="energy_consumption_data")

    records = []
    for pickle_key in tqdm(pickle_keys):
        # The file name, up to its first ".", is the date of the run
        run_stamp = pd.Timestamp(pickle_key.split("/")[-1].split(".")[0])

        for entry in read_pickle_from_s3(bucket_name=source_bucket, s3_file_name=pickle_key):
            epc = entry["epc"]
            epc_current = epc["current-energy-efficiency"]
            epc_potential = epc["potential-energy-efficiency"]

            # Only keep entries whose retrieved efficiencies match the EPC data
            if int(epc_current) != int(entry["current_epc_efficiency"]):
                continue
            if int(epc_potential) != int(entry["potential_epc_efficiency"]):
                continue

            row = dict(epc)
            row.update(
                {
                    "heating_kwh": self.extract_kwh_value(entry["heating_text"]),
                    "hot_water_kwh": self.extract_kwh_value(entry["hot_water_text"]),
                    "dataset_run_date": run_stamp,
                }
            )
            records.append(row)

    # Multiple runs are collated, so the same uprn can appear more than once. Keep the newest run per uprn
    df = pd.DataFrame(records)
    df = df.sort_values("dataset_run_date", ascending=False)
    df = df.drop_duplicates(subset="uprn", keep="first")
    df = df.drop(columns=["dataset_run_date"])
    df = df.astype({col: str for col in self.COLS_TO_STRINGIFY})

    # Store the combined dataset as parquet
    self.consumption_data_filepath = f"energy_consumption/{self.run_date}/energy_consumption_dataset.parquet"
    logger.info(f"Storing energy consumption dataset in s3 at {self.consumption_data_filepath}")
    save_dataframe_to_s3_parquet(
        bucket_name=self.bucket,
        file_key=self.consumption_data_filepath,
        df=df
    )

    # Average total consumption for each current-energy-efficiency value
    with_total = df.assign(total_consumption=df["heating_kwh"] + df["hot_water_kwh"])
    consumption_averages = (
        with_total.groupby("current-energy-efficiency")["total_consumption"].mean().reset_index()
    )

    self.consumption_averages_filepath = f"energy_consumption/{self.run_date}/consumption_averages.parquet"
    logger.info(f"Storing consumption averages in s3 at {self.consumption_averages_filepath}")
    # NOTE(review): the averages go to a hard coded bucket, not self.bucket - confirm this is intended
    save_dataframe_to_s3_parquet(
        bucket_name="retrofit-data-dev",
        file_key=self.consumption_averages_filepath,
        df=consumption_averages
    )

    self.data = df
def transform(
    self, data: pd.DataFrame, cleaned, new=False, save=False
):
    """
    Given the input EPCs, this method will transform the data into a format that can be used by the model
    This method can be used to transform the training data, or new epcs within the backend engine

    :param data: Dataframe of EPCs. The lodgement-date, lodgement-year and lodgement-month columns are written
        onto this dataframe directly
    :param cleaned: For each of walls-description, roof-description and floor-description, records containing
        "original_description" and "thermal_transmittance"
    :param new: Currently unused, see the TODO below
    :param save: If True, the transformed data is stored in s3 and nothing is returned
    :return: The transformed dataframe, or None if save is True
    :raises Exception: If save is True but no bucket has been set
    """
    if save and self.bucket is None:
        raise Exception("bucket not set, cannot save data")

    # TODO: New is a temporary parameter, which will transform the epc descriptions to their transformed features
    #  in anticipation of the new model

    data["lodgement-date"] = pd.to_datetime(data["lodgement-date"])
    data["lodgement-year"] = data["lodgement-date"].dt.year
    data["lodgement-month"] = data["lodgement-date"].dt.month

    # For walls, roof, floor description where we have average thermal transmittance, to avoid too many
    # categories we group them. Each range is (low, high], i.e. low is exclusive and high is inclusive
    ranges = {
        "lessthan 0.1": (0, 0.1),
        "0.1 - 0.3": (0.1, 0.3),
        "0.3 - 0.5": (0.3, 0.5),
        "morethan 0.5": (0.5, 2.5),
    }

    # Generate the lookup table, from 0.01 to 2.5 in steps of 0.01
    lookup_rows = []
    for i in range(1, 251):
        value = i / 100
        for label, (low, high) in ranges.items():
            if low < value <= high:
                lookup_rows.append({"from": value, "to": label})
                break

    # The lookup is joined on the string form of the value, matching the rounded strings built below
    thermal_transmittance_lookup_table = pd.DataFrame(lookup_rows)
    thermal_transmittance_lookup_table["from"] = thermal_transmittance_lookup_table["from"].astype(str)

    # Apply the lookup table to the data
    for feature in ["walls-description", "roof-description", "floor-description"]:
        cleaned_df = pd.DataFrame(cleaned[feature])[["original_description", "thermal_transmittance"]]
        # Round to 2 decimal places and convert to string
        cleaned_df["thermal_transmittance"] = cleaned_df["thermal_transmittance"].round(2).astype(str)

        data = data.merge(
            cleaned_df,
            how="left",
            left_on=feature,
            right_on="original_description",
        )

        # We now have the thermal transmittance in the data, which we can use to group with the lookup table
        data = data.merge(
            thermal_transmittance_lookup_table,
            how="left",
            left_on="thermal_transmittance",
            right_on="from",
        )

        # Where "to" is populated, replace feature with to
        data[feature] = np.where(
            ~pd.isnull(data["to"]),
            data["to"],
            data[feature]
        )
        data = data.drop(columns=["original_description", "thermal_transmittance", "from", "to"])

    data[self.NUMERICAL_COLUMNS] = data[self.NUMERICAL_COLUMNS].apply(pd.to_numeric)
    data[self.CATEGORICAL_COLUMNS] = data[self.CATEGORICAL_COLUMNS].astype(str)

    # Create new features:
    data['estimate_annual_kwh'] = data['energy-consumption-current'] * data['total-floor-area']

    if save:
        self.model_training_data_filepath = f"energy_consumption/{self.run_date}/training_data.parquet"
        # Log the key that is actually written (the training data, not the consumption dataset)
        logger.info(f"Storing model training data in s3 at {self.model_training_data_filepath}")
        save_dataframe_to_s3_parquet(
            bucket_name=self.bucket,
            file_key=self.model_training_data_filepath,
            df=data
        )
        return

    return data
@staticmethod
def _prepare_epc(p: Property):
    """
    Return a copy of the property's EPC data in the format required for scoring with the kwh models.

    Numeric fields are converted to floats (None is left as it is), the boolean style flags are mapped to
    "Y"/"N" and missing floor-level / floor-energy-eff values are replaced with their no-data markers.

    :param p: The property whose data is prepared. p.data itself is not modified
    :return: The prepared EPC as a dictionary
    """
    record = p.data.copy()

    float_fields = (
        'current-energy-efficiency',
        'potential-energy-efficiency', 'environment-impact-current',
        'environment-impact-potential', 'energy-consumption-current',
        'energy-consumption-potential', 'co2-emissions-current',
        'co2-emiss-curr-per-floor-area', 'co2-emissions-potential',
        'lighting-cost-current', 'lighting-cost-potential',
        'heating-cost-current', 'heating-cost-potential',
        'hot-water-cost-current', 'hot-water-cost-potential',
        'total-floor-area', 'multi-glaze-proportion',
        'extension-count', 'number-habitable-rooms', 'number-heated-rooms',
        'low-energy-lighting', 'number-open-fireplaces',
        'wind-turbine-count', 'unheated-corridor-length',
        'floor-height', 'photo-supply', 'fixed-lighting-outlets-count',
        'low-energy-fixed-light-count',
    )
    for field in float_fields:
        raw_value = record[field]
        if raw_value is None:
            continue
        record[field] = float(raw_value)

    # Flags can arrive as booleans, None or already as Y/N. A missing flag counts as "N"
    yes_no = {
        True: "Y",
        False: "N",
        None: "N",
        "Y": "Y",
        "N": "N"
    }
    for flag in ('mains-gas-flag', 'flat-top-storey'):
        record[flag] = yes_no[record[flag]]

    # Markers used in place of missing values
    placeholders = (
        ("floor-level", "NODATA!"),
        ("floor-energy-eff", "NO DATA!"),
    )
    for field, placeholder in placeholders:
        if pd.isnull(record[field]):
            record[field] = placeholder

    return record
def prepare_epc(self, input_properties: list[Property]):
    """
    Build the scoring dataframe for the kwh models from a list of properties.

    :param input_properties: The properties to prepare, each is passed through _prepare_epc
    :return: Dataframe with one row per property, with lodgement-year, lodgement-month and id (a copy of uprn)
        added
    """
    prepared_rows = [self._prepare_epc(prop) for prop in input_properties]
    scoring_data = pd.DataFrame(prepared_rows)

    lodged = pd.to_datetime(scoring_data["lodgement-date"])
    scoring_data["lodgement-year"] = lodged.dt.year
    scoring_data["lodgement-month"] = lodged.dt.month

    scoring_data["id"] = scoring_data["uprn"].copy()
    return scoring_data
def convert_cost_to_today(self, original_cost, lodgement_date):
    """
    Given energy costs in an EPC, this function converts that energy cost to a figure based on today's energy costs
    (or as close to today as possible)

    The cost is scaled by the ratio between the last tariff in the retail price comparison data and the tariff
    on the date closest to the lodgement date.

    :param original_cost: The original energy cost
    :param lodgement_date: The date the EPC was lodged (anything pd.Timestamp accepts)
    :return: The original cost scaled to the latest available tariff
    """
    tariff_column = 'Average standard variable tariff (Large legacy suppliers)'
    prices = self.retail_price_comparison

    # Dates are parsed with errors='coerce', so NaT rows can exist. idxmin skips them, whereas argsort on a
    # series containing NaT returns -1 / shifted positions and can select the wrong row.
    gaps = (prices['Date'] - pd.Timestamp(lodgement_date)).abs()
    closest_position = gaps.reset_index(drop=True).idxmin()

    # Tariff on the date closest to the lodgement date
    tariff_at_lodgement = prices[tariff_column].iloc[closest_position]

    # Latest available tariff (the last row of the dataset)
    latest_tariff = prices[tariff_column].iloc[-1]

    ratio = float(latest_tariff) / float(tariff_at_lodgement)
    return original_cost * ratio

View file

@ -131,53 +131,57 @@ def app():
sample_size = 500
energy_consumption_data = []
cavity_walls_data = []
for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
# Skip the first 50
# if i < 57:
# continue
data = pd.read_csv(directory / "certificates.csv", low_memory=False)
# Rename the columns to the same format as the api returns
data.columns = [c.replace("_", "-").lower() for c in data.columns]
# Take just date before the date threshold
data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE]
data = data[~pd.isnull(data["uprn"])]
# Take just the newest EPC per uprn, based on lodgement-date
data = data.sort_values("lodgement-date", ascending=False).drop_duplicates("uprn")
data = data.sample(sample_size, replace=False)
# We use the addreess data to find the related information
collected_data = []
for _, property_data in data.iterrows():
time.sleep(np.random.uniform(0.2, 1.5))
uprn = int(property_data["uprn"])
address = property_data["address1"]
postcode = property_data["postcode"]
expected_expiry_date = calculate_expiry_date(property_data["lodgement-date"])
response = retrieve_find_my_epc_data(
uprn=uprn,
postcode=postcode,
address=address,
expected_expiry_date=expected_expiry_date
)
if response is None:
try:
# Skip the first 50
if i < 256:
continue
collected_data.append(
{
**response,
"epc": property_data.to_dict(),
"epc_directory": str(directory)
}
)
energy_consumption_data.extend(collected_data)
data = pd.read_csv(directory / "certificates.csv", low_memory=False)
# Rename the columns to the same format as the api returns
data.columns = [c.replace("_", "-").lower() for c in data.columns]
# Take just date before the date threshold
data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE]
data = data[~pd.isnull(data["uprn"])]
# Take just the newest EPC per uprn, based on lodgement-date
data = data.sort_values("lodgement-date", ascending=False).drop_duplicates("uprn")
data = data.sample(sample_size, replace=False)
# We use the addreess data to find the related information
collected_data = []
for _, property_data in data.iterrows():
time.sleep(np.random.uniform(0.2, 1.5))
uprn = int(property_data["uprn"])
address = property_data["address1"]
postcode = property_data["postcode"]
expected_expiry_date = calculate_expiry_date(property_data["lodgement-date"])
response = retrieve_find_my_epc_data(
uprn=uprn,
postcode=postcode,
address=address,
expected_expiry_date=expected_expiry_date
)
if response is None:
continue
collected_data.append(
{
**response,
"epc": property_data.to_dict(),
"epc_directory": str(directory)
}
)
energy_consumption_data.extend(collected_data)
except Exception as e:
print(f"Error for directory {directory}: {e}")
# If we have an error, then we wait for a bit since it's likely due to timeout
time.sleep(300)
continue
# Store the pickle in s3
save_time = datetime.now()

View file

@ -1,104 +0,0 @@
import re
from datetime import datetime
from tqdm import tqdm
import pandas as pd
from utils.s3 import list_files_in_s3_folder, read_pickle_from_s3, save_dataframe_to_s3_parquet
# These columns we co-erce to strings before saving
PROBLEMATIC_COLUMNS = ["main-heating-controls", "floor-level"]
def extract_kwh_value(text):
    """
    Extract the numerical kWh value from a given string.

    :param text: The input string containing the kWh value.
    :return: The extracted numerical kWh value as an integer, or None when the text is not a string or does not
        contain a "<number> kWh per year" figure.
    """
    # Missing text (e.g. None) cannot contain a value
    if not isinstance(text, str):
        return None

    # The number must start with a digit, so a stray "," before "kWh per year" is not treated as a value
    match = re.search(r'(\d[\d,]*) kWh per year', text)
    if match is None:
        return None

    # Remove the thousands separators before converting to integer
    return int(match.group(1).replace(',', ''))
def app():
    """
    Collate the energy consumption files written to our datalake in s3 into a single dataset, and store the
    dataset and the average consumption per current-energy-efficiency value back in s3 for analysis

    :return:
    """
    source_bucket = "retrofit-datalake-dev"
    target_bucket = "retrofit-data-dev"

    pickle_keys = list_files_in_s3_folder(bucket_name=source_bucket, folder_name="energy_consumption_data")
    run_date = datetime.now().strftime("%Y-%m-%d")

    records = []
    for pickle_key in tqdm(pickle_keys):
        # The file name, up to its first ".", is the date of the run
        run_stamp = pd.Timestamp(pickle_key.split("/")[-1].split(".")[0])

        for entry in read_pickle_from_s3(bucket_name=source_bucket, s3_file_name=pickle_key):
            epc = entry["epc"]
            epc_current = epc["current-energy-efficiency"]
            epc_potential = epc["potential-energy-efficiency"]

            # Only keep entries whose retrieved efficiencies match the EPC data
            if int(epc_current) != int(entry["current_epc_efficiency"]):
                continue
            if int(epc_potential) != int(entry["potential_epc_efficiency"]):
                continue

            row = dict(epc)
            row.update(
                {
                    "heating_kwh": extract_kwh_value(entry["heating_text"]),
                    "hot_water_kwh": extract_kwh_value(entry["hot_water_text"]),
                    "dataset_run_date": run_stamp,
                }
            )
            records.append(row)

    # Multiple runs are collated, so the same uprn can appear more than once. Keep the newest run per uprn
    df = pd.DataFrame(records)
    df = df.sort_values("dataset_run_date", ascending=False)
    df = df.drop_duplicates(subset="uprn", keep="first")
    df = df.drop(columns=["dataset_run_date"])
    df = df.astype({col: str for col in PROBLEMATIC_COLUMNS})

    # Store the combined dataset as parquet
    save_dataframe_to_s3_parquet(
        bucket_name=target_bucket,
        file_key=f"energy_consumption/{run_date}/energy_consumption_dataset.parquet",
        df=df
    )

    # Average total consumption for each current-energy-efficiency value
    with_total = df.assign(total_consumption=df["heating_kwh"] + df["hot_water_kwh"])
    consumption_averages = (
        with_total.groupby("current-energy-efficiency")["total_consumption"].mean().reset_index()
    )

    save_dataframe_to_s3_parquet(
        bucket_name=target_bucket,
        file_key=f"energy_consumption/{run_date}/consumption_averages.parquet",
        df=consumption_averages
    )

View file

@ -1,57 +0,0 @@
from pprint import pprint
import msgpack
from utils.s3 import read_from_s3
from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel
def handler():
    """
    Train the heating and hot water kwh models and store the final models in s3 as pickles

    :return:
    """
    dataset_version = "2024-07-08"

    packed_cleaned = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name="retrofit-data-dev"
    )
    cleaned = msgpack.unpackb(packed_cleaned, raw=False)

    model = EnergyConsumptionModel(cleaned=cleaned, n_jobs=2)
    model.read_dataset(f'energy_consumption/{dataset_version}/energy_consumption_dataset.parquet')
    model.feature_engineering()
    model.save_dummy_schema(dataset_version=dataset_version)

    # Both targets go through the same steps: heating first, then hot water
    for target in ('heating_kwh', 'hot_water_kwh'):
        model.split_dataset(target=target)
        model.fit_model(target=target)
        model.re_train_final_model(target=target)

        evaluation_results = model.evaluate_model(target=target)
        pprint(evaluation_results["train"])
        pprint(evaluation_results["test"])

        model.save_model(target=target, dataset_version=dataset_version)

View file

@ -0,0 +1,24 @@
import msgpack
from etl.bill_savings.KwhData import KwhData
from utils.s3 import read_from_s3
def app():
    """
    Collate the energy consumption files written to our datalake in s3 into a single dataset, then transform
    that dataset into the model training data and store it back in s3

    :return:
    """
    packed_cleaned = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name="retrofit-data-dev"
    )
    cleaned = msgpack.unpackb(packed_cleaned, raw=False)

    # If there is any problematic data, it could be:
    # s3://retrofit-datalake-dev/energy_consumption_data/2024-08-10 18:48:06.866647.pkl
    client = KwhData(bucket="retrofit-datalake-dev")
    client.combine()
    client.transform(data=client.data, cleaned=cleaned, save=True)

View file

@ -0,0 +1,186 @@
"""
This script prepares some data for the Birmingham City Council tender
"""
import pandas as pd
import numpy as np
epc_data = pd.read_csv("local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv")
# Broad assumptions
# Around 67% of homes in the UK have an EPC; to be conservative with our estimates, this could be rounded up to 70%:
# https://www.ons.gov.uk/peoplepopulationandcommunity/housing/articles/energyefficiencyofhousinginenglandandwales/2023
# However, we have 322,128 homes in Birmingham with an EPC, which is 76.1% of the total number of homes in
# Birmingham based on the 2021 census (423,500 homes).
# NOTE: the constant below therefore uses the Birmingham-specific 76.1% figure, not the national 67-70%
N_HOUSEHOLDS_IN_BIRMINGHAM = 423_500
N_HOMES_WITH_AN_EPC = 322_128
PROPORTION_OF_HOMES_WITH_AN_EPC = 0.761
# Derived from the two counts above so the household total is only stated once
N_HOMES_WITHOUT_AN_EPC = N_HOUSEHOLDS_IN_BIRMINGHAM - N_HOMES_WITH_AN_EPC
# 55% of households are recipients of benefits in the West Midlands
# (2021/2022 - https://www.statista.com/statistics/382858/uk-state-benefits-by-region/)
PROPORTION_OF_HOMES_ON_BENEFITS = 0.55
# https://www.justgroupplc.co.uk/~/media/Files/J/Just-Retirement-Corp/news-doc/2023/six-in-10-homeowners-eligible-for
# -benefits-failing-to-claim-just-group-annual-insight-report.pdf
PROPORTION_OF_HOMEOWNERS_CLAIMING_FOR_BENEFITS = 0.106
# Breakdown of properties in council tax bands in the UK, to give us an estimate of the number of properties in A-D
band_a_proportion = 0.239
band_b_proportion = 0.195
band_c_proportion = 0.219
band_d_proportion = 0.156
COUNCIL_TAX_BAND_A_TO_D_PROPORTION = band_a_proportion + band_b_proportion + band_c_proportion + band_d_proportion
# Keep only the most recently lodged certificate for each UPRN
epc_data["LODGEMENT_DATETIME"] = pd.to_datetime(epc_data["LODGEMENT_DATETIME"], errors="coerce")
epc_data = epc_data.sort_values(["LODGEMENT_DATETIME"], ascending=False).drop_duplicates("UPRN")

# We want to figure out the number of properties that are eligible for ECO/GBIS funding
social_tenures = ["Rented (social)", "rental (social)"]
owner_occupied_tenures = ["Owner-occupied", "owner-occupied"]
prs_tenures = ["Rented (private)", "rental (private)"]

_ratings_d_to_g = ["D", "E", "F", "G"]
_ratings_e_to_g = ["E", "F", "G"]

# Each rule is (tenures, EPC ratings, label). Rules are applied in order and a property keeps the first label it
# receives, because every rule only touches rows whose eligibility_type is still null.
_eligibility_rules = [
    # Eligibility 1: ECO4 help to heat group OO - tenure is owner occupied and EPC rating D-G
    (owner_occupied_tenures, _ratings_d_to_g, "eco4_oo_hthg_needs_scaling_on_benefits"),
    # Eligibility 2: ECO4 help to heat group PRS - tenure is private rental and EPC rating E-G
    (prs_tenures, _ratings_e_to_g, "eco4_prs_hthg_needs_scaling_on_benefits"),
    # Eligibility 3: ECO4 social housing - tenure is social rented and EPC rating D-G
    (social_tenures, _ratings_d_to_g, "eco4_social_housing"),
    # Eligibility 4: GBIS General Eligibility, OO - tenure is owner occupied and EPC rating D-G.
    # This is a subset of Eligibility 1, so it has no rule of its own. We scale
    # eco4_oo_hthg_needs_scaling_on_benefits based on the % of properties on benefits. Of the properties left over
    # that are deemed not eligible, a % will be eligible for GBIS via Eligibility 4, based on the % of units in
    # council tax bands A-D.
    # Eligibility 5: GBIS General Eligibility, PRS - tenure is private rental and EPC rating D-G.
    # Additionally, some units that fall out of Eligibility 2 will be eligible for GBIS via Eligibility 5, via the
    # same mechanism as Eligibility 4. We handle this later.
    (prs_tenures, _ratings_d_to_g, "gbis_prs_ge_needs_scaling_on_council_tax_band"),
]

epc_data["eligibility_type"] = None
for _tenures, _ratings, _label in _eligibility_rules:
    _matches = (
        epc_data["TENURE"].isin(_tenures) &
        epc_data["CURRENT_ENERGY_RATING"].isin(_ratings) &
        pd.isnull(epc_data["eligibility_type"])
    )
    epc_data["eligibility_type"] = np.where(_matches, _label, epc_data["eligibility_type"])

# Eligibility 6: GBIS General Eligibility, Social - tenure is social rented and EPC rating D-G, but the property
# should also be rented out below market rate.
# This is a subset of Eligibility 3 - we likely don't need to do any scaling
# Count the properties that landed in each eligibility bucket
_eligibility = epc_data["eligibility_type"]
n_eco4_oo_hthg_needs_scaling_on_benefits = int((_eligibility == "eco4_oo_hthg_needs_scaling_on_benefits").sum())
n_eco4_prs_hthg_needs_scaling_on_benefits = int((_eligibility == "eco4_prs_hthg_needs_scaling_on_benefits").sum())
n_eco4_social = int((_eligibility == "eco4_social_housing").sum())
n_gbis_prs_ge_needs_scaling_on_council_tax_band = int(
    (_eligibility == "gbis_prs_ge_needs_scaling_on_council_tax_band").sum()
)

# We're going to make the broad assumption that all homeowners claiming benefits live in homes in council tax
# bands A-D. Therefore there are no additional homes in eligibility 4 and 5, and buckets 1 and 2 are scaled by the
# council tax band A-D proportion.
# Alternative scalings that were considered:
# n_eligibility_1 = np.floor(n_eco4_oo_hthg_needs_scaling_on_benefits * PROPORTION_OF_HOMEOWNERS_CLAIMING_FOR_BENEFITS)
# n_eligibility_2 = np.floor(n_eco4_prs_hthg_needs_scaling_on_benefits * PROPORTION_OF_HOMES_ON_BENEFITS)
n_eligibility_1 = np.floor(n_eco4_oo_hthg_needs_scaling_on_benefits * COUNCIL_TAX_BAND_A_TO_D_PROPORTION)
n_eligibility_2 = np.floor(n_eco4_prs_hthg_needs_scaling_on_benefits * COUNCIL_TAX_BAND_A_TO_D_PROPORTION)
n_eligiblity_3 = n_eco4_social

# Eligibility 4 (not currently used): subtract the number of homes in eligibility 1 from the number of homes under
# ECO4 OO, HTHG before scaling on benefits. This gives the number of homes that were not on benefits, which would
# then be scaled by the % of homes in council tax bands A-D:
# n_eligiblity_4 = np.floor(
#     (n_eco4_oo_hthg_needs_scaling_on_benefits - n_eligibility_1) * COUNCIL_TAX_BAND_A_TO_D_PROPORTION
# )

# Eligibility 5. Homes that fall out of eligibility 2 could also be added on (not currently used):
# np.floor((n_eco4_prs_hthg_needs_scaling_on_benefits - n_eligibility_2) * COUNCIL_TAX_BAND_A_TO_D_PROPORTION)
n_eligibiltiy_5 = np.floor(n_gbis_prs_ge_needs_scaling_on_council_tax_band * COUNCIL_TAX_BAND_A_TO_D_PROPORTION)

# Owner occupied: work out the eligible share of owner-occupied homes that have an EPC
n_owner_occupied = int(epc_data["TENURE"].isin(owner_occupied_tenures).sum())
oo_eligibility = n_eligibility_1
# 68% of owner occupied homes are eligible
proportion_of_oo_eligible = oo_eligibility / n_owner_occupied
# We then use this % on the rest of the homes in Birmingham that do not have an EPC
oo_eligible_without_an_epc = np.floor(N_HOMES_WITHOUT_AN_EPC * proportion_of_oo_eligible)
oo_eligibility = oo_eligibility + oo_eligible_without_an_epc

# All private rentals require an EPC
prs_eligibility = n_eligibility_2 + n_eligibiltiy_5

# Most social housing properties will have an EPC so we don't scale this up
social_eligibility = n_eligiblity_3

# Total eligible homes, expressed as a share of all households in Birmingham (only the owner-occupied figure
# above has been scaled up to cover homes without an EPC)
total_eligible = oo_eligibility + prs_eligibility + social_eligibility
proportion_of_homes_eligibile = total_eligible / N_HOUSEHOLDS_IN_BIRMINGHAM
# Approximately 53% of homes in Birmingham (227k homes) are eligible for some form of ECO4 or GBIS funding
# This is broken down as follows:
# - 155k owner occupiers
# - 33k private rentals
# - 39k social housing
# We can't seem to identify the properties owned by the council in the company ownership data, because what is the
# entity that owns the property? Is it the council, or is it a company that is owned by the council? We can't be sure
# and so since BCC owns 54,000 social housing properties (5k) supported housing
# [https://www.birmingham.gov.uk/info/50094/housing_options/2686/apply_for_social_housing#:~:text=We%20manage
# %20around%2054%2C000%20social,a%20member%20of%20your%20household.]
# and there are 78,410 social housing properties in Birmingham, we can assume that the council owns 54,000 of these
# and so 69% of the social housing is owned by the Council
# Since we saw that 38,779 of 78,410 social housing looked to be able to benefit from ECO/GBIS funding, we can assume
# that 69% of these are owned by the council, which is 26,757 properties
# So, with these assumptions in mind:
# We can commit to [x] per annum based on your 54k council-owned, of which approximately 27k are likely to be eligible
# for some form of ECO/GBIS funding. We will work directly with Housing associations to address the remaining 12k
# social properties that may be eligible for funding through ECO/GBIS.
# We will market directly to the 33k private rentals and 155k owner occupiers that are eligible for funding,
# and assuming a 5% conversion, will aim to complete work on

View file

View file

@ -0,0 +1,378 @@
import inspect
import pandas as pd
from etl.epc.settings import EARLIEST_EPC_DATE
from pathlib import Path
import numpy as np
from utils.s3 import save_csv_to_s3
# Path of this source file, obtained by asking inspect where a lambda defined here lives
# (presumably used instead of __file__ so it also works where __file__ is undefined - TODO confirm)
src_file_path = inspect.getfile(lambda: None)
# Directory holding the EPC bulk download, resolved relative to this file's directory
EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
# NOTE(review): absolute path on a developer machine - this script only runs where that directory exists
CUSTOMER_DATA_DIRECTORY = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/Data"
# User and portfolio ids; make_asset_list uses them as the S3 key prefix "{USER_ID}/{PORTFOLIO_ID}/..."
USER_ID = 8
PORTFOLIO_ID = 90
def make_asset_list():
    """
    Set up a small asset list for the study.

    Steps:
      1. Load the Lewes EPC certificates and keep the newest certificate per UPRN.
      2. Load the customer data sources (AddressBase, PV / ASHP / insulation potential, technology costs).
      3. Build the asset list: residential AddressBase rows joined to EPC and insulation data, restricted to
         properties with an EPC score of 80 or below, excluding caravans and rows without a usable floor count
         or wall area.
      4. Build the non-invasive (air source heat pump / solar PV) recommendations per property.
      5. Upload the asset list, the recommendations and the patches to S3, and print the scenario request bodies.

    :return: None
    """
    # Read in EPC data for Lewes
    lewes_certificates = EPC_DIRECTORY / "domestic-E07000063-Lewes/certificates.csv"
    epc_data = pd.read_csv(lewes_certificates, low_memory=False)
    # Rename the columns to the same format as the api returns
    epc_data.columns = [c.replace("_", "-").lower() for c in epc_data.columns]
    # Keep certificates lodged on or after the date threshold, and only those that carry a UPRN
    epc_data = epc_data[epc_data["lodgement-date"] >= EARLIEST_EPC_DATE]
    epc_data = epc_data[~pd.isnull(epc_data["uprn"])].copy()
    epc_data["uprn"] = epc_data["uprn"].astype(int).astype(str)
    # Take the newest EPC per uprn. drop_duplicates keeps the whole newest row; groupby().last() would instead
    # take the last non-null value per column and so could mix fields from older certificates
    epc_data = epc_data.sort_values("lodgement-date").drop_duplicates("uprn", keep="last")

    # We read in the multiple data sources
    address_base = _read_customer_csv("OS AddressBase Premium")
    # Filter on resi (before casting the UPRN, exactly as the cast was ordered originally)
    address_base = address_base[address_base["Primary Code Description"] == "Residential"].copy()
    address_base = _with_string_uprn(address_base)
    pv_potential = _with_string_uprn(_read_customer_csv("Domestic Rooftop PV Potential"))
    ashp_potential = _with_string_uprn(_read_customer_csv("Air Source Heat Pump Potential"))
    insulation_potential = _with_string_uprn(_read_customer_csv("Insulation Potential"))
    renewables_cost = _with_string_uprn(_read_customer_csv("Low Carbon Technology Costs"))

    # Merge the EPC data onto address base
    asset_list = address_base[
        ["UPRN", "Class Description", "Relative Height - Eaves"]
    ].merge(
        epc_data[
            ["uprn", "current-energy-efficiency", "current-energy-rating", "address1", "postcode", "floor-height",
             "property-type", "built-form", "co2-emissions-current"]
        ],
        how="left",
        left_on="UPRN",
        right_on="uprn",
    ).drop(
        columns=["uprn"]
    ).merge(
        insulation_potential[["UPRN", "EPC Rating", "Wall Area [m^2]", "Building Area [m^2]"]],
        how="left",
        on="UPRN",
    ).rename(
        columns={"Wall Area [m^2]": "insulation_wall_area", "Building Area [m^2]": "floor_area"}
    )

    # NOTE: ad-hoc exploratory statistics (rating counts, a CO2 histogram, per-m^2 cost checks) used to be computed
    # and discarded here. They had no effect on the outputs and one of them indexed a column that does not exist
    # ("energy-efficiency-rating"), which raised a KeyError. Finding worth keeping from the cost check: the third
    # party's cavity wall insulation works out at about £8 per m^2.

    # Take properties below a B - there are 2844 units
    asset_list = asset_list[asset_list["current-energy-efficiency"].astype(float) <= 80]
    # Drop caravans
    asset_list = asset_list[asset_list["Class Description"] != "Caravan"]
    asset_list = asset_list[~pd.isnull(asset_list["current-energy-efficiency"])].copy()
    # Take a sample, for properties that have an EPC, with a seed
    # asset_list = asset_list.sample(frac=0.5, random_state=42)

    # Fallback floor height for properties whose EPC has none
    median_floor_height = asset_list["floor-height"].median()

    def estimate_n_floors(
            building_height, floor_height, address_base_property_description, epc_property_type,
    ):
        """
        Estimate the number of floors of a property.

        Self contained flats are 1 floor (EPC type "Flat") or 2 floors (EPC type "Maisonette"); any other EPC
        type on a flat means the two sources disagree, so None is returned. Everything else is the eaves height
        divided by the floor height (or by the median floor height when the EPC has none), rounded.
        """
        if address_base_property_description == "Self Contained Flat (Includes Maisonette / Apartment)":
            if epc_property_type in ["Flat"]:
                return 1
            if epc_property_type == "Maisonette":
                return 2
            return None
        if pd.isnull(floor_height):
            return np.round(building_height / median_floor_height)
        return np.round(building_height / floor_height)

    # Estimate the number of floors
    asset_list["number_of_floors"] = asset_list.apply(
        lambda x: estimate_n_floors(
            building_height=x["Relative Height - Eaves"],
            floor_height=x["floor-height"],
            address_base_property_description=x["Class Description"],
            epc_property_type=x["property-type"],
        ),
        axis=1,
    )
    # Drop any entries with null floors because that means the ordnance survey data doesn't align with the epc data
    asset_list = asset_list[~pd.isnull(asset_list["number_of_floors"])]
    # Drop any entries with null insulation wall area
    asset_list = asset_list[~pd.isnull(asset_list["insulation_wall_area"])]
    # EPC rating distribution noted at this point:
    # D    0.419929
    # C    0.391459
    # E    0.160142
    # F    0.017794
    # G    0.010676
    # Total asset list:
    # D    0.450409
    # C    0.412016
    # E    0.110203
    # F    0.020263
    # G    0.007110

    # We do the following:
    # 1) Create final asset list
    # 2) Create non-intrusive recommendations
    # 3) Create a third party costing object
    final_asset_list = asset_list.rename(
        columns={"UPRN": "uprn", "address1": "address", "floor_area": "insulation_floor_area"}
    )[["uprn", "address", "postcode", "insulation_wall_area", "insulation_floor_area", "number_of_floors"]]

    # Create non-invasive recommendations, which come from the solar potential and ASHP potential data sources
    non_invasive_recommendations = _build_non_invasive_recommendations(
        final_asset_list, ashp_potential, pv_potential, renewables_cost
    )

    # Store the asset list in s3
    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
    save_csv_to_s3(
        dataframe=final_asset_list,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename,
    )

    # Store non-invasive recommendations in S3
    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
    save_csv_to_s3(
        dataframe=pd.DataFrame(non_invasive_recommendations),
        bucket_name="retrofit-plan-inputs-dev",
        file_name=non_invasive_recommendations_filename,
    )

    # We add a patch to some of the units because there's no data for the built form
    # We would be able to handle this automatically in the future, when using OS API
    patches = [
        {"uprn": "10033266220", "built-form": "Semi-Detached"},
        {"uprn": "10033266219", "built-form": "Semi-Detached"},
    ]
    # Store patches in s3
    # NOTE(review): the key ends in .json but the helper called is save_csv_to_s3 - confirm the consumer's format
    patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.json"
    save_csv_to_s3(
        dataframe=pd.DataFrame(patches),
        bucket_name="retrofit-plan-inputs-dev",
        file_name=patches_filename,
    )

    # Create the scenarios as (scenario name, excluded measure types); None means no "exclusions" key at all
    scenarios = [
        (
            "Demand Reduction - no solid wall, windows, LEDs",
            ["internal_wall_insulation", "external_wall_insulation", "floor_insulation", "heating", "solar_pv",
             "lighting", "windows", "secondary_heating"],
        ),
        (
            "Demand Reduction - no solid wall, floors or heating",
            ["internal_wall_insulation", "external_wall_insulation", "floor_insulation", "heating", "solar_pv"],
        ),
        # 2.5 - full fabric, no decant
        # NOTE(review): shares its scenario name with the previous scenario although external wall insulation is
        # allowed here - confirm whether the name should differ
        (
            "Demand Reduction - no solid wall, floors or heating",
            ["internal_wall_insulation", "floor_insulation", "heating", "solar_pv"],
        ),
        # Scenario B
        (
            "Demand Reduction, Heating Systems, Solar PV - no solid wall or floors",
            ["internal_wall_insulation", "external_wall_insulation", "floor_insulation"],
        ),
        # Scenario 4 - whole house, nothing excluded
        ("Whole House", None),
    ]
    for scenario_name, exclusions in scenarios:
        print(
            _scenario_body(
                scenario_name=scenario_name,
                exclusions=exclusions,
                trigger_file_path=filename,
                patches_file_path=patches_filename,
                recommendations_file_path=non_invasive_recommendations_filename,
            )
        )


def _read_customer_csv(dataset_name):
    """Read "<CUSTOMER_DATA_DIRECTORY>/<dataset_name>/<dataset_name>.csv" into a DataFrame."""
    return pd.read_csv(
        f"{CUSTOMER_DATA_DIRECTORY}/{dataset_name}/{dataset_name}.csv",
        low_memory=False,
    )


def _with_string_uprn(dataframe):
    """Cast the UPRN column to an integer-formatted string, in place, and return the same DataFrame."""
    dataframe["UPRN"] = dataframe["UPRN"].astype(int).astype(str)
    return dataframe


def _technology_cost(property_costs, cost_column, potential, suitability_column, size_column, size, renewables_cost):
    """
    Return the cost of a technology for one property.

    Uses the property's own cost row when there is one. Otherwise falls back to the mean cost of suitable
    properties with the same recommended size (NaN when there are none).
    """
    if not property_costs.empty:
        return property_costs[cost_column].values[0]
    similar_properties = potential[
        potential[suitability_column] & (potential[size_column] == size)
    ].merge(
        renewables_cost, how="inner", on="UPRN"
    )
    return similar_properties[cost_column].mean()


def _build_non_invasive_recommendations(final_asset_list, ashp_potential, pv_potential, renewables_cost):
    """Build one {"uprn", "recommendations"} entry per property, covering ASHP and solar PV suitability."""
    non_invasive_recommendations = []
    for _, row in final_asset_list.iterrows():
        uprn = row["uprn"]
        property_ashp_potential = ashp_potential[
            (ashp_potential["UPRN"] == uprn) & ashp_potential["Overall Suitability Rating"]
        ]
        property_pv_potential = pv_potential[
            (pv_potential["UPRN"] == uprn) & pv_potential["Overall Suitability"]
        ]
        property_costs = renewables_cost[renewables_cost["UPRN"] == uprn]

        property_non_invasive_recs = []
        if property_ashp_potential.empty:
            property_non_invasive_recs.append({"type": "air_source_heat_pump", "suitable": False})
        else:
            heat_pump_size = property_ashp_potential["Recommended Heat Pump Size [kW]"].values[0]
            property_non_invasive_recs.append(
                {
                    "type": "air_source_heat_pump",
                    "suitable": True,
                    "size": heat_pump_size,
                    "cost": _technology_cost(
                        property_costs, "Air Source Heat Pump - Total",
                        ashp_potential, "Overall Suitability Rating", "Recommended Heat Pump Size [kW]",
                        heat_pump_size, renewables_cost,
                    ),
                    "ashp_only_heating_recommendation": True,
                }
            )

        if property_pv_potential.empty:
            property_non_invasive_recs.append({"type": "solar_pv", "suitable": False})
        else:
            array_size_kw = property_pv_potential["Recommended Array Size [kW]"].values[0]
            property_non_invasive_recs.append(
                {
                    "type": "solar_pv",
                    "suitable": True,
                    "array_wattage": array_size_kw * 1000,
                    "initial_ac_kwh_per_year": property_pv_potential["Annual Generation [kWh]"].values[0],
                    "panneled_roof_area": property_pv_potential["Roof area suitable for PV [m^2]"].values[0],
                    # Previously this read the property's own cost row unconditionally, which failed for
                    # properties without one; it now uses the same fallback as the heat pump cost
                    "cost": _technology_cost(
                        property_costs, "Rooftop PV - Total",
                        pv_potential, "Overall Suitability", "Recommended Array Size [kW]",
                        array_size_kw, renewables_cost,
                    ),
                }
            )

        non_invasive_recommendations.append(
            {
                "uprn": uprn,
                "recommendations": property_non_invasive_recs,
            }
        )
    return non_invasive_recommendations


def _scenario_body(scenario_name, exclusions, trigger_file_path, patches_file_path, recommendations_file_path):
    """Build one scenario request body; the "exclusions" key is omitted when exclusions is None."""
    body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Private",
        "goal": "Increasing EPC",
        "goal_value": "A",
        "trigger_file_path": trigger_file_path,
        "already_installed_file_path": "",
        "patches_file_path": patches_file_path,
        "non_invasive_recommendations_file_path": recommendations_file_path,
        "scenario_name": scenario_name,
        "multi_plan": True,
    }
    if exclusions is not None:
        body["exclusions"] = exclusions
    # budget is added last so the printed key order matches the hand-written bodies this replaces
    body["budget"] = None
    return body

View file

@ -0,0 +1,417 @@
from tqdm import tqdm
import pandas as pd
import numpy as np
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, Scenario
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
def get_data(portfolio_id, scenario_ids):
    """
    Fetch the properties of a portfolio, plus the plans and default recommendations of the given scenarios.

    :param portfolio_id: id of the portfolio whose properties (joined to their EPC details) are fetched
    :param scenario_ids: scenario ids whose plans, and the default recommendations on those plans, are fetched
    :return: tuple (properties_data, plans_data, recommendations_data), each a list of dicts keyed by column
        name. Property dicts merge the PropertyModel and PropertyDetailsEpcModel columns, with the EPC details
        value winning where both tables share a column name. Recommendation dicts also carry a "Scenario ID" key.
    """
    property_columns = [col.name for col in PropertyModel.__table__.columns]
    details_columns = [col.name for col in PropertyDetailsEpcModel.__table__.columns]
    plan_columns = [col.name for col in Plan.__table__.columns]
    recommendation_columns = [col.name for col in Recommendation.__table__.columns]

    session = sessionmaker(bind=db_engine)()
    # try/finally so the session is released even when one of the queries raises
    try:
        session.begin()

        # Get properties and their details for a specific portfolio
        properties_query = session.query(
            PropertyModel,
            PropertyDetailsEpcModel
        ).join(
            PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
        ).filter(
            PropertyModel.portfolio_id == portfolio_id  # Filter by portfolio ID
        ).all()

        # Transform properties data to include all fields dynamically
        properties_data = [
            {
                **{name: getattr(prop.PropertyModel, name) for name in property_columns},
                **{name: getattr(prop.PropertyDetailsEpcModel, name) for name in details_columns},
            }
            for prop in properties_query
        ]

        # Get the plans of the requested scenarios (plans are filtered by scenario only, not by property)
        plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()
        plans_data = [
            {name: getattr(plan, name) for name in plan_columns}
            for plan in plans_query
        ]

        # Extract plan IDs for filtering recommendations through PlanRecommendations
        plan_ids = [plan["id"] for plan in plans_data]

        # Get recommendations through PlanRecommendations for those plans and that are default
        recommendations_query = session.query(
            Recommendation,
            Plan.scenario_id
        ).join(
            PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
        ).join(
            Plan, Plan.id == PlanRecommendations.plan_id  # Join with Plan to access scenario_id
        ).filter(
            PlanRecommendations.plan_id.in_(plan_ids),
            Recommendation.default == True  # noqa: E712 - SQLAlchemy column comparison, not a Python bool test
        ).all()

        # Every row is a (Recommendation, scenario_id) pair, so the entity is always read from rec.Recommendation
        recommendations_data = [
            {
                **{name: getattr(rec.Recommendation, name) for name in recommendation_columns},
                "Scenario ID": rec.scenario_id,
            }
            for rec in recommendations_query
        ]
    finally:
        session.close()

    return properties_data, plans_data, recommendations_data
def estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_df, scenario_ids):
    """
    Estimate the heating & hot water kWh after retrofit, and the cost per kWh saved, for each scenario.

    Only default recommendations are used. Savings from 'low_energy_lighting' and 'solar_pv' recommendations
    are kept out of the heating & hot water figure, but they do count towards the total kWh saved that the
    cost per kWh is based on.

    :param properties_df: one row per property; reads "property_id" and "current_energy_demand_heating_hotwater"
    :param recommendations_df: recommendations; reads "Scenario ID", "default", "type", "kwh_savings",
        "property_id" and "estimated_cost"
    :param scenario_ids: scenario ids to evaluate, one output row per id in each returned table
    :return: tuple of three DataFrames, each with a "scenario_id" column:
        - averages of the current and post-retrofit heating & hot water kWh over properties in properties_df
        - the same averages over only the properties that have recommendations, plus "difference" and
          "percentage_reduction"
        - "cost_per_kwh_saved" (NaN when the scenario saves no kWh at all)
    """
    current_col = "current_energy_demand_heating_hotwater"
    post_col = "Post Retrofit Heating & Hotwater kwh"
    non_heating_types = ["low_energy_lighting", "solar_pv"]

    # The de-duplicated property table does not depend on the scenario, so build it once
    unique_properties = properties_df.drop_duplicates()

    scenario_comparison_df = []
    scenario_comparison_df_2 = []
    cost_per_kwh_saved_table = []
    for scenario_id in scenario_ids:
        # Get the recommendations for the scenario, default
        scenario_recommendations = recommendations_df[
            (recommendations_df["Scenario ID"] == scenario_id) &
            (recommendations_df["default"] == True)  # noqa: E712 - element-wise comparison on a column
        ].copy()

        # Split the savings into lighting / solar / everything else (column-wise rather than row-by-row)
        rec_type = scenario_recommendations["type"]
        kwh_savings = scenario_recommendations["kwh_savings"]
        scenario_recommendations["lighting_kwh"] = kwh_savings.where(rec_type == "low_energy_lighting", 0)
        scenario_recommendations["solar_kwh"] = kwh_savings.where(rec_type == "solar_pv", 0)
        # 'Estimated Kwh Savings' is zero where one of the specific kwh columns is used
        scenario_recommendations["Estimated Kwh Savings"] = kwh_savings.where(
            ~rec_type.isin(non_heating_types), 0
        )

        grouped_data = scenario_recommendations.groupby(["property_id"]).agg({
            "Estimated Kwh Savings": "sum",
            "lighting_kwh": "sum",
            "solar_kwh": "sum",
            "estimated_cost": "sum",
        }).reset_index()

        # Left merge: properties without recommendations keep NaN savings
        comparison = unique_properties.merge(grouped_data, on=["property_id"], how="left")
        comparison[post_col] = comparison[current_col] - comparison["Estimated Kwh Savings"]

        avgs = comparison[[current_col, post_col]].mean()

        # We now, for properties that have a plan, do a before and after
        with_savings = comparison[~pd.isnull(comparison["Estimated Kwh Savings"])]
        avgs2 = with_savings[[current_col, post_col]].mean()
        avgs2["difference"] = avgs2[current_col] - avgs2[post_col]
        avgs2["percentage_reduction"] = 100 * avgs2["difference"] / avgs2[current_col]

        # We also calculate the cost per kwh saved
        total_kwh_saved = (
            with_savings["Estimated Kwh Savings"].sum() +
            with_savings["lighting_kwh"].sum() +
            with_savings["solar_kwh"].sum()
        )
        total_cost = with_savings["estimated_cost"].sum()
        # With nothing saved the ratio is undefined, so report NaN instead of dividing by zero
        cost_per_kwh_saved = total_cost / total_kwh_saved if total_kwh_saved != 0 else np.nan

        scenario_comparison_df.append({"scenario_id": scenario_id, **avgs})
        scenario_comparison_df_2.append({"scenario_id": scenario_id, **avgs2})
        cost_per_kwh_saved_table.append({"scenario_id": scenario_id, "cost_per_kwh_saved": cost_per_kwh_saved})

    scenario_comparison_population = pd.DataFrame(scenario_comparison_df)
    scenario_comparison_retrofitted_units = pd.DataFrame(scenario_comparison_df_2)
    cost_per_kwh_saved_table = pd.DataFrame(cost_per_kwh_saved_table)
    return scenario_comparison_population, scenario_comparison_retrofitted_units, cost_per_kwh_saved_table
def slides():
# Prepares the information required for the slides
# Right now this is the second version of the nehaven portfolio
portfolio_id = 90
# Look at one scenario at a time, otherwise this is agony
scenario_ids = [47, 48, 49, 50, 51]
properties_data, plans_data, recommendations_data = get_data(portfolio_id, scenario_ids)
properties_df = pd.DataFrame(properties_data)
plans_df = pd.DataFrame(plans_data)
recommendations_df = pd.DataFrame(recommendations_data)
if properties_df.shape[0] != 2553:
raise ValueError("The number of unique properties is not 2553")
# Q1: What is the baseline heating and energy demand for the properties in the portfolio - baseline?
heating_hotwater_kwh = (
properties_df[['current_energy_demand', 'current_energy_demand_heating_hotwater']]
.mean()
)
# Q2: For each scenario, what is for what is the heating and hot water kwh after retrofit, on the entire
# popoulation (incl those without retrofit) and for just those being retrofit
# We also calculat the cost per kwh saved
scenario_comparison_population, scenario_comparison_retrofitted_units, cost_per_kwh_saved_table = (
estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_df, scenario_ids)
)
# Q3: For each scenario, we want to answer what the heating and hot water kwh looks like after retrofit
# We need to take recommndations that affect just the heating and hot water
# By property
recommendations_df["type_mapped"] = recommendations_df["type"].copy().replace(
{
"loft_insulation": "roof_insulation",
"room_roof_insulation": "roof_insulation",
"flat_roof_insulation": "roof_insulation",
"hot_water_tank_insulation": "other",
"cylinder_thermostat": "other",
"sealing_open_fireplace": "other",
"suspended_floor_insulation": "floor_insulation",
"solid_floor_insulation": "floor_insulation",
}
)
recommendations_df["type_mapped"] = np.where(
recommendations_df["description"].str.contains("air source heat pump"),
"air_source_heat_pump",
recommendations_df["type_mapped"]
)
# Group by 'Plan Name' and 'Recommendation Type' and count unique 'Property ID'
recommendation_summary = recommendations_df[recommendations_df["default"] == True].groupby(
['Scenario ID', 'type_mapped']
).agg({
'property_id': 'nunique'
}).reset_index()
recommendation_summary.columns = ['Scenario ID', 'Type Mapped', 'Number of Properties']
recommendation_summary["Percentage of Properties"] = 100 * (
recommendation_summary["Number of Properties"] / properties_df["id"].nunique()
)
recommendation_summary_final_scenario = recommendation_summary[recommendation_summary["Scenario ID"].isin([51])]
# MVP implementation of funding estimation for the most basic scenario, using GBIS
project_scores_matrix = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv")
def find_abs(sap_movement, starting_sap, floor_area):
    """
    Look up the "Cost Savings" entry of the project scores matrix for a SAP movement.

    :param sap_movement: SAP points added to the starting SAP score
    :param starting_sap: SAP score before the movement
    :param floor_area: floor area, used to pick the floor area segment of the matrix
    :return: 0 when the start and finish fall in the same band, otherwise the matrix's "Cost Savings" value for
        the floor area segment, starting band and finishing band
    """
    band_before = find_band(starting_sap)
    band_after = find_band(starting_sap + sap_movement)
    # No band change means nothing to look up
    if band_before == band_after:
        return 0

    # Upper bounds are inclusive; anything above 199 lands in the open-ended segment
    if floor_area <= 72:
        segment = '0-72'
    elif floor_area <= 97:
        segment = "73-97"
    elif floor_area <= 199:
        segment = "98-199"
    else:
        segment = "200+"

    in_segment = project_scores_matrix["Floor Area Segment"] == segment
    from_band = project_scores_matrix["Starting Band"] == band_before
    to_band = project_scores_matrix["Finishing Band"] == band_after
    matching_row = project_scores_matrix[in_segment & from_band & to_band].squeeze()
    return matching_row["Cost Savings"]
# ECO4 scoring bands: each EPC letter is split into a High and a Low half, with its SAP score range
eco4_scores_sap_table = pd.DataFrame([
    {'Band': 'High_A', 'From': 96.0, 'Up to': 100.0, 'Mid-point': 98.0},
    {'Band': 'Low_A', 'From': 92.0, 'Up to': 96.0, 'Mid-point': 94.0},
    {'Band': 'High_B', 'From': 86.0, 'Up to': 91.0, 'Mid-point': 88.5},
    {'Band': 'Low_B', 'From': 81.0, 'Up to': 86.0, 'Mid-point': 83.5},
    {'Band': 'High_C', 'From': 74.5, 'Up to': 80.0, 'Mid-point': 77.25},
    {'Band': 'Low_C', 'From': 69.0, 'Up to': 74.5, 'Mid-point': 71.75},
    {'Band': 'High_D', 'From': 61.5, 'Up to': 68.0, 'Mid-point': 64.75},
    {'Band': 'Low_D', 'From': 55.0, 'Up to': 61.5, 'Mid-point': 58.25},
    {'Band': 'High_E', 'From': 46.5, 'Up to': 54.0, 'Mid-point': 50.25},
    {'Band': 'Low_E', 'From': 39.0, 'Up to': 46.5, 'Mid-point': 42.75},
    {'Band': 'High_F', 'From': 29.5, 'Up to': 38.0, 'Mid-point': 33.75},
    {'Band': 'Low_F', 'From': 21.0, 'Up to': 29.5, 'Mid-point': 25.25},
    {'Band': 'High_G', 'From': 10.5, 'Up to': 20.0, 'Mid-point': 15.25},
    {'Band': 'Low_G', 'From': 1.0, 'Up to': 10.5, 'Mid-point': 5.75},
])


def find_band(value):
    """
    Return the ECO4 band name (e.g. "High_D") for a SAP score.

    The score is floored first, then matched to the band with the highest "From" that does not exceed it.
    Matching on "From" alone always yields exactly one band: the table's ranges overlap at 86 (Low_B / High_B)
    and at 96 (Low_A / High_A), where the higher band is chosen. Scores above the top of the table map to the
    highest band and scores below the bottom map to the lowest band.

    :param value: SAP score, may be fractional
    :return: the band name as a string
    :raises ValueError: if value is null / NaN
    """
    if pd.isnull(value):
        raise ValueError("Cannot find an ECO4 band for a missing SAP value")
    value_floored = np.floor(value)
    band_starts = eco4_scores_sap_table["From"]
    reachable = band_starts[band_starts <= value_floored]
    if reachable.empty:
        # Below the lowest band's start: clamp to the lowest band
        return eco4_scores_sap_table.loc[band_starts.idxmin(), "Band"]
    return eco4_scores_sap_table.loc[reachable.idxmax(), "Band"]
def identify_funding_measure(p, p_recs, is_social):
    """
    Pick the funding measure for a property from its cavity wall / loft insulation recommendations.

    :param p: Property record; "property_id", "current_sap_points" and "total_floor_area" are read
    :param p_recs: Recommendations for the property; "type", "sap_points" and "estimated_cost" are read
    :param is_social: Flag copied onto the returned record
    :return: dict with keys property_id, measure, cost, abs and is_social for the candidate with the highest
        cost, or None when neither measure is recommended
    """
    candidates = []
    for measure_type in ("cavity_wall_insulation", "loft_insulation"):
        rows = p_recs[p_recs["type"] == measure_type]
        if rows.empty:
            continue
        rec = rows.squeeze()
        score = find_abs(
            sap_movement=rec["sap_points"],
            starting_sap=p["current_sap_points"],
            floor_area=p["total_floor_area"]
        )
        candidates.append({
            "property_id": p["property_id"],
            "measure": rec["type"],
            "cost": rec["estimated_cost"],
            "abs": score,
            "is_social": is_social
        })

    if not candidates:
        return None

    # Highest cost first, then keep only the top record
    ranked = pd.DataFrame(candidates).sort_values("cost", ascending=False)
    return ranked.head(1).to_dict(orient="records")[0]
# Tenure labels treated as social rent
social_tenure = ["rental (social)", "Rented (social)"]
scenario_recs = recommendations_df[recommendations_df["Scenario ID"].isin([47])]

# One funding record per property below SAP 69 that has a qualifying measure in the scenario
funding = []
for _, p in tqdm(properties_df.iterrows(), total=len(properties_df)):
    p_recs = scenario_recs[scenario_recs["property_id"] == p["property_id"]]
    if p_recs.empty:
        continue
    # Tenure only decides the is_social flag; the SAP threshold is the same for both groups
    in_social_tenure = p["tenure"] in social_tenure
    if p["current_sap_points"] < 69:
        f = identify_funding_measure(p, p_recs, in_social_tenure)
        if f:
            funding.append(f)

funding = pd.DataFrame(funding)
conservative_abs = 20
funding["expected_funding"] = funding["abs"] * conservative_abs

# We take rows where the expected funding is at least the cost of the works + 15% (the 1.15 multiplier)
# NOTE(review): an earlier version of this comment said 10% - confirm which margin is intended
covers_cost_with_margin = funding["expected_funding"] >= (funding["cost"] * 1.15)
funding = funding[covers_cost_with_margin]

# From the perspective of the owner of the properties, the funding that they see is just the cost of the works.
# The actual funding received will go to the installer
is_private = ~funding["is_social"]

# We now look at the social funding
social_funding = funding.loc[funding["is_social"], "cost"].sum()

# For the private funding, we need to scale this to consider the fact that only a proportion of the properties
# will qualify due to needing the property to fall into council tax bands A - D, and that only some of the tenants
# will meet the benefits criteria
private_funding = funding.loc[is_private, "cost"].sum()

# 51% of households are recipients of benefits in the South East, in the UK
# (2021/2022 - https://www.statista.com/statistics/382858/uk-state-benefits-by-region/)
benefits_proportion = 0.51

# We also need to deduce the % of properties in council tax bands A - D
# 2023 council tax bands:
# https://www.gov.uk/government/statistics/council-tax-stock-of-properties-2023/council-tax-stock-of-properties
# -statistical-commentary
band_a_proportion = 0.239
band_b_proportion = 0.195
band_c_proportion = 0.219
band_d_proportion = 0.156
a_to_d_proportion = sum((band_a_proportion, band_b_proportion, band_c_proportion, band_d_proportion))

# Note: It's probable that an occupant of a property in council tax bands A-D is more likely to be on benefits,
# however we retain the regional average to be conservative
# We scale the private funding based on these two factors
private_funding_scaled = private_funding * benefits_proportion * a_to_d_proportion
n_private_projects = np.round(is_private.sum() * benefits_proportion * a_to_d_proportion)
# Look at the impact of EWI for scenario 49: all external wall insulation recommendations in that scenario
ewi_jobs = recommendations_df[
(recommendations_df["Scenario ID"] == 49) & (recommendations_df["type"] == "external_wall_insulation")
]
# Total estimated cost of the EWI jobs (bare expression - the value is only visible in an interactive session)
ewi_jobs["estimated_cost"].sum()
# Cavity wall insulation recommendations in scenario 47
has_cavity = recommendations_df[
(recommendations_df["type"] == "cavity_wall_insulation") & (recommendations_df["Scenario ID"] == 47)
]
# Take the properties that have a cavity wall recommendation
cavity_units = properties_df[properties_df["property_id"].isin(has_cavity["property_id"].values)]
# Inspect the identifiers of the row with index label 3 (interactive inspection only)
cavity_units[cavity_units.index == 3][["uprn", "property_id"]]
# All recommendations for one hard-coded property, kept for ad hoc inspection
z = recommendations_df[recommendations_df["property_id"] == 24525]
# Recommendation type by kWh savings per unit of floor area, for the default recommendations of scenario 51
recommendations_final_scenario = recommendations_df[
    recommendations_df["Scenario ID"].isin([51]) &
    (recommendations_df["default"] == True)
].copy()

# Merge on floor area
recommendations_final_scenario = recommendations_final_scenario.merge(
    properties_df[["property_id", "total_floor_area"]], on="property_id", how="left"
)
# Rows without a floor area cannot be normalised, so they are dropped (copy so the assignments below write to
# an independent frame)
recommendations_final_scenario = recommendations_final_scenario[
    ~pd.isnull(recommendations_final_scenario["total_floor_area"])
].copy()

recommendations_final_scenario["kwh_savings_per_unit"] = (
    recommendations_final_scenario["kwh_savings"] / recommendations_final_scenario["total_floor_area"]
)

# Map from this frame's own "type" column. merge() gives the result a fresh index, so assigning from
# recommendations_df["type"] would align on index labels and pick up the types of unrelated rows.
recommendations_final_scenario["type_mapped2"] = recommendations_final_scenario["type"].replace(
    {
        "room_roof_insulation": "roof_insulation",
        "flat_roof_insulation": "roof_insulation",
        "hot_water_tank_insulation": "other",
        "cylinder_thermostat": "other",
        "sealing_open_fireplace": "other",
        "suspended_floor_insulation": "floor_insulation",
        "solid_floor_insulation": "floor_insulation",
    }
)

# NOTE(review): the grouping key is "type_mapped", not the "type_mapped2" column built above. "type_mapped" is
# presumably created earlier in this script - confirm which mapping is intended here.
aggs = recommendations_final_scenario.groupby("type_mapped")[
    ["kwh_savings_per_unit", "estimated_cost"]].mean().reset_index().sort_values(
    "kwh_savings_per_unit", ascending=False
)
# Mean cost divided by mean kWh saving per unit of floor area, per recommendation type
aggs["cost_per_kwh_saved"] = aggs["estimated_cost"] / aggs["kwh_savings_per_unit"]
# Lift the pandas display limits so frames print in full: every column, every row, and untruncated cell text
for display_option in ('display.max_columns', 'display.max_rows', 'display.max_colwidth'):
    pd.set_option(display_option, None)

View file

@ -0,0 +1,420 @@
import pandas as pd
import numpy as np
from backend.SearchEpc import SearchEpc
from dotenv import load_dotenv
from tqdm import tqdm
import os
# Load environment variables from the backend .env file before the token is read
load_dotenv(dotenv_path="backend/.env")
# Passed to SearchEpc as auth_token; None when the variable is not set
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
def clean_colnames(df):
    """
    Flatten a two-row header: the first data row holds secondary header labels, which are appended to the
    column names with a "+" separator, and that row is then removed.

    :param df: DataFrame whose first row contains the secondary column labels (null where there is none)
    :return: New DataFrame without the first row, with columns renamed to "<column>+<secondary>" wherever a
        secondary label is present. An empty frame is returned unchanged.
    """
    if df.empty:
        # No header row to fold in
        return df

    secondary_cols = ["" if pd.isnull(x) else x for x in df.iloc[0, :].values]
    new_colnames = [
        # f-string rather than str.join so non-string labels (e.g. numeric headers) do not raise
        f"{primary}+{secondary}" if secondary else primary
        for primary, secondary in zip(df.columns, secondary_cols)
    ]

    # Drop the header row by position, matching the positional read above. Dropping by label 0 removes the
    # wrong row (or raises) whenever the index is not the default 0..n-1 range.
    df = df.iloc[1:].copy()
    df.columns = new_colnames
    return df
def lesney_farms():
    """
    Some rough and ready analysis to get a view of what the archetypes could be, ahead of a meeting with Wates
    on the 28th Aug 2024

    Reads the Bexley asset workbook and the cached EPC extract from local disk, groups the below-EPC-C system
    built homes into archetypes and writes the archetype assignment to assigned_archetypes.csv. Several
    intermediate frames and bare expressions are kept purely for inspection in an interactive session.
    :return:
    """
    # The same workbook is read once for the full list and once per location sheet
    workbook_path = (
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Orbit - Wates/Bexley Wave 3 Project - external - "
        "reduced.xlsx"
    )
    all_locations = [
        "Forest Road Erith",
        "Lesney Farms",
        "Brook Street 155 - 243",
        "Hazel Drive",
        "Page Crescent",
        "Brook Salmon Roberts and Chapma",
        "Beacon Road"
    ]
    all_assets = pd.read_excel(
        workbook_path,
        sheet_name="Full Property List",
        header=1
    )
    all_assets = clean_colnames(all_assets)
    all_assets["Location"] = None

    locations = {
        location_name: clean_colnames(pd.read_excel(
            workbook_path,
            sheet_name=location_name,
            header=1
        )) for location_name in all_locations
    }
    # Tag every asset with the location sheet it appears on
    for loc in all_locations:
        all_assets["Location"] = np.where(
            all_assets["Asset Reference"].isin(locations[loc]["Asset Reference"]),
            loc,
            all_assets["Location"]
        )
    # Every asset must have been found on one of the location sheets
    if pd.isnull(all_assets["Location"]).sum():
        raise Exception("something went wrong")

    # 234 properties below EPC C
    below_epc_c = all_assets[all_assets["PRE CALCULATED EPC"].isin(["D", "E", "F", "G"])].copy()
    # We simplify wall type
    below_epc_c["wall_type_simplified"] = below_epc_c["Wall Type"].str.split(" ").str[0]

    known_no_epc = [
        28679,  # There is no EPC for 11 Page Crescent, Erith, Kent, DA8 2HJ, just 11A
        29291,  # No EPC for 225 Slade Green Road, Erith, Kent, DA8 2JW
    ]

    # Get the EPC data. The commented-out block below is how the cached CSV was originally produced.
    # epc_data = []
    # for _, home in tqdm(all_assets.iterrows(), total=len(all_assets)):
    #     if home["Asset Reference"] in known_no_epc:
    #         continue
    #
    #     address = home["Address"]
    #     # Spelling error
    #     if "Frinstead" in address:
    #         address = address.replace("Frinstead", "Frinsted")
    #
    #     address1 = address.split(",")[0]
    #
    #     asset_type_map = {
    #         "HOUSE": "House",
    #         "BUNGALOWS": "Bungalow",
    #         "FLATS": "Flat",
    #         "MAISONETTES": "Maisonette",
    #     }
    #
    #     searcher = SearchEpc(
    #         address1=address1,
    #         postcode=home["Address - Postcode"],
    #         auth_token=EPC_AUTH_TOKEN,
    #         os_api_key="",
    #         full_address=address,
    #     )
    #     searcher.ordnance_survey_client.property_type = asset_type_map[home["Asset Type"]]
    #     searcher.ordnance_survey_client.built_form = None
    #
    #     searcher.find_property(skip_os=True)
    #     if searcher.newest_epc is None:
    #         raise Exception("Couldn't find")
    #
    #     epc_data.append(
    #         {
    #             "Asset Reference": home["Asset Reference"],
    #             **searcher.newest_epc.copy()
    #         }
    #     )
    #
    # epc_data = pd.DataFrame(epc_data)
    epc_data = pd.read_csv(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Orbit - Wates/Bexley EPC data.csv"
    )
    # epc_data.to_csv(
    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Orbit - Wates/Bexley EPC data.csv", index=False
    # )

    epc_comparison = all_assets[
        ['Asset Reference', 'Address', 'PRE CALCULATED EPC']
    ].merge(
        epc_data[["Asset Reference", "current-energy-rating", "lodgement-date"]],
        on='Asset Reference',
        how="left"
    )
    # There are a large # of properties (147) that have a different pre calculated EPC rating to what's on the
    # registry. These may be internally held EPRs but this may inform which properties we might want to
    # prioritise for survey
    different_epcs = epc_comparison[
        epc_comparison["PRE CALCULATED EPC"] != epc_comparison["current-energy-rating"]
    ]
    not_c = different_epcs[
        (different_epcs["PRE CALCULATED EPC"] == "C") &
        (different_epcs["current-energy-rating"] != "C")
    ]

    system_builds = below_epc_c[
        below_epc_c["Wall Type"].str.contains("SystemBuilt")
    ].copy()
    combinations = system_builds[
        ['Asset Type', 'Property Type', 'Location', 'PRE CALCULATED EPC', 'Wall Type', ]
    ].drop_duplicates()

    system_build_data_comparison = system_builds.merge(
        epc_data[
            ["Asset Reference", "walls-description", "roof-description", "current-energy-rating", "lodgement-date",
             "current-energy-efficiency"]],
        left_on='Asset Reference',
        right_on='Asset Reference',
        how="left"
    )

    # Apply patches: manual corrections keyed by asset reference, then column name
    patches = {
        25847: {"Property Type": "Semi Detached House"},
    }
    for asset_ref, patch in patches.items():
        for k, v in patch.items():
            system_build_data_comparison.loc[
                system_build_data_comparison["Asset Reference"] == asset_ref,
                k
            ] = v

    # Count how many archetypes each candidate column combination would produce
    archetype_columns = [
        ["Asset Type", "Property Type", "Wall Type", "Location"],
        ["Asset Type", "Property Type", "Location"],
        ["Asset Type", "Property Type", "Wall Type", "Location", "PRE CALCULATED EPC", "roof-description"],
        ["Asset Type", "Property Type", "Location", "PRE CALCULATED EPC"]
    ]
    summary = []
    for cols in archetype_columns:
        combinations = system_build_data_comparison[cols].drop_duplicates()
        summary.append(
            {
                "cols": cols,
                "number_archetypes": len(combinations),
            }
        )
    summary = pd.DataFrame(summary)

    # Let's use this column combination
    chosen_combination = [
        "Asset Type", "Property Type", "Wall Type", "Location", "PRE CALCULATED EPC", "roof-description"
    ]
    # For this combination, let's find the properties
    archetype_combinations = system_build_data_comparison[chosen_combination].drop_duplicates().reset_index(drop=True)
    archetype_combinations["archetype ID"] = archetype_combinations.index
    archetyped_data = system_build_data_comparison.merge(
        archetype_combinations, how="left", on=chosen_combination
    )
    counts = archetyped_data["archetype ID"].value_counts()
    # Archetype 0: Semi D, As built system built, Pre calculated EPC D, flat insulated roof, (Lesney-0)
    # Archetype 1: Semi D, Externally insulated system built, Pre calculated EPC D, flat insulated roof (Lesney-1)
    # Archetype 4: Semi D, System built with unknown insulation, Pre calculated EPC D, flat roof insulated (Lesney-2)
    # Archetype 3: Semi D, Externally insulated system built, Pre calculated EPC D, flat roof uninsulated (assumed) (
    # Lesney-3)
    # 0 21
    # 1 11
    # 4 11
    # 3 3
    # 2 1
    # 5 1
    # 6 1
    # 7 1
    # 8 1
    # 9 1

    # This archetype is the same as 0, apart from the pre calculated EPC being an E. The registry says this is a D
    # This has been added to additional units
    eg1 = archetyped_data[archetyped_data["archetype ID"] == 2]
    # Semi D, System built with unknown insulation, Pre calculated EPC D, flat roof insulated
    # This looks like it would fit either in archetype
    eg2 = archetyped_data[archetyped_data["archetype ID"] == 5]
    eg3 = archetyped_data[archetyped_data["archetype ID"] == 6]

    # Archetypes 7, 8, 9 are all similar, Semi D, Uninsulated system built, with pitched lofts with up to 200mm
    # insulation in the lofts
    # It's just the three units
    # IDs 7, 8 and 9 match the note above and the lesney_4 selection further down; the previous [9, 10, 11]
    # only ever matched archetype 9, since the counts above stop at ID 9
    pitched_system_built_properties = archetyped_data[archetyped_data["archetype ID"].isin([7, 8, 9])]
    pitched_system_built_properties["Address"]

    notes = [
        {
            "Asset Reference": 27445,
            "note": "Confirmed this has a pitched roof on Maps"
        },
        {
            "Asset Reference": 27443,
            "note": "Confirmed this has a pitched roof on Maps"
        },
        {
            "Asset Reference": 27442,
            "note": "Confirmed this has a pitched roof on Maps"
        },
        {
            "Asset Reference": 25847,
            "note": "This is labelled as a mid-terrace but the EPC data + Maps suggest it's a semi-detached"
        }
    ]

    # These are As Built, System Built
    # The second space-separated token of the first address line is used as the street name
    system_built_streets = (
        archetyped_data["Address"].str.split(",").str[0].str.split(" ").str[1].unique()
    )
    all_assets_w_epcs = all_assets.merge(epc_data, on="Asset Reference", how="left")
    # Grab all of the properties on these streets that aren't system built
    streets_not_system_builds = all_assets_w_epcs[
        all_assets_w_epcs["Address"].str.split(",").str[0].str.split(" ").str[1].isin(system_built_streets) &
        ~all_assets_w_epcs["Wall Type"].str.contains("SystemBuilt")
    ]
    system_builds = archetyped_data[
        archetyped_data["Wall Type"].str.contains("SystemBuilt")
    ][["Asset Reference", "Address", "Wall Type", "walls-description"]].sort_values("Address")

    birling_street_system_builds = system_builds[system_builds["Address"].str.contains("Birling")]
    halstead_street_system_builds = system_builds[system_builds["Address"].str.contains("Halstead")]
    brasted_street_system_builds = system_builds[system_builds["Address"].str.contains("Brasted")]
    # Both spellings appear in the data
    frinstead_street_system_builds = system_builds[
        system_builds["Address"].str.contains("Frinstead") | system_builds["Address"].str.contains("Frinsted")
    ]

    pd.set_option('display.max_rows', 500)
    pd.set_option('display.max_columns', 500)
    pd.set_option('display.width', 1000)
    streets_not_system_builds[["Asset Reference", "Address", "Wall Type", "walls-description"]]
    system_builds[system_builds["Address"].str.contains("Birling")]

    # Possible System Builds
    # Create the proposed sample
    archetyped_data["lodgement-date"] = pd.to_datetime(archetyped_data["lodgement-date"])

    # lesney-0
    lesney_0 = archetyped_data[archetyped_data["archetype ID"] == 0].copy()
    # Sorted so the oldest EPC per postcode comes first
    lesney_0 = lesney_0.sort_values(["Address - Postcode", "lodgement-date"])
    lesney_0[["Address", "Address - Postcode", "lodgement-date"]]

    lesney_1 = archetyped_data[archetyped_data["archetype ID"] == 1].copy()
    lesney_1 = lesney_1.sort_values(["Address - Postcode", "lodgement-date"])
    lesney_1[["Address", "Address - Postcode", "lodgement-date"]]

    lesney_2 = archetyped_data[archetyped_data["archetype ID"] == 4].copy()
    lesney_2 = lesney_2.sort_values(["Address - Postcode", "lodgement-date"])
    lesney_2[["Address", "Address - Postcode", "lodgement-date"]]

    lesney_3 = archetyped_data[archetyped_data["archetype ID"] == 3].copy()
    lesney_3 = lesney_3.sort_values(["Address - Postcode", "lodgement-date"])
    lesney_3[["Address", "Address - Postcode", "lodgement-date", "roof-description"]]

    # Get the pitched roof properties, which are lesney-4
    lesney_4 = archetyped_data[archetyped_data["archetype ID"].isin([7, 8, 9])].copy()
    lesney_4 = lesney_4.sort_values(["Address - Postcode", "lodgement-date"])
    lesney_4[["Address", "Address - Postcode", "lodgement-date", "roof-description"]]

    assigned_archetypes = archetyped_data[
        ["Asset Reference", "archetype ID", "Address", "Address - Postcode"] + chosen_combination +
        ["lodgement-date", "current-energy-rating", "current-energy-efficiency", "walls-description"]
    ].copy()
    # Map the archetype ID to their string representation
    assigned_archetypes["archetype ID"] = assigned_archetypes["archetype ID"].replace(
        {
            0: "Lesney-0",
            1: "Lesney-1",
            4: "Lesney-2",
            3: "Lesney-3",
            7: "Lesney-4",
            8: "Lesney-4",
            9: "Lesney-4",
            2: "Lesney-0",
            5: "Lesney-2",
            6: "Lesney-0",
        }
    )
    assigned_archetypes["Asset Reference"] = assigned_archetypes["Asset Reference"].astype(int)
    assigned_archetypes.to_csv(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Orbit - Wates/assigned_archetypes.csv", index=False
    )
def culworth_court():
    """
    Some rough works on Culworth Court
    They're looking at an ASHP/GSHP

    Reads the asset list from a local workbook, looks up the newest EPC for every asset through SearchEpc and
    derives the distinct archetype combinations. Nothing is returned; the frames are for interactive inspection.
    :return:
    :raises Exception: if no EPC can be found for an asset
    """
    asset_list = pd.read_excel(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Orbit - Wates/001 - EPC CULWORTH COURT.xlsx",
        sheet_name="EPC C",
        header=1
    )
    asset_list = clean_colnames(asset_list)

    # Asset list labels -> the property type value set on the SearchEpc client (built once, not per row)
    asset_type_map = {
        "HOUSE": "House",
        "BUNGALOWS": "Bungalow",
        "FLATS": "Flat",
        "MAISONETTES": "Maisonette",
    }

    # Get the EPC data
    epc_data = []
    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
        address = home["Address"]
        # Spelling error
        if "Frinstead" in address:
            address = address.replace("Frinstead", "Frinsted")
        address1 = address.split(",")[0]

        searcher = SearchEpc(
            address1=address1,
            postcode=home["Address - Postcode"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key="",
            full_address=address,
        )
        searcher.ordnance_survey_client.property_type = asset_type_map[home["Asset Type"]]
        searcher.ordnance_survey_client.built_form = None
        searcher.find_property(skip_os=True)
        if searcher.newest_epc is None:
            # Name the asset so the failing row can be identified
            raise Exception(f"Couldn't find an EPC for asset {home['Asset Reference']}: {address}")

        epc_data.append(
            {
                "Asset Reference": home["Asset Reference"],
                **searcher.newest_epc.copy()
            }
        )
    epc_data = pd.DataFrame(epc_data)

    asset_list = asset_list.merge(epc_data, on="Asset Reference", how="left")
    asset_list["floor-level"] = np.where(
        asset_list["floor-level"] == "NODATA!",
        "",
        asset_list["floor-level"]
    )

    # Capture the enclosed end-terraces before they are relabelled below. Filtering after the relabel always
    # produced an empty frame.
    enclosed_end_terraces = asset_list[asset_list["built-form"] == "Enclosed End-Terrace"]

    asset_list["built-form"] = np.where(
        asset_list["built-form"] == "Enclosed End-Terrace",
        "End-Terrace",
        asset_list["built-form"]
    )
    archetype_combinations = asset_list[
        ["Asset Type", "Property Type", "built-form", "floor-level"]
    ].drop_duplicates()

View file

@ -0,0 +1,141 @@
import pandas as pd
from utils.s3 import save_csv_to_s3
# Together these form the "<user>/<portfolio>/" prefix of the S3 keys written by app(); PORTFOLIO_ID is also sent
# as the portfolio_id of each request body
USER_ID = 8
PORTFOLIO_ID = 100
def app():
    """
    This function sets up an asset list with just a few properties to model the impact of the following scenarios:
    1) EWI
    2) EWI + Solar
    3) EWI + Solar + ASHP

    The asset list and one fixed solar PV recommendation per property are written to S3, then the request body
    for each scenario is printed.
    :return:
    """
    inputs_bucket = "retrofit-plan-inputs-dev"

    asset_list = pd.DataFrame([
        # This is an example of a low D - SAP score is 60
        {"address": "37, Birling Road", "postcode": "DA8 3JQ", "uprn": 100020225444},
        {"address": "16, Brasted Road", "postcode": "DA8 3HU", "uprn": 100020225805},
        {"address": "25, Birling Road", "postcode": "DA8 3JQ", "uprn": 100020225432},
        {"address": "4, Halstead Road", "postcode": "DA8 3HX", "uprn": 100020229555},
    ])
    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
    save_csv_to_s3(dataframe=asset_list, bucket_name=inputs_bucket, file_name=filename)

    # Every property gets the same solar recommendation
    solar_template = {
        "type": "solar_pv",
        "suitable": True,
        "array_wattage": 4000,
        "initial_ac_kwh_per_year": 3800,
        "cost": 4009,
        # Rough estimate for 10 panels, around 1m x 1.8m (accommodate gaps and 30cm edge)
        "panneled_roof_area": 20,
    }
    non_invasive_recs = [
        {"uprn": al["uprn"], "recommendations": [dict(solar_template)]}
        for _, al in asset_list.iterrows()
    ]

    # Store non-invasive recommendations in S3
    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
    save_csv_to_s3(
        dataframe=pd.DataFrame(non_invasive_recs),
        bucket_name=inputs_bucket,
        file_name=non_invasive_recommendations_filename,
    )

    def scenario_body(scenario_name, non_invasive_path, exclusions):
        # Only these three fields differ between the scenarios; key order matches the printed output
        return {
            "portfolio_id": str(PORTFOLIO_ID),
            "housing_type": "Private",
            "goal": "Increasing EPC",
            "goal_value": "A",
            "trigger_file_path": filename,
            "already_installed_file_path": "",
            "patches_file_path": "",
            "non_invasive_recommendations_file_path": non_invasive_path,
            "scenario_name": scenario_name,
            "multi_plan": True,
            "exclusions": exclusions,
            "budget": None,
        }

    # EWI only: solar and heating are excluded and no non-invasive recommendations file is supplied
    body1 = scenario_body(
        "ECO4 funding - EWI",
        "",
        [
            "internal_wall_insulation",
            "roof_insulation", "ventilation", "floor_insulation", "windows", "fireplace", "heating", "hot_water",
            "lighting", "secondary_heating", "solar_pv"
        ],
    )
    print(body1)

    # EWI + Solar: heating measures stay excluded
    body2 = scenario_body(
        "ECO4 funding - EWI + Solar",
        non_invasive_recommendations_filename,
        [
            "internal_wall_insulation",
            "roof_insulation",
            "ventilation",
            "floor_insulation",
            "windows",
            "fireplace",
            "heating",
            "hot_water",
            "lighting",
            "secondary_heating",
            "boiler_upgrade",
            "high_heat_retention_storage_heater",
        ],
    )
    print(body2)

    # EWI + Solar + ASHP: heating is no longer excluded
    body3 = scenario_body(
        "ECO4 funding - EWI + Solar + ASHP",
        non_invasive_recommendations_filename,
        [
            "internal_wall_insulation",
            "roof_insulation", "ventilation", "floor_insulation", "windows", "fireplace", "hot_water",
            "lighting", "secondary_heating",
        ],
    )
    print(body3)

1126
etl/ownership/Ownership.py Normal file

File diff suppressed because it is too large Load diff

10
etl/ownership/README.md Normal file
View file

@ -0,0 +1,10 @@
# Ownership Application
This application contains methods that allow us to attempt to discover
corporate ownership of properties, where possible.
Practically, it's likely that the code within this application will be
exported into other areas of this repository, and used to assemble
pipelines that solve specific property ownership questions, and so this
codebase is set up with the goal of providing fairly easy to use, plug
and play tools.

35
etl/ownership/config.py Normal file
View file

@ -0,0 +1,35 @@
# These are the registration numbers for companies we've heard a response from, and cannot sell
# NOTE(review): Companies House numbers are normally 8 characters, zero-padded; these are stored without
# leading zeros - confirm the comparison in Ownership normalises them before matching
OWNERS_WHO_CANT_SELL = [
# Al Rayan - they're the senior lender, not able to sell
"4483430",
# Ultrabarn - they're unwilling to sell and will sort any retrofits themselves
"2794851",
# Mountview - Anna spoke with someone from Mountview - they acquire tenancies and sell them as soon as they become
# vacant. They have no immediate opportunities but we may come back and remove this
"328090",
]
# UPRNs removed from the portfolio after manual checks; the reason for each exclusion is noted above its entry
EXCLUDED_UPRNS = [
# This property no longer exists
200003827624,
# This property doesn't seem to exist
90070698,
# Can't really find a solid record on Zoopla/Rightmove
10090437990,
# This property doesn't seem to exist
100070902790,
# This property doesn't seem to exist
100070902791,
# This property doesn't seem to exist
100031997775,
# Can't find reliable information for this property on Zoopla/Rightmove
200001372608,
# Can't find reliable information for this property on Zoopla/Rightmove
100031592801,
# Can't find reliable information for this property on Zoopla/Rightmove
100031579087,
# Can't find reliable information for this property on Zoopla/Rightmove
200000877273,
# Can't find reliable information for this property on Zoopla/Rightmove - seems like a post office!
100071391639
]

View file

@ -0,0 +1,181 @@
import datetime
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from backend.app.db.models.portfolio import Portfolio, PortfolioUsers
from etl.ownership.Ownership import Ownership
from etl.ownership.config import OWNERS_WHO_CANT_SELL as EXCLUDED_OWNERS, EXCLUDED_UPRNS
from utils.s3 import save_csv_to_s3
# Set up the project configuration
# Users to associate with the portfolio; only referenced by the commented-out create_sfr_portfolio call in app()
USER_IDS = [
2, # Khalim
3, # Chenai
5, # Anna
30, # Patricia
]
# Local EPC certificate extracts, one certificates.csv per local authority; passed to Ownership as epc_paths
EPC_PATHS = [
"local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
"local_data/all-domestic-certificates/domestic-E08000031-Wolverhampton/certificates.csv",
"local_data/all-domestic-certificates/domestic-E08000026-Coventry/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000016-Leicester/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000015-Derby/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000021-Stoke-on-Trent/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000018-Nottingham/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000154-Northampton/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000061-North-Northamptonshire/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000062-West-Northamptonshire/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000152-East-Northamptonshire/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000155-South-Northamptonshire/certificates.csv",
# Second batch of authorities
"local_data/all-domestic-certificates/domestic-E08000027-Dudley/certificates.csv",
"local_data/all-domestic-certificates/domestic-E08000029-Solihull/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000234-Bromsgrove/certificates.csv",
"local_data/all-domestic-certificates/domestic-E08000030-Walsall/certificates.csv",
"local_data/all-domestic-certificates/domestic-E08000028-Sandwell/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000019-Herefordshire-County-of/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000020-Telford-and-Wrekin/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000218-North-Warwickshire/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000222-Warwick/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000237-Worcester/certificates.csv",
# East midlands
"local_data/all-domestic-certificates/domestic-E07000035-Derbyshire-Dales/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000038-North-East-Derbyshire/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000039-South-Derbyshire/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000012-North-East-Lincolnshire/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000013-North-Lincolnshire/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000138-Lincoln/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000134-North-West-Leicestershire/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000017-Rutland/certificates.csv",
]
# Local input files handed to Ownership as the domestic ownership, overseas ownership and land registry paths
# NOTE(review): absolute paths on a developer machine - this only runs where those files exist
DOMESTIC_OWNERSHIP_PATH = "/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_07.csv"
OVERSEAS_OWNERSHIP_PATH = "/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_07.csv"
LAND_REGISTRY_PATH = "/Users/khalimconn-kowlessar/Downloads/pp-complete.csv"
# Passed to Ownership as project_name and bucket respectively
PROJECT_NAME = "Midlands Portfolio"
DATA_BUCKET = "retrofit-data-dev"
# We use this as a rough figure, which helps us shape the portfolio
PROPERTY_VALUE_ESTIMATE = 200_000
# We want a 50m portfolio, but we create a bigger portfolio than needed, since properties will be filtered out
PORTFOLIO_VALUE = 75_000_000
def create_sfr_portfolio(project_name, user_ids, status, goal):
    """
    Return the portfolio called project_name, creating it (and its user associations) if it does not exist.

    :param project_name: Portfolio name to look up or create
    :param user_ids: Users to associate with a newly created portfolio
    :param status: Status for a newly created portfolio
    :param goal: Goal for a newly created portfolio
    :return: The new portfolio; the existing portfolio when it is already linked to at least one of user_ids;
        None when a portfolio with this name exists but is linked to none of them
    :raises Exception: any error raised while talking to the database is re-raised after a rollback
    """
    session = sessionmaker(bind=db_engine)()
    try:
        session.begin()

        # Check for an existing portfolio by name
        portfolio = session.query(Portfolio).filter_by(name=project_name).one_or_none()

        if not portfolio:
            # No portfolio under this name yet: create it with the provided status and goal
            new_portfolio = Portfolio(name=project_name, status=status, goal=goal)
            session.add(new_portfolio)
            session.flush()  # Ensures that 'id' is available before committing

            # Create the user associations in PortfolioUsers
            session.add_all(
                [PortfolioUsers(user_id=user_id, portfolioId=new_portfolio.id) for user_id in user_ids]
            )
            session.commit()
            print(f"New portfolio created with ID: {new_portfolio.id}")
            return new_portfolio

        # The portfolio exists: see which users it is already linked to
        linked_user_ids = {
            row.user_id for row in session.query(PortfolioUsers.user_id).filter_by(portfolioId=portfolio.id)
        }
        if linked_user_ids.isdisjoint(set(user_ids)):
            print("Portfolio already exists under this name, for different users.")
            session.rollback()  # No changes to be committed
            return None  # Optional: the user associations could be updated here instead

        print("Portfolio already exists under this name, for specified users.")
        return portfolio  # Return the existing portfolio data
    except Exception as e:
        session.rollback()  # Ensure no partial changes are committed
        print(f"An error occurred: {e}")
        raise
    finally:
        session.close()
def app():
    """
    Run the ownership pipeline for the configured project, upload the resulting asset list to S3, print the
    request body for the "Hit EPC C" scenario and create the final outputs.
    """
    ownership_instance = Ownership(
        epc_paths=EPC_PATHS,
        domestic_ownership_path=DOMESTIC_OWNERSHIP_PATH,
        overseas_ownership_path=OVERSEAS_OWNERSHIP_PATH,
        land_registry_path=LAND_REGISTRY_PATH,
        project_name=PROJECT_NAME,
        bucket=DATA_BUCKET,
        average_property_value=PROPERTY_VALUE_ESTIMATE,
        portfolio_value=PORTFOLIO_VALUE,
        excluded_owners=EXCLUDED_OWNERS,
        excluded_uprns=EXCLUDED_UPRNS
    )
    # Restrict the EPC data to F and G rated certificates
    ownership_instance.pipeline(column_filters={"CURRENT_ENERGY_RATING": ["F", "G"]})

    # Create the asset list and the body of the portfolio
    asset_list = ownership_instance.get_asset_list()

    # Create the project, if a portfolio doesn't exist for the project name
    # TODO: Wasn't working
    # create_sfr_portfolio(project_name=PROJECT_NAME, user_ids=USER_IDS, status="scoping", goal="Increasing EPC")
    # Hard-coded identifiers used in place of the portfolio the call above would have created
    portfolio_id = 99
    user_id = 8

    filename = f"{user_id}/{portfolio_id}/asset_list.csv"
    save_csv_to_s3(dataframe=asset_list, bucket_name="retrofit-plan-inputs-dev", file_name=filename)

    body = dict(
        portfolio_id=str(portfolio_id),
        housing_type="Private",
        goal="Increasing EPC",
        goal_value="C",
        trigger_file_path=filename,
        already_installed_file_path="",
        patches_file_path="",
        non_invasive_recommendations_file_path="",
        scenario_name="Hit EPC C",
        multi_plan=True,
        exclusions=["fireplace", "floor_insulation"],
        budget=None,
    )
    print(body)

    # # We read in the current valuation data and identify if there are any uprns that need to be added
    # previous_valuations = pd.read_excel(
    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/sfr/sfr property valuations.xlsx")
    # missed = asset_list[~asset_list["uprn"].astype(str).isin(previous_valuations["uprn"].astype(str))]
    # missed.to_csv("missed_valuations.csv")

    # We now need a distinct step to prepare final outputs
    portfolio_timestamp = "2024-08-20 19:51:33.884145"
    # Create a date in the yyyy-mm-dd format to store the data against
    storage_date = datetime.datetime.now().strftime("%Y-%m-%d")
    ownership_instance.create_final_outputs(
        portfolio_timestamp=portfolio_timestamp, storage_date=storage_date, exclusion_uprns=EXCLUDED_UPRNS
    )

View file

@ -3,7 +3,8 @@ from tqdm import tqdm
import pandas as pd
import geopandas as gpd
from utils.logger import setup_logger
from utils.s3 import read_io_from_s3, save_dataframe_to_s3_parquet
from utils.s3 import read_io_from_s3, save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
from backend.Property import Property
logger = setup_logger()
@ -116,3 +117,81 @@ class OpenUprnClient:
file_key=file_key,
bucket_name=bucket_name
)
@staticmethod
def make_uprn_map(uprns, uprn_filenames):
"""
Given a list of UPRNs, this method will return a map of the UPRN to the filename that the UPRN is contained in
:param uprns: List of UPRNs
:param uprn_filenames: Lookup from UPRN range to filename
:return:
"""
uprn_map = {}
for uprn in uprns:
filtered_df = uprn_filenames[
(uprn_filenames["lower"] <= int(uprn))
& (uprn_filenames["upper"] >= int(uprn))
]
if filtered_df["filenames"].values[0] in uprn_map:
uprn_map[filtered_df["filenames"].values[0]].append(int(uprn))
else:
uprn_map[filtered_df["filenames"].values[0]] = [int(uprn)]
return uprn_map
@classmethod
def set_spatial_data(cls, input_properties: list[Property], bucket_name):
    """
    Given a list of properties, this method will set the spatial data for each property
    The method will look for the minimal set of uprn datasets that it needs to read in to get all of the spatial
    data for the properties

    :param input_properties: Properties to update; each must expose uprn, spatial and set_spatial()
    :param bucket_name: S3 bucket holding spatial/filename_meta.parquet and the spatial/<filename> datasets
    :return: The same list of properties, each with its spatial data set
    :raises Exception: if any property is left without spatial data
    """
    uprn_filenames = read_dataframe_from_s3_parquet(
        bucket_name=bucket_name, file_key="spatial/filename_meta.parquet"
    )
    uprns = [p.uprn for p in input_properties]
    uprn_map = cls.make_uprn_map(uprns, uprn_filenames)

    for filename, associated_uprn in tqdm(uprn_map.items(), total=len(uprn_map)):
        # Read in the file from the bucket the caller asked for (previously hard-coded to the dev bucket,
        # which ignored bucket_name)
        spatial_data = read_dataframe_from_s3_parquet(
            bucket_name=bucket_name, file_key=f"spatial/{filename}"
        )
        wanted_uprns = set(associated_uprn)
        spatial_df = spatial_data[spatial_data["UPRN"].isin(wanted_uprns)]
        for p in input_properties:
            # make_uprn_map stores UPRNs as ints, so compare on the same type; a string UPRN would otherwise
            # never match and the property would fail the final check
            uprn = int(p.uprn)
            if uprn in wanted_uprns:
                p.set_spatial(spatial_df[spatial_df["UPRN"] == uprn])

    # Perform a final check to ensure that all properties have spatial data
    for p in input_properties:
        if p.spatial is None:
            raise Exception(f"Property with UPRN {p.uprn} does not have spatial data")
    return input_properties
@classmethod
def get_spatial_data(cls, uprns: list[int], bucket_name):
    """
    Similar to set_spatial_data, but works on a plain list of UPRNs and returns the matching spatial rows.

    :param uprns: List of UPRNs to look up
    :param bucket_name: Bucket holding "spatial/filename_meta.parquet" and the "spatial/<filename>" files
    :return: DataFrame concatenating, per spatial file, the rows whose "UPRN" is one of the requested UPRNs
    :raises ValueError: from ``pd.concat`` when ``uprns`` is empty, as there is nothing to concatenate
    """
    uprn_filenames = read_dataframe_from_s3_parquet(
        bucket_name=bucket_name, file_key="spatial/filename_meta.parquet"
    )
    uprn_map = cls.make_uprn_map(uprns, uprn_filenames)

    uprn_spatial_table = []
    for filename, associated_uprn in tqdm(uprn_map.items(), total=len(uprn_map)):
        # Read the file from the bucket that was passed in (the same one the metadata came from)
        spatial_data = read_dataframe_from_s3_parquet(
            bucket_name=bucket_name, file_key=f"spatial/{filename}"
        )
        uprn_spatial_table.append(spatial_data[spatial_data["UPRN"].isin(associated_uprn)])
    return pd.concat(uprn_spatial_table)

View file

@ -0,0 +1,287 @@
# We use some sample properties from Newhaven to use as a testing dataset for implementing the model fixes
import inspect
import pandas as pd
from etl.epc.settings import EARLIEST_EPC_DATE
from pathlib import Path
from utils.s3 import save_csv_to_s3
src_file_path = inspect.getfile(lambda: None)
EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"
USER_ID = 8
PORTFOLIO_ID = -1
def app():
"""
This application is tasked with pulling a large quantity of data from the find my epc website, containing the
estimated energy consumption for properties
:return:
"""
lewes_directory = EPC_DIRECTORY / "domestic-E07000063-Lewes/certificates.csv"
data = pd.read_csv(lewes_directory, low_memory=False)
# Rename the columns to the same format as the api returns
data.columns = [c.replace("_", "-").lower() for c in data.columns]
# Keep only certificates lodged on or after the earliest EPC date threshold
data = data[data["lodgement-date"] >= EARLIEST_EPC_DATE]
data = data[~pd.isnull(data["uprn"])]
data = data[data["current-energy-efficiency"].astype(float) < 52]
data = data.sample(10)
# Create an asset list
asset_list = data[["uprn", "address1", "postcode"]].copy().rename(columns={"address1": "address"})
asset_list["uprn"] = asset_list["uprn"].astype(str)
filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
save_csv_to_s3(
dataframe=asset_list,
bucket_name="retrofit-plan-inputs-dev",
file_name=filename
)
body = {
"portfolio_id": str(PORTFOLIO_ID),
"housing_type": "Private",
"goal": "Increasing EPC",
"goal_value": "B",
"trigger_file_path": filename,
"already_installed_file_path": "",
"patches_file_path": "",
"non_invasive_recommendations_file_path": "",
"budget": None,
}
print(body)
# This is some temp code, which is for diagnosing the issues with the bills models
heating_training_data_filepath = "sap_change_model/2024-08-06-11-19-49/dataset_rooms.parquet"
# For the heating model:
heating_drop_columns = [
"sap_ending", "heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending",
"lighting_cost_ending", "hot_water_cost_ending",
# "days_to_ending", "days_to_starting", # TODO This is in the live version
'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting',
'number_heated_rooms_ending',
'number_habitable_rooms', 'number_heated_rooms'
]
heating_response = "heating_cost_ending"
# for the hot water model (older dataset)
hot_water_training_data_filepath = "sap_change_model/2024-07-10-20-28-54/dataset_rooms.parquet"
hot_water_drop_columns = [
"sap_ending", "heat_demand_change", "carbon_change", "rdsap_change", "heat_demand_ending", "carbon_ending",
"lighting_cost_ending", "heating_cost_ending",
"days_to_starting", "days_to_ending",
'number_habitable_rooms_starting', 'number_habitable_rooms_ending', 'number_heated_rooms_starting',
'number_heated_rooms_ending',
'number_habitable_rooms', 'number_heated_rooms'
]
# Diagnose heating
from utils.s3 import read_dataframe_from_s3_parquet
train = read_dataframe_from_s3_parquet(
bucket_name="retrofit-data-dev",
file_key=heating_training_data_filepath
)
# Drop the columns that aren't used
train = train.drop(columns=heating_drop_columns)
# If the value is positive, the ending cost is bigger than the starting cost (i.e. heating got more expensive)
train["cost_diference"] = (train["heating_cost_ending"] - train["heating_cost_starting"])
change_direction = train["cost_diference"] > 0
change_direction.value_counts(normalize=True)
average_costs_by_time_starting = train.groupby(
["lodgement_year_starting", "lodgement_month_starting"]
)["heating_cost_starting"].mean().reset_index().sort_values(["lodgement_year_starting", "lodgement_month_starting"])
average_costs_by_time_ending = train.groupby(
["lodgement_year_ending", "lodgement_month_ending"]
)["heating_cost_ending"].mean().reset_index().sort_values(["lodgement_year_ending", "lodgement_month_ending"])
# Check by photo supply values - if the property is gas, solar panels won't have an effect on the heating or hot
# water, so let's look for electric homes
# Across the entire dataset, there is no correlation
# Even for electric properties, there is no correlation
photo_supply_averages = train[
train["fuel_type_ending"] == "electricity"
].groupby(["photo_supply_ending"])["heating_cost_ending"].mean().reset_index()
photo_supply_to_size = train.groupby("photo_supply_ending")["total_floor_area_ending"].mean().reset_index()
photo_supply_to_size[["photo_supply_ending", "total_floor_area_ending"]].corr()
train[["total_floor_area_ending", "heating_cost_ending"]].corr()
# Bigger properties end up with smaller photo_supply values. This will be because the array size likely remains fairly
# consistent but takes up a smaller proportion of the roof. Typically, the bigger the floor area, the higher the heating
# costs, but bigger units also have smaller photo_supply
adding_solar = train[
(train["photo_supply_ending"] > 0) & (train["photo_supply_starting"] == 0)
]
is_positive = (adding_solar["cost_diference"] > 0)
is_positive.value_counts(normalize=True)
photo_supply_by_time = (
train[
train["fuel_type_ending"] == "electricity"
].groupby(
["lodgement_year_ending", "photo_supply_ending"]
)["heating_cost_ending"].mean().reset_index().sort_values(
["lodgement_year_ending", "photo_supply_ending"], ascending=True)
)
# Plot
photo_supply_by_time[["photo_supply_ending", "heating_cost_ending"]].corr()
photo_supply_by_time.plot()
# Observations
# 1) We retain all of the potential columns, however they are just based on the starting EPC
# 2) 21% of the time, the ending heating cost is more than the starting cost, but this is clearly a minority
# 3) Let's get rid of estimated perimeter starting and ending
# Things I should check
# 1) Do we update lodgement_year_ending and lodgement_month_ending?
# 2) Should we adjust costs to now, as well as lodgement_dates to today? Since 2023, costs have increased a lot so
# any savings should be benchmarked against what a customer is paying now
# 3) It might make sense to create a feature between floor area and photo supply, to give a more consistent estimate
# of a panel size for the property
# Get an example and score with the models
example = train[
(train["photo_supply_starting"] == 0) &
(train["photo_supply_ending"] > 0) &
(train["heating_cost_starting"] > train["heating_cost_ending"])
].sample(1)
# example["lodgement_month_starting"]
# example["lodgement_year_starting"]
# example["lodgement_month_ending"]
# example["lodgement_year_ending"].values[0]
#
# example["lodgement_year_ending"] = 2023
# example["days_to_ending"] = 3500
# example["days_to_starting"]
# {'heating_cost_predictions': predictions
# 0 378.5}
resp = model_api.predict_all(
df=example,
bucket="retrofit-data-dev",
prediction_buckets=get_prediction_buckets(),
model_prefixes=["heating_cost_predictions"],
extract_ids=False
)
# Step 1: get a cost for today
p.create_base_difference_epc_record(cleaned)
cwi_impact = p.base_difference_record.df.copy()
for k in property_recommendations[0][0]["simulation_config"]:
cwi_impact[k] = property_recommendations[0][0]["simulation_config"][k]
# 2212.4 - Baseline
today = model_api.predict_all(
df=p.base_difference_record.df.copy(),
bucket="retrofit-data-dev",
prediction_buckets=get_prediction_buckets(),
model_prefixes=["heating_cost_predictions"],
extract_ids=False
)
# impact of CWI - 1908
cwi_response = model_api.predict_all(
df=cwi_impact,
bucket="retrofit-data-dev",
prediction_buckets=get_prediction_buckets(),
model_prefixes=["heating_cost_predictions"],
extract_ids=False
)
pv_impact = cwi_impact.copy()
pv_impact["photo_supply_ending"] = 50
pv_impact["heating_cost_starting"] = 2212.4
pv_response = model_api.predict_all(
df=pv_impact,
bucket="retrofit-data-dev",
prediction_buckets=get_prediction_buckets(),
model_prefixes=["heating_cost_predictions"],
extract_ids=False
)
# Testing kwh for vde
base_prediction = model_api.predict_all(
df=epcs_for_scoring,
bucket=get_settings().DATA_BUCKET,
prediction_buckets=get_prediction_buckets(),
model_prefixes=["heating_kwh_predictions"],
extract_ids=False
)
cwi_epc = pd.DataFrame([property_scoring_epcs[1].copy()])
cwi_epc = add_features_from_code(cwi_epc)
cwi_epc = add_estimate_annual_kwh(cwi_epc)
# cwi_epc["walls-description"] = "Cavity wall, filled cavity"
# cwi_epc["walls-energy-eff"] = "Good"
# cwi_epc["heating-cost-current"] = 1650
# cwi_epc["current-energy-efficiency"] = 72
# cwi_epc["current-energy-rating"] = "C"
# cwi_epc["co2-emissions-current"] = 3.7
# cwi_epc["energy-consumption-current"] = 121
# cwi_epc["co2-emiss-curr-per-floor-area"] = 19
# cwi_epc["photo-supply"] = 0
# cwi_epc["energy-consumption-current"] =
# cwi_epc["roof-description"] = "Pitched, 300 mm loft insulation"
# cwi_epc["roof-energy-eff"] = "Very Good"
# cwi_epc["heating-cost-current"] = 1264
# "heating-cost-current": rec_impact["epc_heating_cost"],
# "hot-water-cost-current": rec_impact["epc_hot_water_cost"],
# # CO₂ emissions per square metre floor area per year in kg/m². Since CO₂ emissions are in tonnes
# # per year, we multiply by 1000 to get kg/m²
# "co2-emiss-curr-per-floor-area": round(
# 1000 * (rec_impact["carbon"] / self.data["total-floor-area"])
# ),
# "co2-emissions-current": rec_impact["carbon"],
# "current-energy-rating": sap_to_epc(rec_impact["sap"]),
# "current-energy-efficiency": int(np.floor(rec_impact["sap"])),
# "energy-consumption-current": rec_impact["heat_demand"],
# "lighting-cost-current": rec_impact["epc_lighting_cost"],
# "id": "+".join([str(self.id), rec_id])
cwi_prediction = model_api.predict_all(
df=cwi_epc,
bucket=get_settings().DATA_BUCKET,
prediction_buckets=get_prediction_buckets(),
model_prefixes=["heating_kwh_predictions", "hotwater_kwh_predictions"],
extract_ids=False
)
# 77 perryn
starting_heating = 19837.2
starting_hot_water = 2974.1
ending_heating = 17041.1
ending_hot_water = 2735.3
# 44 lindlings
starting_heating = 13327.1
starting_hot_water = 2349.5
ending_heating = 9672.3
ending_hot_water = 2030.2
ending_heating = 8695.1
ending_hot_water = 2437.0
heating_impact = starting_heating - ending_heating
hot_water_impact = starting_hot_water - ending_hot_water
total_impact = heating_impact + hot_water_impact

View file

@ -166,6 +166,7 @@ def main():
# For each property, we download the xmls and extract the data
database_data = []
for uprn, xmls in assessments_map.items():
extracted_data = {}
for xml in xmls:
xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml)

View file

@ -100,8 +100,8 @@ CONDENSING_BOILER_COSTS = {
# The unit is a 15kw boiler, capable of outputting between 3kw and 15kw. Costs seem to be around £1800
ELECTRIC_BOILER_COSTS = 1800
# Assumes 3 hours to remove each heater (including re-decorating)
ROOM_HEATER_REMOVAL_COST = 120
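# Assumes 1 hour to remove each heater (including re-decorating)
# NOTE(review): ROOM_HEATER_REMOVAL_LABOUR_HOURS below is still 3 - confirm which figure is intended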
ROOM_HEATER_REMOVAL_COST = 50
ROOM_HEATER_REMOVAL_LABOUR_HOURS = 3
# This is a cost quoted by Jim for a system flush - the existing system will run more efficiently
@ -1014,7 +1014,7 @@ class Costs:
"labour_days": labour_days
}
def solar_pv(self, wattage: float, has_battery: bool = False):
def solar_pv(self, wattage: float, has_battery: bool = False, array_cost=None):
"""
Calculates the total cost for solar PV based data provided by the MCS dashboard, which contains
@ -1028,13 +1028,17 @@ class Costs:
https://www.checkatrade.com/blog/cost-guides/cost-of-solar-panel-installation/
:param wattage: Peak wattage of the solar PV system]
:param has_battery: Bool, whether the system includes a battery
:param array_cost: float, containing the cost of the solar PV array
"""
# Get the cost data relevant to the region
regional_cost = MCS_SOLAR_PV_COST_DATA["-".join(["average_cost_per_kwh", self.region])]
kw = wattage / 1000
total_cost = kw * regional_cost
if array_cost is not None:
total_cost = array_cost
else:
kw = wattage / 1000
total_cost = kw * regional_cost
if has_battery:
# The battery cost is based on the £3500 quote received from installers

View file

@ -8,9 +8,10 @@ from datatypes.enums import QuantityUnits
from backend.Property import Property
from recommendations.recommendation_utils import (
r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value,
get_recommended_part, get_floor_u_value, override_costs
get_recommended_part, get_floor_u_value, override_costs, check_simulation_difference
)
from recommendations.Costs import Costs
from etl.epc_clean.epc_attributes.FloorAttributes import FloorAttributes
class FloorRecommendations(Definitions):
@ -73,7 +74,6 @@ class FloorRecommendations(Definitions):
u_value = self.property.floor["thermal_transmittance"]
property_type = self.property.data["property-type"]
floor_area = self.property.insulation_floor_area
year_built = self.property.year_built
if self.property.floor["another_property_below"] | (self.property.floor["insulation_thickness"] in [
"average", "above average"
@ -94,14 +94,16 @@ class FloorRecommendations(Definitions):
if u_value:
# By being built more recently than this, it means that the property was likely built with solid
# concrete floors with insulation already
if year_built < self.PART_L_YEAR_CUTOFF:
raise NotImplementedError("Not investigated this use case")
if u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
# The floor is already compliant
return
# In this case where we have the u-value of a floor, we likely don't have any other information about it
# so there is no recommendation that we can practically make
if (
self.property.floor["is_suspended"] or
self.property.floor["is_to_unheated_space"] or
self.property.floor["is_to_external_air"] or
self.property.floor["is_solid"]
):
raise ValueError("This should not be possible")
return
if u_value is None:
u_value = get_floor_u_value(
@ -118,7 +120,11 @@ class FloorRecommendations(Definitions):
if u_value < self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
return
if self.property.floor["is_suspended"]:
if (
self.property.floor["is_suspended"] or
self.property.floor["is_to_unheated_space"] or
self.property.floor["is_to_external_air"]
):
# Given the U-value, we recommend underfloor insulation
self.recommend_floor_insulation(
phase=phase,
@ -138,10 +144,6 @@ class FloorRecommendations(Definitions):
)
return
if self.property.floor["is_to_unheated_space"] or self.property.floor["is_to_external_air"]:
self.recommend_floor_insulation(u_value=u_value, parts=self.exposed_floor_insulation_parts)
return
raise NotImplementedError("Implement me!")
@staticmethod
@ -197,6 +199,8 @@ class FloorRecommendations(Definitions):
if already_installed:
cost_result = override_costs(cost_result)
new_description = "Suspended, insulated"
elif material["type"] == "solid_floor_insulation":
cost_result = self.costs.solid_floor_insulation(
insulation_floor_area=self.property.insulation_floor_area,
@ -207,9 +211,21 @@ class FloorRecommendations(Definitions):
already_installed = "solid_floor_insulation" in self.property.already_installed
if already_installed:
cost_result = override_costs(cost_result)
new_description = "Solid, insulated"
else:
raise NotImplementedError("Implement me!")
floor_ending_config = FloorAttributes(new_description).process()
floor_simulation_config = check_simulation_difference(
new_config=floor_ending_config, old_config=self.property.floor, prefix="floor_"
)
simulation_config = {
**floor_simulation_config,
"floor_thermal_transmittance_ending": new_u_value,
}
self.recommendations.append(
{
"phase": phase,
@ -227,6 +243,7 @@ class FloorRecommendations(Definitions):
"new_u_value": new_u_value,
"sap_points": None,
"already_installed": already_installed,
"simulation_config": simulation_config,
"description_simulation": {
"floor-description": "Solid, insulated" if
material["type"] == "solid_floor_insulation"

View file

@ -43,7 +43,7 @@ class HeatingControlRecommender:
# For an ASHP, we can recommend time and temperature zone controls, as well as programmer, trvs and a bypass
# which are common configurations for ASHPs
self.recommend_time_temperature_zone_controls()
self.recommend_programmer_trvs_bypass()
# self.recommend_programmer_trvs_bypass()
def recommend_room_heaters_electric_controls(self):
"""

View file

@ -28,7 +28,7 @@ class HeatingRecommender:
self.property.main_heating["clean_description"] in self.ELECTRIC_HEATING_DESCRIPTIONS
)
def is_high_heat_retention_valid(self):
def is_high_heat_retention_valid(self, ashp_only_heating_recommendation, exclusions):
"""
Check conditions if high heat retention storage is valid
:return:
@ -40,45 +40,29 @@ class HeatingRecommender:
self.property.main_heating["clean_description"] in ["No system present, electric heaters assumed"]
)
return self.has_electric_heating_description or electric_heating_assumed
has_electric = self.has_electric_heating_description or electric_heating_assumed
def recommend(self, has_cavity_or_loft_recommendations, phase=0, exclusions=None):
return (
has_electric and (not ashp_only_heating_recommendation) and ("boiler_upgrade" not in exclusions)
)
def is_boiler_upgrade_suitable(self, exclusions, ashp_only_heating_recommendation):
"""
Produces heating recommendations
:param has_cavity_or_loft_recommendations: boolean indicating if we have produced a cavity or loft insulation
recommendation. If there are cavity or loft recommendations, the property would need to complete those measures
before being able to get the boiler upgrade scheme benefits. The messaging in the front end would be to
:param phase: indicates the phase of the retrofit programme
:param exclusions: A list of exclusions for the recommendations
These are the conditions we apply to recommend a boiler installation
:return:
"""
# TODO: We could have a system flush recommendation for an existing boiler, where there is no need to replace
# the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this
# in the Costs class, stored as SYSTEM_FLUSH_COST
exclusions = [] if exclusions is None else exclusions
self.heating_recommendations = []
self.heating_control_recommendations = []
# This first iteration of the recommender will provide very basic recommendation
# We recommend heating controls based on the main heating system
if self.is_high_heat_retention_valid():
# Recommend high heat retention storage heaters
# TODO: We need to allow for the possibility that the property aleady has storage heaters, but just
# needs the controls
self.recommend_hhr_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)
# if the property has mains heating with boiler and radiators, we recommend optimal heating controls
# 1) if the property has mains heating with boiler and radiators, we recommend optimal heating controls
has_boiler = self.property.main_heating["clean_description"] in ["Boiler and radiators, mains gas"]
# We also check that the property doesn't have a heating system, but it has access to the mains gas
# 2) If the property doesn't have a heating system, but it has access to the mains gas
no_heating_has_mains = self.property.main_heating["clean_description"] in [
'No system present, electric heaters assumed'
] and self.property.data["mains-gas-flag"]
has_gas_heaters = (
self.property.main_heating["clean_description"] in ["Room heaters, mains gas"] and
# The property is using portable heaters and has access to gas mains
has_room_heaters = (
self.property.main_heating["clean_description"] in ["Room heaters, mains gas", "Room heaters, electric"] and
self.property.data["mains-gas-flag"]
)
@ -91,13 +75,66 @@ class HeatingRecommender:
self.property.data["mains-gas-flag"]
)
if (
has_boiler or
no_heating_has_mains or
electic_heating_has_mains or
has_gas_heaters or
portable_heaters_has_mains
):
is_valid = (
(
has_boiler or
no_heating_has_mains or
electic_heating_has_mains or
has_room_heaters or
portable_heaters_has_mains
) and
(not ashp_only_heating_recommendation) and
("boiler_upgrade" not in exclusions)
)
return is_valid, has_boiler
def recommend(self, has_cavity_or_loft_recommendations, phase=0, exclusions=None):
"""
Produces heating recommendations
:param has_cavity_or_loft_recommendations: boolean indicating if we have produced a cavity or loft insulation
recommendation. If there are cavity or loft recommendations, the property would need to complete those measures
before being able to get the boiler upgrade scheme benefits. The messaging in the front end would be to
:param phase: indicates the phase of the retrofit programme
:param exclusions: A list of exclusions for the recommendations
"""
# TODO: We could have a system flush recommendation for an existing boiler, where there is no need to replace
# the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this
# in the Costs class, stored as SYSTEM_FLUSH_COST
# TODO: Right now, we don't have recommendations for electric boilers - we should probably have one
exclusions = [] if exclusions is None else exclusions
non_invasive_ashp_recommendation = next(
(r for r in self.property.non_invasive_recommendations if r["type"] == "air_source_heat_pump"),
{"suitable": True}
)
# We allow for the non-invasive recommendation to be that ASHP is not suitable
# This option will prevent other heating recommendations from being specified, other than an ASHP
ashp_only_heating_recommendation = non_invasive_ashp_recommendation.get(
"ashp_only_heating_recommendation", False
)
self.heating_recommendations = []
self.heating_control_recommendations = []
# This first iteration of the recommender will provide very basic recommendation
# We recommend heating controls based on the main heating system
hhr_valid = self.is_high_heat_retention_valid(ashp_only_heating_recommendation, exclusions)
if hhr_valid:
# Recommend high heat retention storage heaters
# TODO: We need to allow for the possibility that the property already has storage heaters, but just
# needs the controls
self.recommend_hhr_storage_heaters(phase=phase, system_change=True, heating_controls_only=False)
gas_boiler_suitable, has_boiler = self.is_boiler_upgrade_suitable(
exclusions=exclusions, ashp_only_heating_recommendation=ashp_only_heating_recommendation
)
if gas_boiler_suitable:
# This indicates that the home previously did not have a boiler in place and so would require
# an overhaul to the system - right now, this is all reasons, apart from if there is an existing boiler
system_change = not has_boiler
@ -116,9 +153,11 @@ class HeatingRecommender:
# In the future, we'll allow overrides, so that non-intrusive surveys can contradict these conditions
# and either allow or prevent the recommendation of an air source heat pump
if self.property.is_ashp_valid(exclusions=exclusions):
if self.property.is_ashp_valid(exclusions=exclusions) and non_invasive_ashp_recommendation["suitable"]:
self.recommend_air_source_heat_pump(
phase=phase, has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations
phase=phase,
has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations,
)
return
@ -194,14 +233,21 @@ class HeatingRecommender:
:return:
"""
# Look for a non-intrusive recommendation
non_intrusive_recommendation = next((
r for r in self.property.non_invasive_recommendations if r["type"] == "air_source_heat_pump"
), {})
controls_recommender = HeatingControlRecommender(self.property)
controls_recommender.recommend(heating_description="Air source heat pump, radiators, electric")
ashp_costs = self.costs.air_source_heat_pump()
# We add the costs of the heating controls, onto each key in the costs dictionary
if controls_recommender.recommendation:
for key in ashp_costs:
ashp_costs[key] += controls_recommender.recommendation[0][key]
if non_intrusive_recommendation:
# Update with non-intrusive recommendation
if non_intrusive_recommendation.get("cost"):
ashp_costs.update(
{"total": non_intrusive_recommendation["cost"], "subtotal": None, "vat": None}
)
already_installed = "air_source_heat_pump" in self.property.already_installed
@ -213,6 +259,14 @@ class HeatingRecommender:
if already_installed:
ashp_costs = override_costs(ashp_costs)
if non_intrusive_recommendation and not all([x is None for x in controls_recommendations]):
# We just use the ttzc control
controls_recommendations = [
x for x in controls_recommendations if (
x["description_simulation"]["mainheatcont-description"] == "Time and temperature zone control"
)
]
# This is a map from the heating controls description to the description of the air source heat pump set up
ashp_descriptions = {
"Time and temperature zone control": (
@ -233,7 +287,8 @@ class HeatingRecommender:
if controls_rec:
for key in ashp_costs_with_controls:
ashp_costs_with_controls[key] += controls_rec[key]
if ashp_costs_with_controls[key] is not None:
ashp_costs_with_controls[key] += controls_rec[key]
if controls_rec is None:
description = "Install an air source heat pump."
@ -245,19 +300,19 @@ class HeatingRecommender:
# If the property does not have existing cavity and loft insulation, we include a note that the cost
# includes the boiler upgrade scheme and that the cavity and loft need to be treated, to ensure access
# to the funding
if has_cavity_or_loft_recommendations:
description = description + (
f" The cost includes the £"
f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant. "
f"You must ensure that the property has an insulated cavity and "
f"270mm+ loft insulation to qualify for the grant"
)
else:
description = description + (
f" The cost includes the £{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant"
)
if not non_intrusive_recommendation:
if has_cavity_or_loft_recommendations:
description = description + (
f" The cost includes the £"
f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant. "
f"You must ensure that the property has an insulated cavity and "
f"270mm+ loft insulation to qualify for the grant"
)
else:
description = description + (
f" The cost includes the £{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant"
)
print("TEMP UPDATED FOR 77 Perryn!!!!!")
simulation_config = {
"mainheat_energy_eff_ending": "Good",
"hot_water_energy_eff_ending": "Good"

View file

@ -1,4 +1,5 @@
import pandas as pd
import numpy as np
from backend.Property import Property
from typing import List
from itertools import groupby
@ -15,6 +16,10 @@ from recommendations.HotwaterRecommendations import HotwaterRecommendations
from recommendations.SecondaryHeating import SecondaryHeating
from backend.ml_models.AnnualBillSavings import AnnualBillSavings
from backend.apis.GoogleSolarApi import GoogleSolarApi
import backend.app.assumptions as assumptions
ASHP_COP = 3
STARTING_DUMMY_ID_VALUE = -9999
class Recommendations:
@ -66,7 +71,7 @@ class Recommendations:
# Building Fabric
if "wall_insulation" not in self.exclusions:
self.wall_recomender.recommend(phase=phase)
self.wall_recomender.recommend(phase=phase, exclusions=self.exclusions)
if self.wall_recomender.recommendations:
property_recommendations.append(self.wall_recomender.recommendations)
phase += 1
@ -359,475 +364,381 @@ class Recommendations:
property_instance,
all_predictions,
recommendations,
representative_recommendations,
energy_consumption_client
):
"""
Given predictions from the model apis, this method updates the recommendations with the predicted
impact of each recommendation on the property
This function will return two objects:
1) Updated recommendations with the predicted impact of the recommendation
2) A list of impacts by phase, which will be used for the kwh model scoring
:param property_instance: Instance of the Property class, for the home associated to property_id
:param all_predictions: dictionary of predictions from the model apis
:param recommendations: dictionary of recommendations for the property
:param representative_recommendations: dictionary of representative recommendations for the property
:param energy_consumption_client: Instance of the EnergyConsumptionClient class
:return:
"""
property_sap_predictions = all_predictions["sap_change_predictions"][
all_predictions["sap_change_predictions"]["property_id"] == str(property_instance.id)
].copy()
property_heat_predictions = all_predictions["heat_demand_predictions"][
all_predictions["heat_demand_predictions"]["property_id"] == str(property_instance.id)
].copy()
property_carbon_predictions = all_predictions["carbon_change_predictions"][
all_predictions["carbon_change_predictions"]["property_id"] == str(property_instance.id)
].copy()
property_lighting_cost_predictions = all_predictions["lighting_cost_predictions"][
all_predictions["lighting_cost_predictions"]["property_id"] == str(property_instance.id)
].copy()
property_heating_cost_predictions = all_predictions["heating_cost_predictions"][
all_predictions["heating_cost_predictions"]["property_id"] == str(property_instance.id)
].copy()
property_hot_water_cost_predictions = all_predictions["hot_water_cost_predictions"][
all_predictions["hot_water_cost_predictions"]["property_id"] == str(property_instance.id)
].copy()
# We apply adjustments to each of the heating costs
property_lighting_cost_predictions["adjusted_cost"] = property_lighting_cost_predictions["predictions"].apply(
lambda x: AnnualBillSavings.adjust_energy_to_metered(
x, current_epc_rating=property_instance.data["current-energy-rating"]
)
)
property_heating_cost_predictions["adjusted_cost"] = property_heating_cost_predictions["predictions"].apply(
lambda x: AnnualBillSavings.adjust_energy_to_metered(
x, current_epc_rating=property_instance.data["current-energy-rating"]
)
)
property_hot_water_cost_predictions["adjusted_cost"] = property_hot_water_cost_predictions["predictions"].apply(
lambda x: AnnualBillSavings.adjust_energy_to_metered(
x, current_epc_rating=property_instance.data["current-energy-rating"]
)
)
property_predictions = {
prefix + "_predictions": all_predictions[prefix + "_predictions"][
all_predictions[prefix + "_predictions"]["property_id"] == str(property_instance.id)
].copy() for prefix in ["sap_change", "heat_demand", "carbon_change"]
}
property_recommendations = recommendations[property_instance.id].copy()
# We calculate the impact by phase
sap_phase_impact = property_sap_predictions.groupby("phase")["predictions"].median().reset_index()
heat_phase_impact = property_heat_predictions.groupby("phase")["predictions"].median().reset_index()
carbon_phase_impact = property_carbon_predictions.groupby("phase")["predictions"].median().reset_index()
# lighting_cost_phase_impact = (
# property_lighting_cost_predictions.groupby("phase")[["adjusted_cost", "predictions"]].median(
# ).reset_index()
# )
heating_cost_phase_impact = (
property_heating_cost_predictions.groupby("phase")[["adjusted_cost", "predictions"]].median().reset_index()
)
hot_water_cost_phase_impact = (
property_hot_water_cost_predictions.groupby("phase")[
["adjusted_cost", "predictions"]
].median().reset_index()
)
increasing_variables = ["sap"]
decreasing_variables = ["carbon", "heat_demand"]
representative_rec_ids = [
rec["recommendation_id"] for rec in representative_recommendations[property_instance.id]
]
phase_lighting_costs = {}
phase_kwh_figures = {}
bill_savings_list = []
kwh_savings_list = []
impact_summary = []
for recommendations_by_type in property_recommendations:
for rec in recommendations_by_type:
if rec["type"] == "mechanical_ventilation":
# We don't have a perceived SAP impact for mechanical ventilation
continue
new_heat_demand = property_heat_predictions[property_heat_predictions["recommendation_id"] == str(
rec["recommendation_id"]
)]["predictions"].values[0]
new_carbon = property_carbon_predictions[property_carbon_predictions["recommendation_id"] == str(
rec["recommendation_id"]
)]["predictions"].values[0]
new_sap = property_sap_predictions[property_sap_predictions["recommendation_id"] == str(
rec["recommendation_id"]
)]["predictions"].values[0]
# Lighting costs won't change unless we have a lighting recommendation
new_lighting_cost_data = property_lighting_cost_predictions[
property_lighting_cost_predictions["recommendation_id"] == str(rec["recommendation_id"])
]
new_lighting_cost = new_lighting_cost_data["adjusted_cost"].values[0]
new_lighting_cost_unadjusted = new_lighting_cost_data["predictions"].values[0]
new_heating_cost_data = property_heating_cost_predictions[
property_heating_cost_predictions["recommendation_id"] == str(rec["recommendation_id"])
]
new_heating_cost = new_heating_cost_data["adjusted_cost"].values[0]
new_heating_cost_unadjusted = new_heating_cost_data["predictions"].values[0]
new_hot_water_cost_data = property_hot_water_cost_predictions[
property_hot_water_cost_predictions["recommendation_id"] == str(rec["recommendation_id"])
]
new_hot_water_cost = new_hot_water_cost_data["adjusted_cost"].values[0]
new_hot_water_cost_unadjusted = new_hot_water_cost_data["predictions"].values[0]
phase_energy_efficiency_metrics = {
prefix: property_predictions[prefix + "_predictions"][
property_predictions[prefix + "_predictions"]["recommendation_id"] == str(
rec["recommendation_id"]
)]["predictions"].values[0] for prefix in ["sap_change", "heat_demand", "carbon_change"]
}
# We structure this so that depending on the phase, we capture the previous phase impacts and
# then just have one piece of code to calculate the difference
if rec["phase"] == 0:
predicted_sap_points = new_sap - float(property_instance.data["current-energy-efficiency"])
predicted_co2_savings = float(property_instance.data["co2-emissions-current"]) - new_carbon
predicted_heat_demand = property_instance.floor_area * (
float(property_instance.data["energy-consumption-current"]) - new_heat_demand
)
if rec["type"] == "lighting":
new_heating_cost = property_instance.energy_cost_estimates["adjusted"]["heating"]
new_hot_water_cost = property_instance.energy_cost_estimates["adjusted"]["hot_water"]
new_lighting_cost = min(
new_lighting_cost, property_instance.energy_cost_estimates["adjusted"]["lighting"]
)
scoring_heating_cost = property_instance.energy_cost_estimates["unadjusted"]["heating"]
scoring_hot_water_cost = property_instance.energy_cost_estimates["unadjusted"]["hot_water"]
scoring_lighting_cost = min(
property_instance.energy_cost_estimates["unadjusted"]["lighting"],
new_lighting_cost_unadjusted
)
else:
new_heating_cost = min(
new_heating_cost, property_instance.energy_cost_estimates["adjusted"]["heating"]
)
new_hot_water_cost = min(
new_hot_water_cost, property_instance.energy_cost_estimates["adjusted"]["hot_water"]
)
new_lighting_cost = property_instance.energy_cost_estimates["adjusted"]["lighting"]
scoring_heating_cost = min(
property_instance.energy_cost_estimates["unadjusted"]["heating"],
new_heating_cost_unadjusted
)
scoring_hot_water_cost = min(
property_instance.energy_cost_estimates["unadjusted"]["hot_water"],
new_hot_water_cost_unadjusted
)
scoring_lighting_cost = property_instance.energy_cost_estimates["unadjusted"]["lighting"]
predicted_heating_cost_reduction = (
property_instance.energy_cost_estimates["adjusted"]["heating"] - new_heating_cost
)
predicted_hot_water_cost_reduction = (
property_instance.energy_cost_estimates["adjusted"]["hot_water"] - new_hot_water_cost
)
predicted_lighting_cost_reduction = 0 if rec["type"] != "lighting" else (
property_instance.energy_cost_estimates["adjusted"]["lighting"] - new_lighting_cost
)
# We store this value for later
phase_lighting_costs[rec["phase"]] = {
"adjusted": new_lighting_cost,
"unadjusted": scoring_lighting_cost
}
# We now predict the kwh savings using the xgb model
simulation_epc = property_instance.simulation_epcs[rec["phase"]].copy()
# The current heating, hot water and energy kwh should be based on the new, unadjusted
# costs for lighting, heating, hot water
simulation_epc["heating-cost-current"] = int(scoring_heating_cost)
simulation_epc["hot-water-cost-current"] = int(scoring_hot_water_cost)
simulation_epc["lighting-cost-current"] = int(scoring_lighting_cost)
# We predict with the energy consumption model
scoring_df = pd.DataFrame([simulation_epc])
# Change columns from underscores to hyphens
scoring_df.columns = [
x.lower().replace("_", "-") for x in scoring_df.columns
]
for col in ["heating_kwh", "hot_water_kwh"]:
scoring_df[col] = None
energy_consumption_client.data = None
new_heating_kwh = energy_consumption_client.score_new_data(
new_data=scoring_df, target="heating_kwh"
)[0]
new_hot_water_kwh = energy_consumption_client.score_new_data(
new_data=scoring_df, target="hot_water_kwh"
)[0]
# Adjust these figures
new_heating_kwh_adjusted = AnnualBillSavings.adjust_energy_to_metered(
new_heating_kwh, current_epc_rating=property_instance.data["current-energy-rating"]
)
new_hot_water_kwh_adjusted = AnnualBillSavings.adjust_energy_to_metered(
new_hot_water_kwh, current_epc_rating=property_instance.data["current-energy-rating"]
)
heating_kwh_reduction = 0 if predicted_heating_cost_reduction == 0 else (
property_instance.energy_consumption_estimates["adjusted"]["heating"] - new_heating_kwh_adjusted
)
hot_water_kwh_reduction = 0 if predicted_hot_water_cost_reduction == 0 else (
property_instance.energy_consumption_estimates["adjusted"]["hot_water"] -
new_hot_water_kwh_adjusted
)
lighting_kwh_reduction = predicted_lighting_cost_reduction / AnnualBillSavings.ELECTRICITY_PRICE_CAP
(
predicted_appliances_cost_reduction,
predicted_appliances_kwh_reduction
) = cls._calculate_appliance_solar_savings(
rec=rec,
property_instance=property_instance,
heating_kwh_reduction=heating_kwh_reduction,
hot_water_kwh_reduction=hot_water_kwh_reduction,
lighting_kwh_reduction=lighting_kwh_reduction
)
kwh_reduction = (
heating_kwh_reduction +
hot_water_kwh_reduction +
lighting_kwh_reduction +
predicted_appliances_kwh_reduction
)
predicted_bill_savings = (
predicted_heating_cost_reduction +
predicted_hot_water_cost_reduction +
predicted_lighting_cost_reduction +
predicted_appliances_cost_reduction
)
phase_kwh_figures[rec["phase"]] = {
"adjusted": {
"heating": new_heating_kwh_adjusted,
"hot_water": new_hot_water_kwh_adjusted
},
"unadjusted": {
"heating": new_heating_kwh,
"hot_water": new_hot_water_kwh
}
# These are just the starting values, from the EPC. When we score the ML models,
# heating_cost_starting and heating_cost_ending are just the values in the EPC. However, with
# heating_cost_ending, we expect that the EPC will predict a heating cost based on what would happen
# if we implemented the recommendation today, so our starting value is the EPC
previous_phase_values = {
"sap": float(property_instance.data["current-energy-efficiency"]),
"carbon": float(property_instance.data["co2-emissions-current"]),
"heat_demand": float(property_instance.data["energy-consumption-current"]),
}
else:
previous_phase = rec["phase"] - 1
predicted_sap_points = (
new_sap - sap_phase_impact[sap_phase_impact["phase"] == previous_phase]["predictions"].values[0]
)
predicted_co2_savings = (
carbon_phase_impact[carbon_phase_impact["phase"] == previous_phase]["predictions"].values[0] -
new_carbon
)
predicted_heat_demand = property_instance.floor_area * (
heat_phase_impact[heat_phase_impact["phase"] == previous_phase]["predictions"].values[0] -
new_heat_demand
)
if rec["type"] == "lighting":
# If we have a lighting recommendation, the heating, hot water and lighting costs will
# be from the previous phase - nothing will change
new_heating_cost = heating_cost_phase_impact[
heating_cost_phase_impact["phase"] == previous_phase
]["adjusted_cost"].values[0]
new_hot_water_cost = hot_water_cost_phase_impact[
hot_water_cost_phase_impact["phase"] == previous_phase
]["adjusted_cost"].values[0]
previous_phase_values_multiple = [x for x in impact_summary if x["phase"] == (rec["phase"] - 1)]
if len(previous_phase_values_multiple) != 1:
# Take an average of each of the previous phases
keys_to_median = ["sap", "carbon", "heat_demand"]
previous_phase_values = {}
for key in keys_to_median:
values = [item[key] for item in previous_phase_values_multiple]
previous_phase_values[key] = np.median(values)
new_lighting_cost = min(
new_lighting_cost, phase_lighting_costs[previous_phase]["adjusted"]
)
# We also use the unadjusted costs for the scoring from the previous phase
scoring_heating_cost = heating_cost_phase_impact[
heating_cost_phase_impact["phase"] == previous_phase
]["predictions"].values[0]
scoring_hot_water_cost = hot_water_cost_phase_impact[
hot_water_cost_phase_impact["phase"] == previous_phase
]["predictions"].values[0]
scoring_lighting_cost = min(
new_lighting_cost_unadjusted,
phase_lighting_costs[previous_phase]["unadjusted"]
)
else:
# Whereas for other recommendations, we use the new costs
new_heating_cost = min(
new_heating_cost,
heating_cost_phase_impact[
heating_cost_phase_impact["phase"] == previous_phase
]["adjusted_cost"].values[0]
previous_phase_values = previous_phase_values_multiple[0]
# We extract the values for the current phase
current_phase_values = {
"sap": phase_energy_efficiency_metrics["sap_change"],
"carbon": phase_energy_efficiency_metrics["carbon_change"],
"heat_demand": phase_energy_efficiency_metrics["heat_demand"],
}
# For increasing variables, the new value needs to be higher than the previous, otherwise we set it to
# the previous
# For decreasing variables, the new value should be lower than the previous, otherwise we set it to
# the previous
# In either case, we adjudge the recommendation to have had no/negligible impact
for v in increasing_variables:
current_phase_values[v] = (
current_phase_values[v] if current_phase_values[v] > previous_phase_values[v] else
previous_phase_values[v]
)
for v in previous_phase_values:
if v in decreasing_variables:
current_phase_values[v] = (
current_phase_values[v] if current_phase_values[v] < previous_phase_values[v] else
previous_phase_values[v]
)
new_hot_water_cost = min(
new_hot_water_cost,
hot_water_cost_phase_impact[
hot_water_cost_phase_impact["phase"] == previous_phase
]["adjusted_cost"].values[0]
)
new_lighting_cost = phase_lighting_costs[previous_phase]["adjusted"]
scoring_heating_cost = min(
new_heating_cost_unadjusted,
heating_cost_phase_impact[
heating_cost_phase_impact["phase"] == previous_phase
]["predictions"].values[0]
)
scoring_hot_water_cost = min(
new_hot_water_cost_unadjusted,
hot_water_cost_phase_impact[
hot_water_cost_phase_impact["phase"] == previous_phase
]["predictions"].values[0]
)
scoring_lighting_cost = phase_lighting_costs[previous_phase]["unadjusted"]
# We now estimate the adjusted cost savings for the recommendation
predicted_heating_cost_reduction = (
heating_cost_phase_impact[heating_cost_phase_impact["phase"] == previous_phase][
"adjusted_cost"
].values[0] - new_heating_cost
)
predicted_hot_water_cost_reduction = (
hot_water_cost_phase_impact[hot_water_cost_phase_impact["phase"] == previous_phase][
"adjusted_cost"
].values[0] - new_hot_water_cost
)
# Only lighting recommendations can have an impact here
predicted_lighting_cost_reduction = (
phase_lighting_costs[previous_phase]["adjusted"] - new_lighting_cost
)
# We now predict the kwh savings using the xgb model - this is based on
# the new costs at this phase
simulation_epc = property_instance.simulation_epcs[rec["phase"]].copy()
# The current heating, hot water and energy kwh should be based on the new, unadjusted
# costs for lighting, heating, hot water
simulation_epc["heating-cost-current"] = int(scoring_heating_cost)
simulation_epc["hot-water-cost-current"] = int(scoring_hot_water_cost)
simulation_epc["lighting-cost-current"] = int(scoring_lighting_cost)
# We predict with the energy consumption model
scoring_df = pd.DataFrame([simulation_epc])
# Change columns from underscores to hyphens
scoring_df.columns = [
x.lower().replace("_", "-") for x in scoring_df.columns
]
for col in ["heating_kwh", "hot_water_kwh"]:
scoring_df[col] = None
energy_consumption_client.data = None
new_heating_kwh = energy_consumption_client.score_new_data(
new_data=scoring_df, target="heating_kwh"
)[0]
new_hot_water_kwh = energy_consumption_client.score_new_data(
new_data=scoring_df, target="hot_water_kwh"
)[0]
# Adjust these figures
new_heating_kwh_adjusted = AnnualBillSavings.adjust_energy_to_metered(
new_heating_kwh, current_epc_rating=property_instance.data["current-energy-rating"]
)
new_hot_water_kwh_adjusted = AnnualBillSavings.adjust_energy_to_metered(
new_hot_water_kwh, current_epc_rating=property_instance.data["current-energy-rating"]
)
heating_kwh_reduction = 0 if predicted_heating_cost_reduction == 0 else (
phase_kwh_figures[previous_phase]["adjusted"]["heating"] - new_heating_kwh_adjusted
)
if heating_kwh_reduction < 0:
heating_kwh_reduction = 0
hot_water_kwh_reduction = 0 if predicted_hot_water_cost_reduction == 0 else (
phase_kwh_figures[previous_phase]["adjusted"]["hot_water"] - new_hot_water_kwh_adjusted
)
if hot_water_kwh_reduction < 0:
hot_water_kwh_reduction = 0
lighting_kwh_reduction = predicted_lighting_cost_reduction / AnnualBillSavings.ELECTRICITY_PRICE_CAP
(
predicted_appliances_cost_reduction,
predicted_appliances_kwh_reduction
) = cls._calculate_appliance_solar_savings(
rec=rec,
property_instance=property_instance,
heating_kwh_reduction=heating_kwh_reduction,
hot_water_kwh_reduction=hot_water_kwh_reduction,
lighting_kwh_reduction=lighting_kwh_reduction
)
# We now calculate the predicted_bill_savings
predicted_bill_savings = (
predicted_heating_cost_reduction + predicted_hot_water_cost_reduction +
predicted_lighting_cost_reduction + predicted_appliances_cost_reduction
)
kwh_reduction = (
heating_kwh_reduction +
hot_water_kwh_reduction +
lighting_kwh_reduction +
predicted_appliances_kwh_reduction
)
# We store this value for later
phase_lighting_costs[rec["phase"]] = {
"adjusted": new_lighting_cost,
"unadjusted": scoring_lighting_cost
}
phase_kwh_figures[rec["phase"]] = {
"adjusted": {
"heating": new_heating_kwh_adjusted,
"hot_water": new_hot_water_kwh_adjusted
},
"unadjusted": {
"heating": new_heating_kwh,
"hot_water": new_hot_water_kwh
}
}
property_phase_impact = {
# Increasing
"sap": current_phase_values["sap"] - previous_phase_values["sap"],
# Decreasing
"carbon": previous_phase_values["carbon"] - current_phase_values["carbon"],
# Decreasing
"heat_demand": previous_phase_values["heat_demand"] - current_phase_values["heat_demand"],
}
# Prevent from being negative
predicted_sap_points = 0 if predicted_sap_points < 0 else predicted_sap_points
predicted_co2_savings = 0 if predicted_co2_savings < 0 else predicted_co2_savings
predicted_heat_demand = 0 if predicted_heat_demand < 0 else predicted_heat_demand
for metric in ["sap", "carbon", "heat_demand"]:
property_phase_impact[metric] = (
0 if property_phase_impact[metric] < 0 else property_phase_impact[metric]
)
if metric == "sap":
property_phase_impact[metric] = round(property_phase_impact[metric], 2)
# For the moment, we cap the number of SAP points that can be achieved by LEDs at 2
if rec["type"] == "low_energy_lighting":
# For the moment, we cap the number of SAP points that can be achieved by ventilation at 2
rec["sap_points"] = min(predicted_sap_points, LightingRecommendations.SAP_LIMIT)
rec["co2_equivalent_savings"] = min(predicted_co2_savings, rec["co2_equivalent_savings"])
rec["heat_demand"] = predicted_heat_demand
else:
rec["sap_points"] = predicted_sap_points
rec["co2_equivalent_savings"] = predicted_co2_savings
rec["heat_demand"] = predicted_heat_demand
property_phase_impact["sap"] = min(property_phase_impact["sap"], LightingRecommendations.SAP_LIMIT)
property_phase_impact["carbon"] = min(
property_phase_impact["carbon"], rec["co2_equivalent_savings"]
)
# Round to 2 decimal places
rec["sap_points"] = round(rec["sap_points"], 2)
# Insert this information into the recommendation
rec["sap_points"] = property_phase_impact["sap"]
rec["co2_equivalent_savings"] = property_phase_impact["carbon"]
rec["heat_demand"] = property_phase_impact["heat_demand"]
rec["kwh_savings"] = kwh_reduction
rec["energy_cost_savings"] = predicted_bill_savings
if rec["recommendation_id"] in representative_rec_ids:
bill_savings_list.append(predicted_bill_savings)
kwh_savings_list.append(kwh_reduction)
if (rec["sap_points"] is None) and (rec["co2_equivalent_savings"] is None) or (
rec["heat_demand"] is None) or (rec["energy_cost_savings"] is None):
if (
(rec["sap_points"] is None) and (rec["co2_equivalent_savings"] is None) or
(rec["heat_demand"] is None)
):
raise ValueError("sap points, co2 or heat demand is missing")
# We sum up the total savings for the property and that is our expected energy bill
impact_summary.append(
{
"phase": rec["phase"],
"recommendation_id": rec["recommendation_id"],
**current_phase_values
}
)
expected_energy_bill = property_instance.current_energy_bill - sum(bill_savings_list)
expected_adjusted_energy = property_instance.current_adjusted_energy - sum(kwh_savings_list)
return property_recommendations, impact_summary
return (
property_recommendations,
expected_adjusted_energy,
expected_energy_bill
@staticmethod
def map_descriptions_to_fuel(heating_description, hotwater_description, main_fuel_description):
# Handle the case of community schemes
if (heating_description == "Community scheme") or (hotwater_description == "Community scheme"):
if main_fuel_description == "mains gas (community)":
return {
"heating_fuel_type": "Natural Gas (Community Scheme)",
"hotwater_fuel_type": "Natural Gas (Community Scheme)",
"heating_cop": 1,
"hotwater_cop": 1
}
raise NotImplementedError("Handle this case")
mapped = assumptions.DESCRIPTIONS_TO_FUEL_TYPES[heating_description]
heating_fuel = mapped["fuel"]
if hotwater_description in [
"From main system", "From main system, no cylinder thermostat",
]:
return {
"heating_fuel_type": heating_fuel, "hotwater_fuel_type": heating_fuel,
"heating_cop": mapped["cop"], "hotwater_cop": mapped["cop"]
}
if hotwater_description in [
"From main system, plus solar", "From main system, plus solar, no cylinder thermostat"
]:
# The fuel is
return {
"heating_fuel_type": heating_fuel, "hotwater_fuel_type": heating_fuel + " + Solar Thermal",
"heating_cop": mapped["cop"], "hotwater_cop": 1
}
mapped_hotwater = DESCRIPTIONS_TO_FUEL_TYPES[hotwater_description]
return {
"heating_fuel_type": heating_fuel, "hotwater_fuel_type": mapped_hotwater["fuel"],
"heating_cop": mapped["cop"], "hotwater_cop": mapped_hotwater["cop"]
}
@classmethod
def calculate_recommendation_tenant_savings(
    cls, property_instance, kwh_simulation_predictions, property_recommendations
):
    """
    This method inserts the kwh savings and the bill savings that the customer will make from the
    recommendations, based on the predictions from the ML model, and returns the current energy bill.

    Each recommendation (other than mechanical ventilation) is updated in place with the keys
    "kwh_savings" and "energy_cost_savings".

    :param property_instance: Instance of the Property class, for the home associated to property_id
    :param kwh_simulation_predictions: dictionary of predictions from the model apis. The
        "heating_kwh_predictions" and "hotwater_kwh_predictions" tables are read
    :param property_recommendations: list of lists of recommendation dictionaries for the property
    :return: the current energy bill: starting heating and hot water costs, plus the unadjusted lighting
        and appliances cost estimates, plus the applicable standing charges
    :raises Exception: if a fuel type could not be mapped for any row, or if a recommendation of type
        "lighting" is encountered (not implemented)
    """
    heating_predictions = kwh_simulation_predictions["heating_kwh_predictions"]
    kwh_impact_table = heating_predictions[
        heating_predictions["property_id"] == str(property_instance.id)
    ].merge(
        kwh_simulation_predictions["hotwater_kwh_predictions"].drop(
            columns=["property_id", "recommendation_id", "phase"]
        ),
        how="inner",
        on="id",
        suffixes=("_heating", "_hotwater")
    ).reset_index(drop=True)

    # We adjust this table with the kwh estimates for low energy lighting kwh values, and solar kwh estimates
    # NOTE(review): lighting_kwh_savings is merged on below but not read again within this method
    led_recommendation = pd.DataFrame([
        {
            "phase": r["phase"],
            "recommendation_id": r["recommendation_id"],
            "lighting_kwh_savings": r["kwh_savings"]
        } for recs in property_recommendations for r in recs if r["type"] == "low_energy_lighting"
    ], columns=["phase", "recommendation_id", "lighting_kwh_savings"])

    solar_recommendations = pd.DataFrame([
        {
            "phase": r["phase"],
            "recommendation_id": r["recommendation_id"],
            "solar_kwh_savings": r["initial_ac_kwh_per_year"] * assumptions.SOLAR_CONSUMPTION_PROPORTION,
        } for recs in property_recommendations for r in recs if r["type"] == "solar_pv"
    ], columns=["phase", "recommendation_id", "solar_kwh_savings"])

    # merge them on
    kwh_impact_table = kwh_impact_table.merge(
        led_recommendation, how="left", on=["phase", "recommendation_id"]
    ).merge(
        solar_recommendations, how="left", on=["phase", "recommendation_id"]
    )

    # The property's starting consumption is prepended as a dummy row, so that phase 0 recommendations
    # have a "previous phase" to be compared against
    property_kwh = property_instance.energy_consumption_estimates["unadjusted"]
    kwh_impact_table = pd.concat(
        [
            pd.DataFrame(
                [
                    {
                        "id": STARTING_DUMMY_ID_VALUE,
                        "phase": STARTING_DUMMY_ID_VALUE,
                        "recommendation_id": STARTING_DUMMY_ID_VALUE,
                        "predictions_heating": property_kwh["heating"],
                        "predictions_hotwater": property_kwh["hot_water"],
                    }
                ]
            ),
            kwh_impact_table
        ]
    ).sort_values(["phase", "recommendation_id"], ascending=True).reset_index(drop=True)

    # A recommendation should never increase consumption, so each prediction is capped at the maximum of
    # the previous phase. Rows are sorted by phase, so the previous phase has already been capped by the
    # time it is read
    for i in range(len(kwh_impact_table)):
        current_phase = kwh_impact_table.loc[i, "phase"]
        previous_phase_id = (current_phase - 1) if (current_phase > 0) else STARTING_DUMMY_ID_VALUE
        previous_phase = kwh_impact_table[kwh_impact_table["phase"] == previous_phase_id]

        if not previous_phase.empty:
            for col in ["predictions_heating", "predictions_hotwater"]:
                previous_max = previous_phase[col].max()
                if kwh_impact_table.loc[i, col] > previous_max:
                    kwh_impact_table.loc[i, col] = previous_max

    # For heating system recommendations, this could result in a fuel type change so we reflect that
    fuel_mapping = pd.DataFrame([
        {
            "id": epc["id"],
            **cls.map_descriptions_to_fuel(
                epc["mainheat-description"], epc["hotwater-description"], epc["main-fuel"]
            )
        } for epc in property_instance.updated_simulation_epcs
    ])

    fuel_mapping = pd.concat(
        [
            pd.DataFrame(
                [
                    {
                        "id": STARTING_DUMMY_ID_VALUE,
                        **cls.map_descriptions_to_fuel(
                            property_instance.data["mainheat-description"],
                            property_instance.data["hotwater-description"],
                            property_instance.data["main-fuel"]
                        )
                    }
                ]
            ),
            fuel_mapping
        ]
    )

    kwh_impact_table = kwh_impact_table.merge(
        fuel_mapping, how="left", on="id"
    ).sort_values(["phase", "recommendation_id"], ascending=True).reset_index(drop=True)

    if (pd.isnull(kwh_impact_table["heating_fuel_type"]).sum() or
            pd.isnull(kwh_impact_table["hotwater_fuel_type"]).sum()):
        raise Exception("Fuel type is missing")

    # We now calculate the fuel cost
    for k in ["heating", "hotwater"]:
        kwh_impact_table[f"{k}_cost"] = kwh_impact_table.apply(
            lambda x: AnnualBillSavings.calculate_recommendation_fuel_cost(
                x[f"predictions_{k}"], x[f"{k}_fuel_type"], x[f"{k}_cop"]
            ), axis=1
        )

    # We now deduce the savings of each recommendation, relative to the average of the previous phase
    for recs in property_recommendations:
        for rec in recs:
            if rec["type"] == "mechanical_ventilation":
                continue

            rec_impact = kwh_impact_table[kwh_impact_table["recommendation_id"] == rec["recommendation_id"]]
            previous_phase_id = (rec["phase"] - 1) if (rec["phase"] > 0) else STARTING_DUMMY_ID_VALUE
            previous_phase_impact = kwh_impact_table[kwh_impact_table["phase"] == previous_phase_id]

            if rec["type"] == "solar_pv":
                # Solar savings come from the self-consumed generation, priced at the electricity cap
                rec["kwh_savings"] = rec_impact["solar_kwh_savings"].values[0]
                rec["energy_cost_savings"] = (
                    rec_impact["solar_kwh_savings"].values[0] * AnnualBillSavings.ELECTRICITY_PRICE_CAP
                )
                continue

            heating_kwh_savings = (
                previous_phase_impact["predictions_heating"].mean() - rec_impact["predictions_heating"].values[0]
            )
            heating_cost_savings = (
                previous_phase_impact["heating_cost"].mean() - rec_impact["heating_cost"].values[0]
            )
            hotwater_kwh_savings = (
                previous_phase_impact["predictions_hotwater"].mean() - rec_impact["predictions_hotwater"].values[0]
            )
            hotwater_cost_savings = (
                previous_phase_impact["hotwater_cost"].mean() - rec_impact["hotwater_cost"].values[0]
            )

            total_kwh_savings = heating_kwh_savings + hotwater_kwh_savings
            energy_cost_savings = heating_cost_savings + hotwater_cost_savings

            if rec["type"] == "lighting":
                # In this case, we should probably just SKIP but check when we have one!
                raise Exception("Implement me 3")

            rec["kwh_savings"] = total_kwh_savings
            rec["energy_cost_savings"] = energy_cost_savings

    # Finally, we set the current energy bill
    # For a community scheme, there is a standing charge but it's based on the operational cost of the network
    # and therefore is likely different to the typical standing charge. This will be a cost typically defined
    # by the network operator and often a building, whose residents are on a heat network, where the building
    # operator will purchase energy from the network and re-sell it to the residents
    starting_figures = kwh_impact_table[kwh_impact_table["id"] == STARTING_DUMMY_ID_VALUE].squeeze()

    gas_fuel_types = ["Natural Gas", "Natural Gas (Community Scheme)"]
    gas_standing_charge = 0
    # Membership test for both fuels: comparing the hot water fuel to the list with == can never be True
    if (
        (starting_figures["heating_fuel_type"] in gas_fuel_types) or
        (starting_figures["hotwater_fuel_type"] in gas_fuel_types)
    ):
        gas_standing_charge = AnnualBillSavings.DAILY_STANDARD_CHARGE_GAS * 365

    electricity_standing_charge = AnnualBillSavings.DAILY_STANDARD_CHARGE_ELECTRICITY * 365

    current_energy_bill = (
        starting_figures["heating_cost"] +
        starting_figures["hotwater_cost"] +
        property_instance.energy_cost_estimates["unadjusted"]["lighting"] +
        property_instance.energy_cost_estimates["unadjusted"]["appliances"] +
        gas_standing_charge +
        electricity_standing_charge
    )

    return current_energy_bill

View file

@ -5,9 +5,11 @@ from typing import List
from datatypes.enums import QuantityUnits
from recommendations.recommendation_utils import (
get_roof_u_value, r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns,
update_lowest_selected_u_value, get_recommended_part, convert_thickness_to_numeric, override_costs
update_lowest_selected_u_value, get_recommended_part, convert_thickness_to_numeric, override_costs,
check_simulation_difference
)
from recommendations.Costs import Costs
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
class RoofRecommendations:
@ -274,6 +276,40 @@ class RoofRecommendations:
if already_installed:
cost_result = override_costs(cost_result)
new_thickness = insulation_thickness + material["depth"]
# This is based on the values we have in the training data
valid_numeric_values = [
12,
25,
50,
75,
100,
150,
200,
250,
270,
300,
350,
400,
]
proposed_depth = new_thickness
if (new_thickness not in valid_numeric_values) and material["type"] == "loft_insulation":
# Take the nearest value for scoring
proposed_depth = min(
valid_numeric_values, key=lambda x: abs(x - proposed_depth)
)
if proposed_depth >= 270:
new_efficiency = "Very Good"
else:
if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
new_efficiency = "Good"
else:
new_efficiency = "Very Good"
new_description = f"Pitched, {int(proposed_depth)}mm loft insulation"
elif material["type"] == "flat_roof_insulation":
cost_result = self.costs.flat_roof_insulation(
floor_area=self.property.insulation_floor_area,
@ -283,38 +319,21 @@ class RoofRecommendations:
already_installed = "flat_roof_insulation" in self.property.already_installed
if already_installed:
cost_result = override_costs(cost_result)
new_thickness = None
new_description = "Flat, insulated"
new_efficiency = "Good"
else:
raise ValueError("Invalid material type")
# This is based on the values we have in the training data
valid_numeric_values = [
12,
25,
50,
75,
100,
150,
200,
250,
270,
300,
350,
400,
]
roof_ending_config = RoofAttributes(new_description).process()
roof_simulation_config = check_simulation_difference(
new_config=roof_ending_config, old_config=self.property.roof, prefix="roof_"
)
proposed_depth = new_thickness
if new_thickness not in valid_numeric_values:
# Take the nearest value for scoring
proposed_depth = min(
valid_numeric_values, key=lambda x: abs(x - proposed_depth)
)
if proposed_depth >= 270:
new_efficiency = "Very Good"
else:
if self.property.data["walls-energy-eff"] not in ["Good", "Very Good"]:
new_efficiency = "Good"
simulation_config = {
**roof_simulation_config,
"roof_thermal_transmittance_ending": new_u_value,
"roof_energy_eff_ending": new_efficiency
}
recommendations.append(
{
@ -333,9 +352,9 @@ class RoofRecommendations:
"new_u_value": new_u_value,
"sap_points": None,
"already_installed": already_installed,
"new_thickness": new_thickness,
"simulation_config": simulation_config,
"description_simulation": {
"roof-description": f"Pitched, {int(proposed_depth)}mm loft insulation",
"roof-description": new_description,
"roof-energy-eff": new_efficiency
},
**cost_result
@ -386,18 +405,23 @@ class RoofRecommendations:
:return:
"""
roof_roof_insulation_materials = [m for m in self.materials if m["type"] == "room_roof_insulation"]
if not roof_roof_insulation_materials:
raise ValueError("No room in roof insulation materials found")
# TODO: We temporarily use costs from SCIS for RIR (room in roof) insulation. The costing was £180 per m2 of floor area
roof_roof_insulation_materials = [
{
"type": "room_roof_insulation",
"description": "Insulating the ceiling of the roof roof and re-decorate",
"depths": [100],
"depth_unit": "mm",
"r_value_per_mm": 0.038,
"thermal_conductivity": 0.022,
"cost": [180],
}
]
if self.property.pitched_roof_area is None:
raise ValueError("pitched_roof_area not included as property attribute")
lowest_selected_u_value = None
# lowest_selected_u_value = None
recommendations = []
for material in roof_roof_insulation_materials:
for depth, cost_per_unit in zip(material["depths"], material["cost"]):
part_u_value = r_value_per_mm_to_u_value(depth, material["r_value_per_mm"])
_, new_u_value = calculate_u_value_uplift(u_value, part_u_value)
@ -409,36 +433,62 @@ class RoofRecommendations:
# If we have a lowest U-value and the new U-value is lower than the lowest value, it's
# further into the diminishing returns threshold and shouldn't be recommended
if is_diminishing_returns(
recommendations, new_u_value, lowest_selected_u_value, self.DIMINISHING_RETURNS_U_VALUE
):
continue
# if is_diminishing_returns(
# recommendations, new_u_value, lowest_selected_u_value, self.DIMINISHING_RETURNS_U_VALUE
# ):
# continue
# We allow a small tolerance for error so we don't discount the recommendation entirely
if new_u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
lowest_selected_u_value = update_lowest_selected_u_value(lowest_selected_u_value, new_u_value)
# if new_u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
# lowest_selected_u_value = update_lowest_selected_u_value(lowest_selected_u_value, new_u_value)
estimated_cost = cost_per_unit * self.property.pitched_roof_area
estimated_cost = cost_per_unit * self.property.insulation_floor_area
recommendations.append(
{
"phase": phase,
"parts": [
get_recommended_part(
part=material,
selected_depth=depth,
quantity=self.property.pitched_roof_area,
quantity_unit=QuantityUnits.m2.value,
selected_total_cost=estimated_cost
)
],
"type": "room_roof_insulation",
"description": self.make_room_roof_insulation_description(material, depth),
"starting_u_value": u_value,
"new_u_value": new_u_value,
"sap_points": None,
"cost": estimated_cost,
}
)
# Could also be Roof room(s), ceiling insulated
new_descriptin = "Pitched, insulated at rafters"
roof_ending_config = RoofAttributes(new_descriptin).process()
roof_simulation_config = check_simulation_difference(
new_config=roof_ending_config, old_config=self.property.roof, prefix="roof_"
)
if self.property.data["roof-energy-eff"] in ["Very Poor", "Poor"]:
new_efficiency = "Average"
else:
new_efficiency = self.property.data["roof-energy-eff"]
simulation_config = {
**roof_simulation_config,
"roof_thermal_transmittance_ending": new_u_value,
"roof_energy_eff_ending": new_efficiency
}
already_installed = "flat_roof_insulation" in self.property.already_installed
cost_result = {
"total": estimated_cost,
"labour_hours": 80,
"labour_days": 5,
}
if already_installed:
cost_result = override_costs(cost_result)
recommendations.append(
{
"phase": phase,
"parts": [
# TODO
],
"type": "room_roof_insulation",
"description": "Insulate room in roof at rafters and re-decorate",
"starting_u_value": u_value,
"new_u_value": None,
"sap_points": None,
"simulation_config": simulation_config,
"description_simulation": {
"roof-description": new_descriptin,
"roof-energy-eff": new_efficiency
},
**cost_result,
"already_installed": already_installed
}
)
self.recommendations = recommendations

View file

@ -60,6 +60,9 @@ class SecondaryHeating:
**costs,
"simulation_config": {
"secondheat_description_ending": "None"
},
"description_simulation": {
"secondheat-description": "None"
}
}
)

View file

@ -1,6 +1,8 @@
import numpy as np
import pandas as pd
from recommendations.Costs import Costs
from recommendations.recommendation_utils import override_costs
from recommendations.recommendation_utils import override_costs, esimtate_pitched_roof_area
class SolarPvRecommendations:
@ -97,7 +99,11 @@ class SolarPvRecommendations:
best_configurations = panel_performance.head(1).reset_index(drop=True)
for rank, recommendation_config in best_configurations.iterrows():
roof_coverage_percent = round(recommendation_config["panneled_roof_area"] / total_roof_area * 100)
# If we don't have the panneled_roof_area in the recommendation_config we would need to calculate it (not yet implemented)
if recommendation_config.get("panneled_roof_area", None):
roof_coverage_percent = round(recommendation_config["panneled_roof_area"] / total_roof_area * 100)
else:
raise Exception("IMPLEMENT ME")
# Spread the cost to the individual units - adding a 20% contingency
total_cost = recommendation_config["total_cost"] / n_units
kw = np.floor(recommendation_config["array_wattage"] / 100) / 10
@ -150,17 +156,47 @@ class SolarPvRecommendations:
self.recommend_building_analysis(phase)
return
panel_performance = self.property.solar_panel_configuration["panel_performance"]
roof_area = self.property.roof_area
non_invasive_recommendation = next(
(r for r in self.property.non_invasive_recommendations if r["type"] == "solar_pv"), {"suitable": True}
)
solar_configurations = panel_performance.head(3).reset_index(drop=True)
# We allow for the non-invasive recommendation to be that solar PV is not suitable
if not non_invasive_recommendation["suitable"]:
return
if non_invasive_recommendation.get("array_wattage") is not None:
if self.property.roof["is_flat"]:
roof_area = self.property.insulation_floor_area
else:
roof_area = esimtate_pitched_roof_area(
floor_area=self.property.insulation_floor_area, floor_height=self.property.data["floor-height"]
)
solar_configurations = pd.DataFrame(
[
{
"array_wattage": non_invasive_recommendation["array_wattage"],
"initial_ac_kwh_per_year": non_invasive_recommendation["initial_ac_kwh_per_year"],
"panneled_roof_area": non_invasive_recommendation["panneled_roof_area"]
}
]
)
else:
# TODO: There may be some instances where we don't want to use the solar API so we should cover for them
panel_performance = self.property.solar_panel_configuration["panel_performance"]
roof_area = self.property.roof_area
solar_configurations = panel_performance.head(3).reset_index(drop=True)
# We combine each of these configurations with estimates with and without a battery
for rank, recommendation_config in solar_configurations.iterrows():
roof_coverage_percent = round(recommendation_config["panneled_roof_area"] / roof_area * 100)
# We round up to the nearest 10
roof_coverage_percent = np.ceil(roof_coverage_percent / 10) * 10
for has_battery in [False, True]:
cost_result = self.costs.solar_pv(
wattage=recommendation_config["array_wattage"], has_battery=has_battery
wattage=recommendation_config["array_wattage"],
has_battery=has_battery,
array_cost=non_invasive_recommendation.get("cost", None)
)
kw = np.floor(recommendation_config["array_wattage"] / 100) / 10
if has_battery:

View file

@ -61,10 +61,13 @@ class WallRecommendations(Definitions):
"Cavity wall, as built, insulated": "Cavity wall, filled cavity and external insulation",
"Solid brick, as built, no insulation": "Solid brick, with external insulation",
"Solid brick, as built, insulated": "Solid brick, with external insulation",
"Solid brick, as built, partial insulation": "Solid brick, with external insulation",
"Cob, as built": "Cob, with external insulation",
"System built, as built, no insulation": "System built, with external insulation",
"Granite or whinstone, as built, no insulation": 'Granite or whinstone, with external insulation',
"Timber frame, as built, no insulation": "Timber frame, with external insulation",
'Timber frame, as built, partial insulation': 'Timber frame, with external insulation',
"Sandstone or limestone, as built, no insulation": "Sandstone or limestone, with external insulation",
}
# These are the ending descriptions we consider for walls with internal insulation
@ -72,10 +75,13 @@ class WallRecommendations(Definitions):
"Cavity wall, as built, insulated": "Cavity wall, filled cavity and internal insulation",
"Solid brick, as built, no insulation": "Solid brick, with internal insulation",
"Solid brick, as built, insulated": "Solid brick, with internal insulation",
"Solid brick, as built, partial insulation": "Solid brick, with internal insulation",
"Cob, as built": "Cob, with internal insulation",
"System built, as built, no insulation": "System built, with internal insulation",
"Granite or whinstone, as built, no insulation": 'Granite or whinstone, with internal insulation',
"Timber frame, as built, no insulation": "Timber frame, with internal insulation",
'Timber frame, as built, partial insulation': 'Timber frame, with internal insulation',
"Sandstone or limestone, as built, no insulation": "Sandstone or limestone, with internal insulation",
}
def __init__(
@ -184,7 +190,7 @@ class WallRecommendations(Definitions):
return ewi_recommendations
def recommend(self, phase=0):
def recommend(self, phase=0, exclusions=None):
# if building built after 1990 + we're able to identify U-value +
# U-value less than 0.18 and if in or close to a conservation area,
# recommend internal wall insulation as a possible measure
@ -236,8 +242,8 @@ class WallRecommendations(Definitions):
# + it already has a U-value better than the building regulations, so we don't need to recommend anything
if (
(not is_cavity_wall)
and (self.property.year_built >= self.YEAR_WALLS_BUILT_WITH_INSULATION)
and (u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE)
and ((self.property.year_built >= self.YEAR_WALLS_BUILT_WITH_INSULATION)
or (u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE))
):
# Recommend nothing
return
@ -262,7 +268,7 @@ class WallRecommendations(Definitions):
# Remaining wall types are treated with IWI or EWI
if (u_value >= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE) and self.is_suitable_for_solid_insulation():
self.find_insulation(u_value, phase)
self.find_insulation(u_value, phase, exclusions=exclusions)
return
# If the u-value is within regulations, we don't do anything
@ -552,7 +558,7 @@ class WallRecommendations(Definitions):
return recommendations
def find_insulation(self, u_value, phase):
def find_insulation(self, u_value, phase, exclusions=None):
"""
This function contains the logic for finding potential insulation measures for a property, depending
on the parts available and whether the property can have external wall insulation installed
@ -564,8 +570,10 @@ class WallRecommendations(Definitions):
# we separate the logic for recommending them, therefore we don't
# consider diminishing returns between the two as they are considered to be separate measures
exclusions = [] if exclusions is None else exclusions
ewi_recommendations = []
if self.ewi_valid():
if self.ewi_valid() and "external_wall_insulation" not in exclusions:
ewi_recommendations = self._find_insulation(
u_value=u_value,
insulation_materials=pd.DataFrame(
@ -575,12 +583,14 @@ class WallRecommendations(Definitions):
phase=phase,
)
iwi_recommendations = self._find_insulation(
u_value=u_value,
insulation_materials=pd.DataFrame(self.internal_wall_insulation_materials),
non_insulation_materials=self.internal_wall_non_insulation_materials,
phase=phase,
)
iwi_recommendations = []
if "internal_wall_insulation" not in exclusions:
iwi_recommendations = self._find_insulation(
u_value=u_value,
insulation_materials=pd.DataFrame(self.internal_wall_insulation_materials),
non_insulation_materials=self.internal_wall_non_insulation_materials,
phase=phase,
)
self.recommendations += ewi_recommendations + iwi_recommendations

View file

@ -161,6 +161,9 @@ county_to_region_map = {
# Additional mappings required, based on what we find in the EPC database
'Greater London Authority': 'Inner London',
'Herefordshire, County of': 'West Midlands',
"North Northamptonshire": 'East Midlands',
"West Northamptonshire": 'East Midlands',
# We have a bunch of inner London local authority mappings, which can be used if the county is not found
'Barking and Dagenham': 'Inner London', 'Barnet': 'Inner London', 'Bexley': 'Inner London',
'Brent': 'Inner London', 'Bromley': 'Inner London', 'Camden': 'Inner London', 'City of London': 'Inner London',

View file

@ -514,8 +514,8 @@ FLOOR_LEVEL_MAP = {
"top floor": 5,
"20+": 20,
"21st or above": 21,
**{str(i).zfill(2): i for i in range(0, 21)},
**{ordinal(i): i for i in range(-1, 21)},
**{str(i): i for i in range(-1, 21)},
**{i: i for i in range(-1, 21)},
**{str(i).zfill(2): i for i in range(0, 51)},
**{ordinal(i): i for i in range(-1, 51)},
**{str(i): i for i in range(-1, 51)},
**{i: i for i in range(-1, 51)},
}

View file

@ -0,0 +1,391 @@
testing_examples = [
{
"epc": {
'lmk-key': '948324269042014090409224502942098', 'address1': '15, Ringwood Crescent', 'address2': None,
'address3': None, 'postcode': 'TS19 9DN', 'building-reference-number': 1016769078,
'current-energy-rating': 'C', 'potential-energy-rating': 'B', 'current-energy-efficiency': 79,
'potential-energy-efficiency': 85, 'property-type': 'House', 'built-form': 'Semi-Detached',
'inspection-date': '2014-08-21', 'local-authority': 'E06000004', 'constituency': 'E14000970',
'county': None,
'lodgement-date': '2014-09-04', 'transaction-type': 'none of the above', 'environment-impact-current': 77,
'environment-impact-potential': 85, 'energy-consumption-current': 152,
'energy-consumption-potential': 103.0, 'co2-emissions-current': 2.2, 'co2-emiss-curr-per-floor-area': 30,
'co2-emissions-potential': 1.5, 'lighting-cost-current': 61.0, 'lighting-cost-potential': 47.0,
'heating-cost-current': 625.0, 'heating-cost-potential': 522.0, 'hot-water-cost-current': 100.0,
'hot-water-cost-potential': 71.0, 'total-floor-area': 74.0, 'energy-tariff': 'Single',
'mains-gas-flag': 'Y', 'floor-level': 'NODATA!', 'flat-top-storey': None, 'flat-storey-count': None,
'main-heating-controls': 2106.0, 'multi-glaze-proportion': 100.0,
'glazed-type': 'double glazing installed before 2002', 'glazed-area': 'Normal', 'extension-count': 0.0,
'number-habitable-rooms': 3.0, 'number-heated-rooms': 3.0, 'low-energy-lighting': 70.0,
'number-open-fireplaces': 0.0, 'hotwater-description': 'From main system', 'hot-water-energy-eff': 'Good',
'hot-water-env-eff': 'Good', 'floor-description': 'Solid, no insulation (assumed)',
'floor-energy-eff': None,
'floor-env-eff': None, 'windows-description': 'Fully double glazed', 'windows-energy-eff': 'Average',
'windows-env-eff': 'Average', 'walls-description': 'Cavity wall, filled cavity', 'walls-energy-eff': 'Good',
'walls-env-eff': 'Good', 'secondheat-description': 'Room heaters, mains gas', 'sheating-energy-eff': None,
'sheating-env-eff': None, 'roof-description': 'Pitched, 50 mm loft insulation', 'roof-energy-eff': 'Poor',
'roof-env-eff': 'Poor', 'mainheat-description': 'Boiler and radiators, mains gas',
'mainheat-energy-eff': 'Good', 'mainheat-env-eff': 'Good',
'mainheatcont-description': 'Programmer, room thermostat and TRVs', 'mainheatc-energy-eff': 'Good',
'mainheatc-env-eff': 'Good', 'lighting-description': 'Low energy lighting in 70% of fixed outlets',
'lighting-energy-eff': 'Very Good', 'lighting-env-eff': 'Very Good',
'main-fuel': 'mains gas (not community)', 'wind-turbine-count': 0.0, 'heat-loss-corridor': 'NO DATA!',
'unheated-corridor-length': None, 'floor-height': 2.5, 'photo-supply': 50.0,
'solar-water-heating-flag': None,
'mechanical-ventilation': 'natural', 'address': '15, Ringwood Crescent',
'local-authority-label': 'Stockton-on-Tees', 'constituency-label': 'Stockton North',
'posttown': 'STOCKTON-ON-TEES', 'construction-age-band': 'England and Wales: 1950-1966',
'lodgement-datetime': '2014-09-04 09:22:45', 'tenure': 'owner-occupied',
'fixed-lighting-outlets-count': 10.0, 'low-energy-fixed-light-count': 7.0, 'uprn': 100110195416.0,
'uprn-source': 'Address Matched'
},
"heating_recommendation_descriptions": [
"Install an air source heat pump, and upgrade heating controls to Smart Thermostats, room sensors and "
"smart radiator valves (time & temperature zone control). The cost includes the £7500 boiler upgrade "
"scheme grant",
],
"heating_controls_recommendation_descriptions": [
"Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & "
"temperature zone control)"
],
"notes": "This property has a boiler, radiators & mains gas with good efficiency so the only recommendation"
"we expect here is for an air source heat pump. The heating controls are a programmer, room thermostat"
"and TRVs and so we should expect a TTZC recommendation"
},
{
"epc": {
'lmk-key': '153995620832008100717310934068296', 'address1': 'Apartment 13 The Quays',
'address2': 'Burscough', 'address3': None, 'postcode': 'L40 5TW',
'building-reference-number': 2604281568, 'current-energy-rating': 'C', 'potential-energy-rating': 'B',
'current-energy-efficiency': 69, 'potential-energy-efficiency': 84, 'property-type': 'Flat',
'built-form': 'Detached', 'inspection-date': '2008-10-06', 'local-authority': 'E07000127',
'constituency': 'E14001033', 'county': 'Lancashire', 'lodgement-date': '2008-10-07',
'transaction-type': 'marketed sale', 'environment-impact-current': 78,
'environment-impact-potential': 78, 'energy-consumption-current': 195,
'energy-consumption-potential': 192.0, 'co2-emissions-current': 1.7,
'co2-emiss-curr-per-floor-area': 29, 'co2-emissions-potential': 1.7, 'lighting-cost-current': 35,
'lighting-cost-potential': 38, 'heating-cost-current': 108, 'heating-cost-potential': 89,
'hot-water-cost-current': 256, 'hot-water-cost-potential': 104, 'total-floor-area': 57.2,
'energy-tariff': 'Single', 'mains-gas-flag': 'N', 'floor-level': '1st', 'flat-top-storey': 'Y',
'flat-storey-count': 2.0, 'main-heating-controls': 2603.0, 'multi-glaze-proportion': 100.0,
'glazed-type': 'double glazing installed during or after 2002', 'glazed-area': 'Normal',
'extension-count': 0.0, 'number-habitable-rooms': 3.0, 'number-heated-rooms': 3.0,
'low-energy-lighting': 77.0, 'number-open-fireplaces': 0.0,
'hotwater-description': 'Electric immersion, standard tariff', 'hot-water-energy-eff': 'Very Poor',
'hot-water-env-eff': 'Poor', 'floor-description': '(other premises below)', 'floor-energy-eff': None,
'floor-env-eff': None, 'windows-description': 'Fully double glazed', 'windows-energy-eff': 'Good',
'windows-env-eff': 'Good', 'walls-description': 'Cavity wall, as built, insulated (assumed)',
'walls-energy-eff': 'Good', 'walls-env-eff': 'Good',
'secondheat-description': 'Portable electric heaters', 'sheating-energy-eff': None,
'sheating-env-eff': None, 'roof-description': '(another dwelling above)', 'roof-energy-eff': None,
'roof-env-eff': None, 'mainheat-description': 'Room heaters, electric',
'mainheat-energy-eff': 'Very Poor', 'mainheat-env-eff': 'Poor',
'mainheatcont-description': 'Programmer and appliance thermostats', 'mainheatc-energy-eff': 'Good',
'mainheatc-env-eff': 'Good', 'lighting-description': 'Low energy lighting in 77% of fixed outlets',
'lighting-energy-eff': 'Very Good', 'lighting-env-eff': 'Very Good',
'main-fuel': 'electricity - this is for backwards compatibility only and should not be used',
'wind-turbine-count': 0.0, 'heat-loss-corridor': 'heated corridor', 'unheated-corridor-length': None,
'floor-height': 2.3, 'photo-supply': 0.0, 'solar-water-heating-flag': 'N',
'mechanical-ventilation': 'natural', 'address': 'Apartment 13 The Quays, Burscough',
'local-authority-label': 'West Lancashire', 'constituency-label': 'West Lancashire',
'posttown': 'ORMSKIRK', 'construction-age-band': 'England and Wales: 2003-2006',
'lodgement-datetime': '2008-10-07 17:31:09', 'tenure': 'owner-occupied',
'fixed-lighting-outlets-count': None, 'low-energy-fixed-light-count': None, 'uprn': 10012342725.0,
'uprn-source': 'Address Matched',
},
"heating_recommendation_descriptions": [
"Install high heat retention electric storage heaters and upgrade heating controls to High Heat Retention "
"Storage Heater Controls"
],
"heating_controls_recommendation_descriptions": [],
"notes": "This property has electric room heaters and is off gas so a boiler recommendation is not appropriate."
"We would expect a high heat retention storage recommendation. The property is a flat and therefore"
"we don't expect an air source heat pump recommendation. We also wouldn't expect a specific heating"
"control recommendation here"
},
{
"epc": {
'lmk-key': '751851300152012022010205497220090', 'address1': '21, Fullers Close', 'address2': 'Kelvedon',
'address3': None, 'postcode': 'CO5 9JX', 'building-reference-number': 8075968, 'current-energy-rating': 'D',
'potential-energy-rating': 'D', 'current-energy-efficiency': 55, 'potential-energy-efficiency': 56,
'property-type': 'Bungalow', 'built-form': 'Detached', 'inspection-date': '2012-02-20',
'local-authority': 'E07000067', 'constituency': 'E14001045', 'county': 'Essex',
'lodgement-date': '2012-02-20',
'transaction-type': 'non marketed sale', 'environment-impact-current': 39,
'environment-impact-potential': 39,
'energy-consumption-current': 475, 'energy-consumption-potential': 472.0, 'co2-emissions-current': 5.4,
'co2-emiss-curr-per-floor-area': 84, 'co2-emissions-potential': 5.4, 'lighting-cost-current': 53.0,
'lighting-cost-potential': 40.0, 'heating-cost-current': 674.0, 'heating-cost-potential': 678.0,
'hot-water-cost-current': 110.0, 'hot-water-cost-potential': 110.0, 'total-floor-area': 64.45,
'energy-tariff': 'dual', 'mains-gas-flag': 'N', 'floor-level': 'NODATA!', 'flat-top-storey': None,
'flat-storey-count': None, 'main-heating-controls': '2402', 'multi-glaze-proportion': 100.0,
'glazed-type': 'double glazing installed before 2002', 'glazed-area': 'Normal', 'extension-count': 0.0,
'number-habitable-rooms': 3.0, 'number-heated-rooms': 3.0, 'low-energy-lighting': 67.0,
'number-open-fireplaces': 0.0, 'hotwater-description': 'Electric immersion, off-peak',
'hot-water-energy-eff': 'Average', 'hot-water-env-eff': 'Very Poor',
'floor-description': 'Suspended, no insulation (assumed)', 'floor-energy-eff': None, 'floor-env-eff': None,
'windows-description': 'Fully double glazed', 'windows-energy-eff': 'Average', 'windows-env-eff': 'Average',
'walls-description': 'Cavity wall, as built, insulated (assumed)', 'walls-energy-eff': 'Good',
'walls-env-eff': 'Good', 'secondheat-description': 'Room heaters, electric', 'sheating-energy-eff': None,
'sheating-env-eff': None, 'roof-description': 'Pitched, 300+ mm loft insulation',
'roof-energy-eff': 'Very Good',
'roof-env-eff': 'Very Good', 'mainheat-description': 'Electric storage heaters',
'mainheat-energy-eff': 'Poor',
'mainheat-env-eff': 'Very Poor', 'mainheatcont-description': 'Automatic charge control',
'mainheatc-energy-eff': 'Average', 'mainheatc-env-eff': 'Average',
'lighting-description': 'Low energy lighting in 67% of fixed outlets', 'lighting-energy-eff': 'Good',
'lighting-env-eff': 'Good', 'main-fuel': 'electricity (not community)', 'wind-turbine-count': 0.0,
'heat-loss-corridor': 'NO DATA!', 'unheated-corridor-length': None, 'floor-height': 2.38,
'photo-supply': 0.0,
'solar-water-heating-flag': None, 'mechanical-ventilation': 'natural',
'address': '21, Fullers Close, Kelvedon',
'local-authority-label': 'Braintree', 'constituency-label': 'Witham', 'posttown': 'COLCHESTER',
'construction-age-band': 'England and Wales: 1983-1990', 'lodgement-datetime': '2012-02-20 10:20:54',
'tenure': 'owner-occupied', 'fixed-lighting-outlets-count': 6.0, 'low-energy-fixed-light-count': 4.0,
'uprn': 100090311351.0, 'uprn-source': 'Address Matched', 'property-type_y': None, 'built-form_y': None,
},
"heating_recommendation_descriptions": [],
"heating_controls_recommendation_descriptions": [],
"notes": "This test has electric storage heaters with automatic charge control - this case should be researched"
"and checked that a high heat retention storage recommendation is actually sensible. If it's not, "
"we should adjust accordingly or perhaps have just a control recommendation"
},
{
"epc": {
'lmk-key': '1356416458532015082116515621278108', 'address1': '19a, St. Stephens Road', 'address2': None,
'address3': None, 'postcode': 'TW3 2BH', 'building-reference-number': 5821158378,
'current-energy-rating': 'E', 'potential-energy-rating': 'C', 'current-energy-efficiency': 54,
'potential-energy-efficiency': 76, 'property-type': 'Maisonette', 'built-form': 'Semi-Detached',
'inspection-date': '2015-08-21', 'local-authority': 'E09000018', 'constituency': 'E14000593',
'county': 'Greater London Authority', 'lodgement-date': '2015-08-21', 'transaction-type': 'marketed sale',
'environment-impact-current': 48, 'environment-impact-potential': 78, 'energy-consumption-current': 383,
'energy-consumption-potential': 155, 'co2-emissions-current': 3.4, 'co2-emiss-curr-per-floor-area': 68,
'co2-emissions-potential': 1.4, 'lighting-cost-current': 52, 'lighting-cost-potential': 34,
'heating-cost-current': 560, 'heating-cost-potential': 255, 'hot-water-cost-current': 166,
'hot-water-cost-potential': 102, 'total-floor-area': 51.0, 'energy-tariff': 'Single', 'mains-gas-flag': 'Y',
'floor-level': '1st', 'flat-top-storey': 'Y', 'flat-storey-count': None, 'main-heating-controls': '2104',
'multi-glaze-proportion': 100.0, 'glazed-type': 'double glazing, unknown install date',
'glazed-area': 'Normal', 'extension-count': 0.0, 'number-habitable-rooms': 3.0, 'number-heated-rooms': 3.0,
'low-energy-lighting': 50.0, 'number-open-fireplaces': 0.0, 'hotwater-description': 'From main system',
'hot-water-energy-eff': 'Average', 'hot-water-env-eff': 'Average',
'floor-description': '(another dwelling below)', 'floor-energy-eff': 'NO DATA!', 'floor-env-eff': None,
'windows-description': 'Fully double glazed', 'windows-energy-eff': 'Average', 'windows-env-eff': 'Average',
'walls-description': 'Solid brick, as built, no insulation (assumed)', 'walls-energy-eff': 'Very Poor',
'walls-env-eff': 'Very Poor', 'secondheat-description': 'Room heaters, mains gas',
'sheating-energy-eff': None, 'sheating-env-eff': None,
'roof-description': 'Pitched, 100 mm loft insulation',
'roof-energy-eff': 'Average', 'roof-env-eff': 'Average',
'mainheat-description': 'Boiler and radiators, mains gas', 'mainheat-energy-eff': 'Good',
'mainheat-env-eff': 'Good', 'mainheatcont-description': 'Programmer and room thermostat',
'mainheatc-energy-eff': 'Average', 'mainheatc-env-eff': 'Average',
'lighting-description': 'Low energy lighting in 50% of fixed outlets', 'lighting-energy-eff': 'Good',
'lighting-env-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'wind-turbine-count': 0.0,
'heat-loss-corridor': 'no corridor', 'unheated-corridor-length': None, 'floor-height': 2.5,
'photo-supply': None, 'solar-water-heating-flag': 'N', 'mechanical-ventilation': 'natural',
'address': '19a, St. Stephens Road', 'local-authority-label': 'Hounslow',
'constituency-label': 'Brentford and Isleworth', 'posttown': 'HOUNSLOW',
'construction-age-band': 'England and Wales: 1930-1949', 'lodgement-datetime': '2015-08-21 16:51:56',
'tenure': 'owner-occupied', 'fixed-lighting-outlets-count': None, 'low-energy-fixed-light-count': None,
'uprn': 100021560521.0, 'uprn-source': 'Address Matched',
},
"heating_recommendation_descriptions": [],
"heating_controls_recommendation_descriptions": [],
"notes": ""
},
{
"epc": {
'lmk-key': '1164410099442014062611405027442168', 'address1': '31, Brightside Road', 'address2': None,
'address3': None, 'postcode': 'SE13 6EP', 'building-reference-number': 5481394278,
'current-energy-rating': 'E', 'potential-energy-rating': 'C', 'current-energy-efficiency': 48,
'potential-energy-efficiency': 79, 'property-type': 'House', 'built-form': 'Mid-Terrace',
'inspection-date': '2014-06-26', 'local-authority': 'E09000023', 'constituency': 'E14000789',
'county': 'Greater London Authority', 'lodgement-date': '2014-06-26',
'transaction-type': 'assessment for green deal', 'environment-impact-current': 44,
'environment-impact-potential': 77, 'energy-consumption-current': 334,
'energy-consumption-potential': 121.0, 'co2-emissions-current': 5.1, 'co2-emiss-curr-per-floor-area': 64,
'co2-emissions-potential': 1.9, 'lighting-cost-current': 70.0, 'lighting-cost-potential': 49.0,
'heating-cost-current': 964.0, 'heating-cost-potential': 571.0, 'hot-water-cost-current': 107.0,
'hot-water-cost-potential': 72.0, 'total-floor-area': 80.0, 'energy-tariff': 'Single',
'mains-gas-flag': 'Y', 'floor-level': 'NODATA!', 'flat-top-storey': None, 'flat-storey-count': None,
'main-heating-controls': '2102', 'multi-glaze-proportion': 100.0,
'glazed-type': 'double glazing installed before 2002', 'glazed-area': 'Normal', 'extension-count': 1.0,
'number-habitable-rooms': 3.0, 'number-heated-rooms': 3.0, 'low-energy-lighting': 56.0,
'number-open-fireplaces': 0.0, 'hotwater-description': 'From main system', 'hot-water-energy-eff': 'Good',
'hot-water-env-eff': 'Good', 'floor-description': 'Suspended, no insulation (assumed)',
'floor-energy-eff': None, 'floor-env-eff': None, 'windows-description': 'Fully double glazed',
'windows-energy-eff': 'Average', 'windows-env-eff': 'Average',
'walls-description': 'Solid brick, as built, no insulation (assumed)', 'walls-energy-eff': 'Very Poor',
'walls-env-eff': 'Very Poor', 'secondheat-description': 'Room heaters, mains gas',
'sheating-energy-eff': None, 'sheating-env-eff': None,
'roof-description': 'Pitched, no insulation (assumed)',
'roof-energy-eff': 'Very Poor', 'roof-env-eff': 'Very Poor',
'mainheat-description': 'Boiler and radiators, mains gas', 'mainheat-energy-eff': 'Good',
'mainheat-env-eff': 'Good', 'mainheatcont-description': 'Programmer, no room thermostat',
'mainheatc-energy-eff': 'Very Poor', 'mainheatc-env-eff': 'Very Poor',
'lighting-description': 'Low energy lighting in 56% of fixed outlets', 'lighting-energy-eff': 'Good',
'lighting-env-eff': 'Good', 'main-fuel': 'mains gas (not community)', 'wind-turbine-count': 0.0,
'heat-loss-corridor': 'NO DATA!', 'unheated-corridor-length': None, 'floor-height': 2.5,
'photo-supply': 0.0,
'solar-water-heating-flag': None, 'mechanical-ventilation': 'natural', 'address': '31, Brightside Road',
'local-authority-label': 'Lewisham', 'constituency-label': 'Lewisham, Deptford', 'posttown': 'LONDON',
'construction-age-band': 'England and Wales: before 1900', 'lodgement-datetime': '2014-06-26 11:40:50',
'tenure': 'owner-occupied', 'fixed-lighting-outlets-count': 9.0, 'low-energy-fixed-light-count': 5.0,
'uprn': 100021936225.0, 'uprn-source': 'Address Matched',
},
"heating_recommendation_descriptions": [
'Install an air source heat pump, and upgrade heating controls to Smart Thermostats, room sensors and '
'smart radiator valves (time & temperature zone control). The cost includes the £7500 boiler upgrade '
'scheme grant',
],
"heating_controls_recommendation_descriptions": [
'upgrade heating controls to Room thermostat, programmer and TRVs',
'Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & '
'temperature zone control)'
],
"notes": "Because this property already has a boiler, we don't recommend HHR. We only have a "
"heating recommendation for an air source heat pump. Because the heating controls are "
"Programmer, no room thermostat, we have a programmer, room thermostat and trvs recommendation"
"for heating controls and for TTZC."
},
{
"epc": {
'lmk-key': '1139584119102014052116014126342698', 'address1': '13, Starbuck Street', 'address2': 'Rudry',
'address3': None, 'postcode': 'CF83 3DP', 'building-reference-number': 2187913278,
'current-energy-rating': 'E', 'potential-energy-rating': 'D', 'current-energy-efficiency': 44,
'potential-energy-efficiency': 61, 'property-type': 'Flat', 'built-form': 'Semi-Detached',
'inspection-date': '2014-05-21', 'local-authority': 'W06000018', 'constituency': 'W07000076',
'county': None,
'lodgement-date': '2014-05-21', 'transaction-type': 'rental (private)', 'environment-impact-current': 49,
'environment-impact-potential': 64, 'energy-consumption-current': 343,
'energy-consumption-potential': 240.0, 'co2-emissions-current': 4.0, 'co2-emiss-curr-per-floor-area': 61,
'co2-emissions-potential': 2.8, 'lighting-cost-current': 49.0, 'lighting-cost-potential': 49.0,
'heating-cost-current': 752.0, 'heating-cost-potential': 429.0, 'hot-water-cost-current': 281.0,
'hot-water-cost-potential': 281.0, 'total-floor-area': 66.0, 'energy-tariff': 'Single',
'mains-gas-flag': 'N', 'floor-level': '1st', 'flat-top-storey': 'Y', 'flat-storey-count': None,
'main-heating-controls': 2602.0, 'multi-glaze-proportion': 100.0,
'glazed-type': 'double glazing installed during or after 2002', 'glazed-area': 'Normal',
'extension-count': 0.0, 'number-habitable-rooms': 4.0, 'number-heated-rooms': 4.0,
'low-energy-lighting': 86.0, 'number-open-fireplaces': 0.0,
'hotwater-description': 'Electric immersion, standard tariff', 'hot-water-energy-eff': 'Very Poor',
'hot-water-env-eff': 'Very Poor', 'floor-description': '(other premises below)', 'floor-energy-eff': None,
'floor-env-eff': None, 'windows-description': 'Fully double glazed', 'windows-energy-eff': 'Good',
'windows-env-eff': 'Good', 'walls-description': 'Cavity wall, as built, no insulation (assumed)',
'walls-energy-eff': 'Poor', 'walls-env-eff': 'Poor', 'secondheat-description': 'None',
'sheating-energy-eff': None, 'sheating-env-eff': None,
'roof-description': 'Pitched, 200 mm loft insulation',
'roof-energy-eff': 'Good', 'roof-env-eff': 'Good', 'mainheat-description': 'Room heaters, electric',
'mainheat-energy-eff': 'Very Poor', 'mainheat-env-eff': 'Very Poor',
'mainheatcont-description': 'Appliance thermostats', 'mainheatc-energy-eff': 'Good',
'mainheatc-env-eff': 'Good', 'lighting-description': 'Low energy lighting in 86% of fixed outlets',
'lighting-energy-eff': 'Very Good', 'lighting-env-eff': 'Very Good',
'main-fuel': 'electricity (not community)', 'wind-turbine-count': 0.0, 'heat-loss-corridor': 'no corridor',
'unheated-corridor-length': None, 'floor-height': 2.5, 'photo-supply': 0.0,
'solar-water-heating-flag': None,
'mechanical-ventilation': 'natural', 'address': '13, Starbuck Street, Rudry',
'local-authority-label': 'Caerphilly', 'constituency-label': 'Caerphilly', 'posttown': 'CAERPHILLY',
'construction-age-band': 'England and Wales: 1950-1966', 'lodgement-datetime': '2014-05-21 16:01:41',
'tenure': 'rental (private)', 'fixed-lighting-outlets-count': 7.0, 'low-energy-fixed-light-count': 6.0,
'uprn': 43088770.0, 'uprn-source': 'Address Matched',
},
"heating_recommendation_descriptions": [
'Install high heat retention electric storage heaters and upgrade heating controls to High Heat Retention '
'Storage Heater Controls'
],
"heating_controls_recommendation_descriptions": [],
"notes": "This property is a flat so we don't have an ASHP recommendation. It also doesn't have access to the "
"mains and so it can't have a gas boiler. We don't expect any controls recommendations"
},
{
"epc": {
'lmk-key': '492646189022010060208143796198410', 'address1': '67, Ridgeway Road', 'address2': None,
'address3': None, 'postcode': 'HP5 2EW', 'building-reference-number': 1976846768,
'current-energy-rating': 'D', 'potential-energy-rating': 'D', 'current-energy-efficiency': 64,
'potential-energy-efficiency': 68, 'property-type': 'Bungalow', 'built-form': 'Detached',
'inspection-date': '2010-06-01', 'local-authority': 'E07000005', 'constituency': 'E14000631',
'county': 'Buckinghamshire', 'lodgement-date': '2010-06-02', 'transaction-type': 'marketed sale',
'environment-impact-current': 67, 'environment-impact-potential': 70, 'energy-consumption-current': 249,
'energy-consumption-potential': 231.0, 'co2-emissions-current': 3.5, 'co2-emiss-curr-per-floor-area': 35,
'co2-emissions-potential': 3.2, 'lighting-cost-current': 89.0, 'lighting-cost-potential': 51.0,
'heating-cost-current': 627.0, 'heating-cost-potential': 603.0, 'hot-water-cost-current': 105.0,
'hot-water-cost-potential': 105.0, 'total-floor-area': 76.0, 'energy-tariff': 'Single',
'mains-gas-flag': 'Y', 'floor-level': 'NO DATA!', 'flat-top-storey': None, 'flat-storey-count': None,
'main-heating-controls': 2104.0, 'multi-glaze-proportion': 100.0,
'glazed-type': 'double glazing installed during or after 2002', 'glazed-area': 'Normal',
'extension-count': 0.0, 'number-habitable-rooms': 7.0, 'number-heated-rooms': 7.0,
'low-energy-lighting': 25.0, 'number-open-fireplaces': 1.0, 'hotwater-description': 'From main system',
'hot-water-energy-eff': 'Very Good', 'hot-water-env-eff': 'Very Good',
'floor-description': 'Suspended, no insulation (assumed)', 'floor-energy-eff': None, 'floor-env-eff': None,
'windows-description': 'Fully double glazed', 'windows-energy-eff': 'Good', 'windows-env-eff': 'Good',
'walls-description': 'Cavity wall, filled cavity', 'walls-energy-eff': 'Good', 'walls-env-eff': 'Good',
'secondheat-description': 'Room heaters, wood logs', 'sheating-energy-eff': None, 'sheating-env-eff': None,
'roof-description': 'Pitched, 150 mm loft insulation', 'roof-energy-eff': 'Good', 'roof-env-eff': 'Good',
'mainheat-description': 'Boiler and radiators, mains gas', 'mainheat-energy-eff': 'Very Good',
'mainheat-env-eff': 'Very Good', 'mainheatcont-description': 'Programmer and room thermostat',
'mainheatc-energy-eff': 'Average', 'mainheatc-env-eff': 'Average',
'lighting-description': 'Low energy lighting in 25% of fixed outlets', 'lighting-energy-eff': 'Average',
'lighting-env-eff': 'Average',
'main-fuel': 'mains gas - this is for backwards compatibility only and should not be used',
'wind-turbine-count': 0.0, 'heat-loss-corridor': 'NO DATA!', 'unheated-corridor-length': None,
'floor-height': 2.4, 'photo-supply': 0.0, 'solar-water-heating-flag': 'N',
'mechanical-ventilation': 'natural', 'address': '67, Ridgeway Road', 'local-authority-label': 'Chiltern',
'constituency-label': 'Chesham and Amersham', 'posttown': 'CHESHAM',
'construction-age-band': 'England and Wales: 1930-1949', 'lodgement-datetime': '2010-06-02 08:14:37',
'tenure': 'owner-occupied', 'fixed-lighting-outlets-count': None, 'low-energy-fixed-light-count': None,
'uprn': 100080513604.0, 'uprn-source': 'Address Matched'
},
"heating_recommendation_descriptions": [
'Install an air source heat pump, and upgrade heating controls to Smart Thermostats, room sensors and '
'smart radiator valves (time & temperature zone control). The cost includes the £7500 boiler upgrade '
'scheme grant'
],
"heating_controls_recommendation_descriptions": [
'upgrade heating controls to Room thermostat, programmer and TRVs',
'Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & '
'temperature zone control)'
],
"notes": "This has a very efficient boiler and is a detached bungalow, but only has "
"Programmer and room thermostat for heating controls so we'd expect an ASHP heating recommendation"
"as the only option, and heating controls recommendations for programmer, room thermostats and trvs"
"as well as ttzc"
}
]
import random
from pathlib import Path
import inspect
import pandas as pd
# Developer aid for building new test cases: sample one EPC certificate whose
# heating/property combination is not yet covered by `testing_examples`.
# The work is kept behind a __main__ guard so that importing this module for its
# data never lists directories, reads a CSV, consumes randomness or prints.

# inspect.getfile on a lambda defined here resolves to the path of this file
src_file_path = inspect.getfile(lambda: None)
EPC_DIRECTORY = Path(src_file_path).parent / "local_data" / "all-domestic-certificates"

# Columns that identify a "kind" of example; a certificate sharing all of them
# with an existing test case is considered already covered.
EXAMPLE_KEY_COLUMNS = ["mainheat-description", "mainheat-energy-eff", "built-form", "property-type"]


def exclude_used_examples(data, examples):
    """
    Drop the rows of data whose key combination already appears in examples.
    :param data: DataFrame of certificates with hyphenated, lower-case column names.
    :param examples: iterable of test cases, each holding the certificate under the "epc" key.
    :return: the rows of data with no matching example, with the same columns as data.
    """
    used_examples = pd.DataFrame(
        [
            {**{column: x["epc"][column] for column in EXAMPLE_KEY_COLUMNS}, "used": True}
            for x in examples
        ],
        # Explicit columns keep the merge valid even when there are no examples yet
        columns=EXAMPLE_KEY_COLUMNS + ["used"],
    )
    merged = data.merge(used_examples, how="left", on=EXAMPLE_KEY_COLUMNS)
    # Rows that found no partner in the left join have a null "used" marker
    return merged[pd.isnull(merged["used"])].drop(columns=["used"])


def print_unused_example():
    """
    Pick a random local certificates file, sample one certificate that is not yet covered by
    testing_examples and print the fields needed to decide whether it makes a useful test case.
    :return: the sampled certificate as a dict.
    """
    epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
    directory = random.choice(epc_directories)
    data = pd.read_csv(directory / "certificates.csv", low_memory=False)
    # Rename the columns to the same format as the api returns
    data.columns = [c.replace("_", "-").lower() for c in data.columns]
    data["floor-height"] = data["floor-height"].fillna(2.45)
    data = exclude_used_examples(data, testing_examples)
    eg = data.sample(1).to_dict("records")[0]
    for field in (
        "mainheat-description",
        "mainheat-energy-eff",
        "property-type",
        "built-form",
        "mainheatcont-description",
    ):
        print(eg[field])
    return eg


if __name__ == "__main__":
    print_unused_example()

View file

@ -0,0 +1,124 @@
from datetime import datetime
import pandas as pd
import msgpack
from utils.s3 import read_dataframe_from_s3_parquet, read_from_s3
import pytest
from backend.Property import Property
from etl.epc.Record import EPCRecord
from etl.bill_savings.KwhData import KwhData
from recommendations.HeatingRecommender import HeatingRecommender
from recommendations.tests.test_data.heating_recommendations_data import testing_examples
class TestHeatingRecommendations:
    """
    Runs HeatingRecommender against each EPC in testing_examples and checks that the heating and
    heating-controls recommendations match the descriptions recorded alongside the example.
    """

    @pytest.fixture
    def cleaning_data(self):
        """
        Cleaning dataset handed to EPCRecord, read from the dev data bucket.
        :return: the parquet file loaded by read_dataframe_from_s3_parquet
        """
        return read_dataframe_from_s3_parquet(
            bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
        )

    @pytest.fixture
    def cleaned(self):
        """
        Cleaned EPC data handed to Property.set_features.
        :return: the msgpack-decoded content of the stored object
        """
        # NOTE: the key ends in .bson but the payload is decoded with msgpack
        packed = read_from_s3(
            s3_file_name="cleaned_epc_data/cleaned.bson",
            bucket_name="retrofit-data-dev"
        )
        return msgpack.unpackb(packed, raw=False)

    @pytest.fixture
    def kwh_client(self):
        """
        KwhData client whose retail price table is replaced by a single fixed row dated today.
        :return: the patched KwhData client
        """
        client = KwhData(bucket="retrofit-data-dev", read_consumption_data=False)
        # We fix this pricing table for these tests
        client.retail_price_comparison = pd.DataFrame(
            [
                {
                    "Date": datetime.today().strftime("%Y-%m-%d"),
                    'Average standard variable tariff (Large legacy suppliers)': 1
                }
            ]
        )
        client.retail_price_comparison["Date"] = pd.to_datetime(client.retail_price_comparison["Date"])
        return client

    @pytest.mark.parametrize(
        "test_case",
        testing_examples
    )
    def test_recommend(self, test_case, cleaning_data, cleaned, kwh_client):
        """
        With this function, we test out multiple heating descriptions and check which recommendations
        we retrieve alongside them
        :param test_case: one entry of testing_examples (the EPC plus the expected descriptions)
        :param cleaning_data: fixture, cleaning dataset for EPCRecord
        :param cleaned: fixture, cleaned EPC data for Property.set_features
        :param kwh_client: fixture, KwhData client with a fixed price table
        :return:
        """
        # The two cases below are known-unfinished; they fail loudly on purpose as a reminder
        if test_case["epc"]["uprn"] == 100090311351:
            raise Exception(
                "This test has electric storage heaters with automatic charge control - this case should be "
                "researched and checked that a high heat retention storage recommendation is actually "
                "sensible. If it's not, we should adjust accordingly or perhaps have just a control "
                "recommendation"
            )

        if test_case["epc"]["uprn"] == 100021560521:
            raise Exception("Finish this test - could do so while on the train")

        # Copy the EPC so the shared test data is not modified through this reference
        epc_records = {"original_epc": test_case["epc"].copy(), "full_sap_epc": {}, "old_data": []}
        epc_record = EPCRecord(
            epc_records=epc_records,
            run_mode="newdata",
            cleaning_data=cleaning_data
        )

        p = Property(
            id=0,
            postcode=test_case["epc"]["postcode"],
            address=test_case["epc"]["address"],
            epc_record=epc_record,
            energy_assessment={
                "condition": {},
                "energy_assessment_is_newer": False
            }
        )

        # For these tests, this can be fixed
        kwh_predictions = {
            "heating_kwh_predictions": pd.DataFrame(
                [
                    {"id": p.uprn, "predictions": 12000}
                ]
            ),
            "hotwater_kwh_predictions": pd.DataFrame(
                [
                    {"id": p.uprn, "predictions": 3000}
                ]
            ),
        }

        p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=kwh_predictions)

        recommender = HeatingRecommender(property_instance=p)
        # Check they're empty
        assert not recommender.heating_recommendations
        assert not recommender.heating_control_recommendations

        recommender.recommend(has_cavity_or_loft_recommendations=False)

        # The counts are compared first because the set comparison below would hide duplicates
        assert len(recommender.heating_recommendations) == len(test_case["heating_recommendation_descriptions"])
        assert (
            len(recommender.heating_control_recommendations) ==
            len(test_case["heating_controls_recommendation_descriptions"])
        )

        # Check the exact descriptions
        assert (
            {x["description"] for x in recommender.heating_recommendations} ==
            set(test_case["heating_recommendation_descriptions"])
        )
        assert (
            {x["description"] for x in recommender.heating_control_recommendations} ==
            set(test_case["heating_controls_recommendation_descriptions"])
        )

View file

@ -229,6 +229,39 @@ def read_excel_from_s3(bucket_name, file_key, header_row, drop_all_na=True):
return df
def save_excel_to_s3(df, bucket_name, file_key):
    """
    Write a pandas DataFrame to S3 as an Excel workbook, without its index.
    :param df: DataFrame to save. Must not be empty.
    :param bucket_name: S3 bucket name.
    :param file_key: S3 file key (path and file name). Must end in .xls or .xlsx.
    :raises ValueError: if df is empty or file_key does not end in an Excel extension.
    """
    # Validate before doing any work: the empty check takes precedence over the extension check
    if df.empty:
        raise ValueError("The DataFrame is empty. Nothing to save to Excel.")

    has_excel_extension = file_key.endswith(".xls") or file_key.endswith(".xlsx")
    if not has_excel_extension:
        raise ValueError("The specified file key does not appear to be an Excel file.")

    # Serialise the workbook in memory, then rewind so the upload starts at the first byte
    buffer = BytesIO()
    df.to_excel(buffer, index=False)
    buffer.seek(0)

    # Upload the in-memory workbook through a boto3 session resource
    target_bucket = boto3.session.Session().resource('s3').Bucket(bucket_name)
    target_bucket.put_object(Body=buffer, Key=file_key)

    logger.info(f"Excel file saved to S3 bucket '{bucket_name}' with key '{file_key}'")
def read_csv_from_s3(bucket_name, filepath):
s3 = boto3.client('s3')