Merge pull request #327 from Hestia-Homes/main

Pausing solar api, working on survey extraction
This commit is contained in:
KhalimCK 2024-07-24 16:57:16 +01:00 committed by GitHub
commit 6e714127c6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 1072 additions and 142 deletions

View file

@ -350,8 +350,21 @@ class Property:
r for r in property_representative_recommendations
if r["phase"] <= phase
]
epc_transformations = [x["description_simulation"] for x in represenative_recs_to_this_phase]
# TODO: This is placeholder, but it's to handle the case of having both internal and external wall
# insulation as options. This will cause the process below to fall over, so we take just
# external wall insulation in epc_transformations, if we have both
types = [
x["type"] for x in represenative_recs_to_this_phase
]
if "external_wall_insulation" in types and "internal_wall_insulation" in types:
epc_transformations = [
x["description_simulation"] for x in represenative_recs_to_this_phase if
x["type"] != "internal_wall_insulation"
]
else:
epc_transformations = [x["description_simulation"] for x in represenative_recs_to_this_phase]
# It is possible that we could have two simulations applied to the same descriptions
# We extract these out
phase_epc_transformation = {}

View file

@ -488,13 +488,6 @@ async def trigger_plan(body: PlanTriggerRequest):
"carbon_ending"]
)
from utils.s3 import save_dataframe_to_s3_parquet
save_dataframe_to_s3_parquet(
bucket_name="retrofit-datalake-dev",
file_key="recommendations_scoring_data_11th_july.parquet",
df=recommendations_scoring_data
)
model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
all_predictions = model_api.predictions_template()
@ -510,8 +503,6 @@ async def trigger_plan(body: PlanTriggerRequest):
for key, scored in predictions_dict.items():
all_predictions[key] = pd.concat([all_predictions[key], scored])
prediction_df = all_predictions["heating_cost_predictions"]
# Insert the predictions into the recommendations and run the optimiser
# TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a
# possibility with heating system

View file

View file

@ -133,7 +133,7 @@ def app():
energy_consumption_data = []
for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
# Skip the first 50
if i < 127:
if i < 250:
continue
data = pd.read_csv(directory / "certificates.csv", low_memory=False)

View file

@ -1 +1 @@
[{"EPC": "D", "count": 1718}, {"EPC": "C", "count": 1343}, {"EPC": "E", "count": 538}, {"EPC": "F", "count": 80}, {"EPC": "B", "count": 52}, {"EPC": "G", "count": 3}, {"EPC": "A", "count": 2}]
[{"EPC": "D", "count": 332}, {"EPC": "C", "count": 68}, {"EPC": "E", "count": 44}, {"EPC": "F", "count": 6}]

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
[{"is_real_epc": true, "count": 3736}, {"is_real_epc": false, "count": 1509}]
[{"index": true, "is_real_epc": 3736}, {"index": false, "is_real_epc": 1509}]

View file

@ -31,7 +31,8 @@ def make_epc_rating_piechart(epc_rating_breakdown):
labels = [x["EPC"] for x in epc_rating_breakdown]
values = [x["count"] for x in epc_rating_breakdown]
marker_colors = ["#117d58", "#2da55c", "#8dbd40", "#f7cd14", "#f3a96a", "#ef8026", "#e41e3b"]
# marker_colors = ["#117d58", "#2da55c", "#8dbd40", "#f7cd14", "#f3a96a", "#ef8026", "#e41e3b"]
marker_colors = ["#8dbd40", "#f7cd14", "#f3a96a", "#ef8026", "#e41e3b"]
fig = go.Figure(
data=[go.Pie(labels=labels, values=values, marker_colors=marker_colors, sort=False)],
@ -53,7 +54,10 @@ def make_map(locations):
# Create custom hover text
df['hover_text'] = df.apply(
lambda row: f"UPRN: {int(row['uprn'])}<br>Address: {row['standardised_address']}<br>Postcode: "
f"{row['standardised_postcode']}<br>Latitude: {row['LATITUDE']}<br>Longitude: {row['LONGITUDE']}",
f"{row['standardised_postcode']}<br>Latitude: {row['LATITUDE']}<br>Longitude: "
f"{row['LONGITUDE']}<br>Walls: {row['Walls']}<br>Roofs: {row['Roofs']}<br>Main Fuel: "
f"{row['Main Fuel']}<br>Heating: {row['Heating']}<br>Age: {row['Age']}<br>Property Type: "
f"{row['Property Type']}",
axis=1)
data = [
@ -93,8 +97,8 @@ def layout():
locations = json.load(file)
# Get the EPC breakdown data
with open("Stonewater real EPC breakdown.json") as file:
real_epc_breakdown = json.load(file)
# with open("Stonewater real EPC breakdown.json") as file:
# real_epc_breakdown = json.load(file)
# Get the EPC ratings data
with open("Stonewater EPC rating breakdown.json") as file:
@ -149,7 +153,8 @@ def layout():
style={"font-size": "2.5rem", "font-weight": "bold", "margin-bottom": "20px"}
),
html.P(
"This map shows the location of the properties that are to be surveyed by Osmosis.",
"This map shows the location of the properties that are to be surveyed by Osmosis. "
"These properties span across 30 counties and 155 postal regions",
style={"font-size": "1.25rem", "margin-bottom": "40px"}
),
],
@ -170,22 +175,22 @@ def layout():
),
dbc.Row(
[
dbc.Col(
[
html.Div(
"Breakdown of real EPCs",
style={"fontSize": "1.5rem", "fontWeight": "bold", "marginBottom": "1em"},
className='text-center'
),
html.Div(
"This pie chart shows the proportion of real EPCs in the asset list. Currently, "
"there are EPCs for 3736 of the 5245 properties that have a UPRN in the asset list",
style={"marginBottom": "1em"}
),
make_real_epc_piechart(real_epc_breakdown),
],
width={"size": 5},
),
# dbc.Col(
# [
# html.Div(
# "Breakdown of real EPCs",
# style={"fontSize": "1.5rem", "fontWeight": "bold", "marginBottom": "1em"},
# className='text-center'
# ),
# html.Div(
# "This pie chart shows the proportion of real EPCs in the asset list. Currently, "
# "there are EPCs for 3736 of the 5245 properties that have a UPRN in the asset list",
# style={"marginBottom": "1em"}
# ),
# make_real_epc_piechart(real_epc_breakdown),
# ],
# width={"size": 5},
# ),
dbc.Col(
[
html.Div(
@ -195,22 +200,9 @@ def layout():
),
html.Div(
[
"This pie chart shows the breakdown of EPC ratings, for properties that currently "
"have an EPC. "
"The ratings range from A to G, where surprisingly, there are two EPC properties "
"that were initially "
"expected by Parity's modelled SAP, to be EPC D or below. These properties can be"
" seen ",
html.A("here",
href="https://find-energy-certificate.service.gov.uk/energy-certificate"
"/2708-5001-7327-6090-7284",
target="_blank"),
" and ",
html.A("here",
href="https://find-energy-certificate.service.gov.uk/energy-certificate"
"/1037-4032-1009-0361-7292",
target="_blank"),
"."
"This pie chart shows the breakdown of expected and real EPC ratings, "
"for properties "
"that have been selected for sample",
],
style={"marginBottom": "1em"}
),

View file

@ -11,8 +11,9 @@ In this script, we do the following things:
import pandas as pd
import json
from utils.s3 import read_pickle_from_s3
from backend.app.utils import sap_to_epc
stonewater_asset_list = pd.read_csv("Stonewater asset list with archetypes V2.csv")
stonewater_asset_list = pd.read_csv("Stonewater asset list with archetypes V3.csv")
archetyped_asset_list = stonewater_asset_list[
[
"internal_id", "customer_asset_id", "external_address_id", "udprn", "uprn", "cluster",
@ -25,28 +26,15 @@ archetyped_asset_list["rank"] = archetyped_asset_list["rank"].astype(int)
archetyped_asset_list = archetyped_asset_list.sort_values(by=["cluster", "rank"])
# Read in and merge on clustering features
clustering_features = read_pickle_from_s3(
bucket_name="retrofit-data-dev",
s3_file_name="customers/Stonewater/clustering/clustering_dataframe.pkl"
clustering_features = pd.read_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Stonewater/Stonewater archetyping features V3.csv",
)
# Move property-type and built-form to the first two columns
columns_to_move = ['property-type', 'built-form']
# Get the remaining columns
remaining_columns = [col for col in clustering_features.columns if col not in columns_to_move]
# Create the new column order
new_column_order = columns_to_move + remaining_columns
# Reorder the DataFrame
clustering_features = clustering_features[new_column_order]
archetyped_asset_list = archetyped_asset_list.merge(
clustering_features,
on="internal_id",
how="inner"
)
clustering_features.drop(columns=['uprn', 'Address ID', "rank", "cluster", "archetype_representative"]),
left_on="internal_id",
right_on="Osm. ID"
).drop(columns=["Osm. ID"])
archetyped_asset_list = archetyped_asset_list.rename(
columns={
@ -82,12 +70,47 @@ archetyped_asset_list["uprn"] = archetyped_asset_list["uprn"].astype('Int64')
# archetyped_asset_list.to_excel("Stonewater Archetyping Features.xlsx", index=False)
# We store the location data, which will be used for the mapping. We just need the longitude and latitude
stonewater_asset_list["uprn"] = stonewater_asset_list["uprn"].astype('Int64')
mapping_data = stonewater_asset_list[
stonewater_asset_list["archetype_representative"]
][["internal_id", "uprn", "standardised_address", "standardised_postcode"]]
][["internal_id", "uprn", "standardised_address", "standardised_postcode"]].merge(
archetyped_asset_list[["uprn", "Walls", "Roofs", "Main Fuel", "Heating", "Age", "Property Type"]],
how="left",
on="uprn"
)
# We need to merge on longitude and latitude
spatial_data_to_uprn = read_pickle_from_s3(
s3_file_name="scustomers/Stonewater/clustering/spatial_data_to_uprn.pkl",
bucket_name="retrofit-data-dev"
)
# Function to convert specific columns to bool dtype
def convert_specific_columns_to_bool(df, columns):
for column in columns:
if column in df.columns:
df[column] = df[column].astype(bool)
return df
spatial_data_to_uprn = [convert_specific_columns_to_bool(
df, ['conservation_status', 'is_listed_building', 'is_heritage_building']
) for df in spatial_data_to_uprn]
spatial_data_to_uprn = pd.concat(spatial_data_to_uprn)
spatial_data_to_uprn = spatial_data_to_uprn.drop(
columns=["partition", "filename"]
).rename(columns={"UPRN": "uprn"})
spatial_data_to_uprn["uprn"] = spatial_data_to_uprn["uprn"].astype(str).astype("Int64")
mapping_data = mapping_data.merge(
clustering_features[["internal_id", "LONGITUDE", "LATITUDE"]],
spatial_data_to_uprn[
["uprn", "LONGITUDE", "LATITUDE", "conservation_status", "is_listed_building", "is_heritage_building"]
],
how="left",
on="uprn"
)
mapping_data = mapping_data.drop(columns=["internal_id"])
@ -95,38 +118,28 @@ with open("etl/customers/stonewater/map_app/Stonewater Mapping Data.json", "w")
f.write(json.dumps(mapping_data.to_dict(orient="records")))
# We also include some data for visualising the breakdown of EPCS
proportion_of_real_epcs = clustering_features["estimated"].value_counts().to_frame().reset_index()
# Invert the true and false
proportion_of_real_epcs["estimated"] = ~proportion_of_real_epcs["estimated"]
proportion_of_real_epcs = proportion_of_real_epcs.rename(
columns={"estimated": "is_real_epc"}
)
# proportion_of_real_epcs = (~clustering_features["estimated"]).value_counts().to_frame().reset_index()
# proportion_of_real_epcs = proportion_of_real_epcs.rename(
# columns={"estimated": "is_real_epc"}
# )
#
# with open("etl/customers/stonewater/map_app/Stonewater real EPC breakdown.json", "w") as f:
# f.write(json.dumps(proportion_of_real_epcs.to_dict(orient="records")))
with open("etl/customers/stonewater/map_app/Stonewater real EPC breakdown.json", "w") as f:
f.write(json.dumps(proportion_of_real_epcs.to_dict(orient="records")))
# Produce the breakdown of EPC ratings for properties to be surveyed
clustering_features["representative_epc"] = clustering_features["representative_sap"].apply(sap_to_epc)
# Produce the breakdown of EPC ratings
epc_rating_breakdown = (
clustering_features[~clustering_features["estimated"]]["current-energy-rating"]
clustering_features[clustering_features["archetype_representative"]]["representative_epc"]
.value_counts()
.to_frame()
.reset_index()
)
epc_rating_breakdown = epc_rating_breakdown.rename(
columns={"current-energy-rating": "EPC"}
columns={"index": "EPC", "representative_epc": "count"}
)
with open("etl/customers/stonewater/map_app/Stonewater EPC rating breakdown.json", "w") as f:
f.write(json.dumps(epc_rating_breakdown.to_dict(orient="records")))
epc_a_properties = clustering_features[
(clustering_features["current-energy-rating"] == "A")
& (~clustering_features["estimated"])
]
epc_a_properties = epc_a_properties.merge(
stonewater_asset_list,
on="internal_id",
how="inner"
)

View file

@ -13,12 +13,13 @@ import numpy as np
import pandas as pd
import time
from utils.s3 import save_data_to_s3, read_excel_from_s3, read_from_s3, read_dataframe_from_s3_parquet, \
save_dataframe_to_s3_parquet, save_pickle_to_s3
save_dataframe_to_s3_parquet, save_pickle_to_s3, read_pickle_from_s3
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from scipy.spatial.distance import cdist
from sklearn.metrics import pairwise_distances_argmin_min
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
@ -1083,11 +1084,11 @@ def compile_data():
spatial_data_to_uprn = pd.concat(spatial_data_to_uprn)
# TODO: Let's store this in s3
save_data_to_s3(
data=json.dumps(spatial_data_to_uprn.to_dict("records")),
s3_file_name="scustomers/Stonewater/clustering/spatial_data_to_uprn.json",
bucket_name="retrofit-data-dev"
)
# save_data_to_s3(
# data=json.dumps(spatial_data_to_uprn.to_dict("records")),
# s3_file_name="scustomers/Stonewater/clustering/spatial_data_to_uprn.json",
# bucket_name="retrofit-data-dev"
# )
# We merge this spatial data onto final EPCS
@ -1429,17 +1430,17 @@ def compile_data_final():
older_epcs_batch_2[property["internal_id"]] = searcher.older_epcs
# Store in S3
# TODO - read in instead of running
save_pickle_to_s3(
data=epc_data_batch_2,
s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.pkl",
bucket_name="retrofit-data-dev"
)
save_pickle_to_s3(
data=older_epcs_batch_2,
s3_file_name="customers/Stonewater/clustering/older_epcs_batch_2.pkl",
bucket_name="retrofit-data-dev"
)
# save_pickle_to_s3(
# data=epc_data_batch_2,
# s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.pkl",
# bucket_name="retrofit-data-dev"
# )
#
# save_pickle_to_s3(
# data=older_epcs_batch_2,
# s3_file_name="customers/Stonewater/clustering/older_epcs_batch_2.pkl",
# bucket_name="retrofit-data-dev"
# )
epc_data_batch_2 = pd.DataFrame(epc_data_batch_2)
complete_epcs = pd.concat([epc_data, epc_data_batch_2])
@ -1799,6 +1800,10 @@ def compile_data_final():
'is_cavity_wall', 'is_solid_brick', 'property-type', 'is_pitched', 'is_flat', 'has_dwelling_above'
]
additional_features = [
]
# Define the preprocessing for numerical and categorical features
numerical_features = property_attributes.select_dtypes(include=['int64', 'float64']).columns.tolist()
categorical_features = property_attributes.select_dtypes(include=['object', 'category']).columns.tolist()
@ -1957,3 +1962,710 @@ def pull_ideal_postcodes(missing_uprn_with_udprn):
result["result"]
)
completed_id += 1
def updated_version():
"""
This version of the clustering factors in the updates recieved from Stonewater to simplify the archetyping process
using fewer variables and also factoring in their internal data sources
This work began on the 23rd July 2024
:return:
"""
########################################################################
# Read in data
########################################################################
asset_list = read_asset_list()
asset_list, uprn_lookup_2 = merge_uprn_to_asset_list(asset_list)
# Read in the properties that have been included in Osmosis' wave 2.1
osmosis_wave_2_1_asset_ids, osmosis_wave_2_1 = read_omosis_wave_2_1()
asset_list["In Osmosis Wave 2.1"] = asset_list["customer_asset_id"].isin(osmosis_wave_2_1_asset_ids)
# We also check the address & postcode
asset_list["In Osmosis Wave 2.1"] = np.where(
asset_list["address1"].isin(osmosis_wave_2_1["Name"]),
True,
asset_list["In Osmosis Wave 2.1"]
)
priority_postcodes, previous_waves_address_id, master_sheet = read_stonewater_asset_data()
# Pull in the EPC data
epc_data = read_epc_data(uprn_lookup_2)
# Pull in the spatial data to UPRN
spatial_data_to_uprn = read_pickle_from_s3(
s3_file_name="scustomers/Stonewater/clustering/spatial_data_to_uprn.pkl",
bucket_name="retrofit-data-dev"
)
# Function to convert specific columns to bool dtype
def convert_specific_columns_to_bool(df, columns):
for column in columns:
if column in df.columns:
df[column] = df[column].astype(bool)
return df
spatial_data_to_uprn = [convert_specific_columns_to_bool(
df, ['conservation_status', 'is_listed_building', 'is_heritage_building']
) for df in spatial_data_to_uprn]
spatial_data_to_uprn = pd.concat(spatial_data_to_uprn)
spatial_data_to_uprn = spatial_data_to_uprn.drop(
columns=["partition", "filename"]
).rename(columns={"UPRN": "uprn"})
spatial_data_to_uprn["uprn"] = spatial_data_to_uprn["uprn"].astype(str)
########################################################################
# Prepare the data
########################################################################
# Filter the asset list down to the priority postcodes
asset_list["is_priority_postcode"] = asset_list["postcode"].isin(priority_postcodes)
master_sheet = master_sheet[
master_sheet["Address ID"].isin(
asset_list["external_address_id"].values
)
]
master_sheet["days_since_lodgement"] = (
datetime.now() - pd.to_datetime(master_sheet["Lodgement Date"], errors="coerce", dayfirst=True)
).dt.days
asset_list = asset_list.drop(columns=["Lodgement Date"]).merge(
master_sheet[["Address ID", "days_since_lodgement", "Lodgement Date", "EPC Rating"]],
how="left",
left_on="external_address_id",
right_on="Address ID"
)
asset_list = asset_list.merge(
epc_data[["internal_id", "current-energy-efficiency", "lodgement-date", "estimated"]],
how="left",
on="internal_id"
)
asset_list["days_since_lodgement_epc"] = (
datetime.now() - pd.to_datetime(asset_list["lodgement-date"], errors="coerce", dayfirst=True)
).dt.days
# Flag properties that were surveyed within the last 5 years
asset_list["epc_within_5_years"] = asset_list["days_since_lodgement_epc"] < 5 * 365
# Identify properties where they've had an EPC done within the last 5 years, where the SAP rating is already
# a EPC C. Alternatively, any property with an EPC rating of 80 or above is also considered, regardless of when
# the EPC is done
asset_list["is_epc_c_or_above"] = (
((asset_list["EPC Rating"] >= 69) & asset_list["epc_within_5_years"]) |
(asset_list["EPC Rating"] >= 80)
)
clustering_features = asset_list[
asset_list["is_priority_postcode"] & ~asset_list["In Osmosis Wave 2.1"] & ~asset_list["is_epc_c_or_above"] &
~pd.isnull(asset_list["uprn"])
][
[
"internal_id", "uprn", "udprn", "customer_asset_id", "postcode", "house_number", "address1", "address2",
"city_town", "county", "external_address_id", "owner", "days_since_lodgement", "Lodgement Date",
"epc_within_5_years", "EPC Rating", "estimated", "current-energy-efficiency", "lodgement-date",
]
]
# Merge on the SAP data
clustering_features = clustering_features.merge(
master_sheet[
["Address ID", "SAP"]
].rename(columns={"SAP": "parity_modelled_sap"}),
how="left",
left_on="external_address_id",
right_on="Address ID"
)
# For SAP, we use the most recent EPC if epc_within_5_years is True, otherwise we use the parity modelled sap
clustering_features["current-energy-efficiency"] = clustering_features["current-energy-efficiency"].astype(float)
clustering_features["representative_sap"] = np.where(
clustering_features["epc_within_5_years"],
clustering_features["current-energy-efficiency"],
clustering_features["parity_modelled_sap"]
)
# We remove the final three entries from postcode to give us postal region. Removing two gives us 415 values which
# is too many
clustering_features["postal_region"] = clustering_features["postcode"].str[:-3]
# Merge on spatial features
clustering_features = clustering_features.merge(
spatial_data_to_uprn[["uprn", "conservation_status", "is_listed_building", "is_heritage_building"]],
how="left",
on="uprn"
)
# incorect_epcs = clustering_features[
# clustering_features["EPC Rating"] != clustering_features["current-energy-efficiency"]]
# incorect_epcs = incorect_epcs[
# ~pd.isnull(incorect_epcs["current-energy-efficiency"]) & pd.isnull(incorect_epcs["estimated"])
# ]
# incorect_epcs = incorect_epcs.rename(columns={"current-energy-efficiency": "Current SAP Rating"})
# # Store data
# incorect_epcs.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Stonewater/Incorrect EPCs.csv", index=False)
# We add in the key features, which are used for clustering
master_sheet_clustering_features = master_sheet[
["Address ID", "Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age", "Total Floor Area"]
].copy()
# Step 1: Remap walls - we end up with 11 types
master_sheet_clustering_features["walls_reduced"] = master_sheet_clustering_features["Walls"].replace(
{
"TimberFrame: AsBuilt": "Other wall type, as built",
"SystemBuilt: AsBuilt": "Other wall type, as built",
"Sandstone: AsBuilt": "Other wall type, as built",
"Sandstone: Internal": "Other wall type, internal or external",
"SystemBuilt: External": "Other wall type, internal or external",
"GraniteOrWhinstone: AsBuilt": "Other wall type, as built",
"TimberFrame: Internal": "Other wall type, internal or external",
"Cavity: FilledCavityPlusInternal": "Cavity: FilledCavity",
"SystemBuilt: Internal": "Other wall type, internal or external",
"Cavity: Internal": "Other wall type, internal or external",
}
)
# Step 2: Remap roofs - we split on the : where the first part of the string gives us the roof type, the second
# gives us the insulation thickness
# Clean an incorrect value
master_sheet_clustering_features["Roofs"] = master_sheet_clustering_features["Roofs"].replace(
{
"PitchedWithSlopingCeiling: mm250": "PitchedWithSlopingCeiling: 250mm",
"PitchedWithSlopingCeiling: 150mm+": "PitchedWithSlopingCeiling: 150mm",
'PitchedWithSlopingCeiling: mm25': "PitchedWithSlopingCeiling: 25mm",
'PitchedWithSlopingCeiling: mm200': "PitchedWithSlopingCeiling: 200mm",
'AnotherDwellingAbove: 50mm': 'PitchedNormalLoftAccess: 50mm',
}
)
master_sheet_clustering_features[['roof_type', 'roof_insulation_thickness']] = (
master_sheet_clustering_features['Roofs'].apply(
lambda x: pd.Series(x.split(':', 1) if ':' in x else [x, ''])
)
)
# Strip any extra whitespace
master_sheet_clustering_features['roof_type'] = master_sheet_clustering_features['roof_type'].str.strip()
master_sheet_clustering_features['roof_insulation_thickness'] = (
master_sheet_clustering_features['roof_insulation_thickness'].str.strip()
)
def map_thickness(thickness):
try:
value = float(thickness.replace('mm', '').replace('+', '').replace(' ', ''))
return "Above 250mm" if value > 250 else "Below 250mm"
except ValueError:
return thickness # Return the original value if it cannot be converted to a float
master_sheet_clustering_features['roof_insulation_category'] = (
master_sheet_clustering_features['roof_insulation_thickness'].apply(map_thickness)
)
# Ideas
# 1) We might need to remap the roof type to pitched, flat or another dwelling above and then have the access
# as a secondary category
# 2) Split out the (community) tag in the fuel as a secondary feature, which isn't strictly split
# (could split on :, take first part)
clustering_features = clustering_features.merge(
master_sheet_clustering_features,
how="left",
on="Address ID"
)
# Reduce down to the final set of features we need
clustering_features = clustering_features[
[
"internal_id",
"Property Type",
# Location
"postal_region",
'conservation_status',
'is_listed_building',
'is_heritage_building',
"county",
# Walls
"walls_reduced",
# Roof
"roof_type",
"roof_insulation_category",
# Heating
"Heating",
# Fuel
"Main Fuel",
"Age",
"Total Floor Area",
"representative_sap",
"days_since_lodgement",
]
]
clustering_features["days_since_lodgement"] = clustering_features["days_since_lodgement"].fillna(99999)
def split_property_type(row):
parts = row.split(':')
property_type = parts[0].strip()
built_form = parts[1].strip() if len(parts) > 1 else ''
property_extended_feature = parts[2].strip() if len(parts) > 2 else ''
return pd.Series([property_type, built_form, property_extended_feature])
clustering_features[['property_type', 'built_form', 'property_extended_feature']] = (
clustering_features['Property Type'].apply(split_property_type)
)
clustering_features = clustering_features.drop(columns=["Property Type"])
# These are the variables we MUST split by
grouping_columns = [
"property_type",
"walls_reduced",
"roof_type",
"Main Fuel",
"county",
]
def combine_small_groups(clustering_features, grouping_columns, threshold=2):
# Identify small groups
group_sizes = clustering_features.groupby(grouping_columns).size()
small_groups = group_sizes[group_sizes <= threshold].index.tolist()
# Remove small groups from the original clustering_features
small_group_data = clustering_features[clustering_features.set_index(grouping_columns).index.isin(small_groups)]
clustering_features_ok = clustering_features[
~clustering_features.set_index(grouping_columns).index.isin(small_groups)
]
if small_group_data.empty:
return clustering_features
# One-Hot Encode categorical variables
categorical_features = (
clustering_features_ok.drop(columns=["internal_id"])
.select_dtypes(include=['object', 'category']).columns.tolist()
)
ohe = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
ohe.fit(clustering_features_ok[categorical_features])
# Combine small groups with the nearest available group
small_group_ohe = ohe.transform(small_group_data[categorical_features])
large_group_ohe = ohe.transform(clustering_features_ok[categorical_features])
numerical_features = clustering_features_ok.select_dtypes(include=['int64', 'float64']).columns.tolist()
small_group_numerical = small_group_data[numerical_features].values
large_group_numerical = clustering_features_ok[numerical_features].values
# Concatenate one-hot encoded categorical and numerical features
small_group_features = np.hstack([small_group_ohe, small_group_numerical])
large_group_features = np.hstack([large_group_ohe, large_group_numerical])
# Calculate distances and find nearest groups
closest_groups, _ = pairwise_distances_argmin_min(small_group_features, large_group_features)
closest_group_index = clustering_features_ok.iloc[closest_groups].index
# Update small groups to the nearest large group
for small_group, closest_group in zip(small_groups, closest_group_index):
small_group_mask = small_group_data.set_index(grouping_columns).index == small_group
small_group_data.loc[small_group_mask, grouping_columns] = clustering_features_ok.loc[
closest_group, grouping_columns].values
combined_data = pd.concat([clustering_features_ok, small_group_data])
return combined_data
clustering_features_combined = combine_small_groups(clustering_features, grouping_columns)
########################################################################
# Clustering
########################################################################
numerical_features = clustering_features_combined.select_dtypes(include=['int64', 'float64']).columns.tolist()
categorical_features = clustering_features_combined.select_dtypes(include=['object', 'category']).columns.tolist()
categorical_features = [c for c in categorical_features if c not in ["internal_id", grouping_columns]]
for col in categorical_features:
clustering_features_combined[col] = clustering_features_combined[col].astype(str)
id_column = 'internal_id'
n_clusters = 450
random_state = 0
training_data_grouped = clustering_features_combined.groupby(grouping_columns)
group_sizes = {name: len(group) for name, group in training_data_grouped}
total_size = sum(group_sizes.values())
cluster_allocation = {
name: max(1, int(round(n_clusters * (size / total_size)))) for name, size in group_sizes.items()
}
# Adjust cluster allocation to ensure total clusters sum to 450
cluster_allocation = adjust_clusters(cluster_allocation, n_clusters)
final_clusters = []
for group_variables, group_data in tqdm(training_data_grouped, total=len(training_data_grouped)):
group_n_clusters = cluster_allocation[group_variables]
group_data.set_index(id_column, inplace=True)
preprocessor = ColumnTransformer(
transformers=[
('num', StandardScaler(), numerical_features),
('cat', OneHotEncoder(), categorical_features)
]
)
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
('kmeans', KMeans(n_clusters=group_n_clusters, random_state=random_state))])
# Fit the pipeline to the data
pipeline.fit(group_data)
# Transform the data using the fitted pipeline
processed_data = pipeline.named_steps['preprocessor'].transform(group_data)
# Get cluster labels
group_data['cluster'] = pipeline.named_steps['kmeans'].labels_
# Get centroids (already in the same transformed space)
centroids = pipeline.named_steps['kmeans'].cluster_centers_
# if the data isn't an array, make it one
if not isinstance(processed_data, np.ndarray):
processed_data = processed_data.toarray()
# Calculate distances from each point to the centroid of its cluster
distances_to_centroids = [
cdist(processed_data[i].reshape(1, -1), centroids[label].reshape(1, -1)).flatten()[0]
for i, label in enumerate(group_data['cluster'])
]
group_data['distance_to_centroid'] = distances_to_centroids
# Ranking rows by distance within each cluster
group_data['rank'] = group_data.groupby('cluster')['distance_to_centroid'].rank(method='first')
# Sorting to verify
group_data.sort_values(by=['cluster', 'rank'], inplace=True)
group_data.reset_index(inplace=True)
to_append = group_data[["internal_id", "cluster", "rank"]].copy()
to_append["cluster"] = to_append["cluster"].astype(str) + str(group_variables)
final_clusters.append(to_append)
final_clusters = pd.concat(final_clusters)
# remap the clusters from the current names to 1 -> n_clusters
cluster_mapping = {cluster: i for i, cluster in enumerate(final_clusters["cluster"].unique())}
final_clusters["cluster"] = final_clusters["cluster"].map(cluster_mapping)
final_clusters["cluster"] = final_clusters["cluster"].astype(str)
assigned_clusters = clustering_features_combined.merge(
final_clusters, how="left", on="internal_id"
)
assigned_clusters["archetype_representative"] = assigned_clusters["rank"] == 1
asset_list_with_archetypes = asset_list.merge(
assigned_clusters[["internal_id", "cluster", "archetype_representative", "rank"]], how="left",
on="internal_id"
).merge(
master_sheet_clustering_features[["Address ID", "Property Type", "Walls", "Roofs", "Heating"]],
how="left",
on="Address ID"
)
# We populate the reasons for no archetype
# 1) If it's not a priority postcode
asset_list_with_archetypes["cluster"] = np.where(
~asset_list_with_archetypes["is_priority_postcode"],
"NOT PRIORITY POSTCODE",
asset_list_with_archetypes["cluster"]
)
# 2) If it's EPC C or above
asset_list_with_archetypes["cluster"] = np.where(
asset_list_with_archetypes["is_epc_c_or_above"],
"EPC C OR ABOVE",
asset_list_with_archetypes["cluster"]
)
# If it's in Wave 2.1
asset_list_with_archetypes["cluster"] = np.where(
asset_list_with_archetypes["In Osmosis Wave 2.1"],
"IN WAVE 2.1",
asset_list_with_archetypes["cluster"]
)
# Has missing uprn
asset_list_with_archetypes["cluster"] = np.where(
pd.isnull(asset_list_with_archetypes["uprn"]),
"MISSING UPRN",
asset_list_with_archetypes["cluster"]
)
asset_list_with_archetypes["rank"] = asset_list_with_archetypes["rank"].fillna(-999)
asset_list_with_archetypes["rank"] = asset_list_with_archetypes["rank"].astype(int).astype(str)
asset_list_with_archetypes["rank"] = asset_list_with_archetypes["rank"].replace("-999", "NO ARCHETYPE")
asset_list_with_archetypes["archetype_representative"] = (
asset_list_with_archetypes["archetype_representative"].fillna(False)
)
asset_list_with_archetypes.to_csv("Stonewater asset list with archetypes V3.1.csv", index=False)
# Produce the archetyping features
archetyping_features_csv = assigned_clusters[
[
"internal_id", "cluster", "archetype_representative", "rank", "conservation_status", "is_listed_building",
"is_heritage_building", "postal_region", "county", "representative_sap", "days_since_lodgement"
]
].merge(
asset_list[
["internal_id", "uprn", "external_address_id"]
],
how="left",
on="internal_id"
).merge(
master_sheet_clustering_features,
how="left",
right_on="Address ID",
left_on="external_address_id"
).drop(columns=["Address ID"]).rename(
columns={
"internal_id": "Osm. ID",
"external_address_id": "Address ID",
}
)
archetyping_features_csv = archetyping_features_csv.sort_values(["cluster", "rank"], ascending=True)
archetyping_features_csv.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Stonewater/Stonewater archetyping features V3.csv", index=False
)
representatives = archetyping_features_csv[archetyping_features_csv["archetype_representative"]]
print(representatives["postal_region"].nunique())
print(representatives["county"].nunique())
def read_asset_list():
asset_list = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24.xlsx",
header=4
)
udprn_data = pd.read_excel(
"/Users/khalimconn-kowlessar/Downloads/UDPRN updated RA Sample for 5 year programme.xlsx", header=0
)[["AddressId", "UDPRN"]].rename(columns={"AddressId": "Address ID"})
udprn_data["UDPRN"] = udprn_data["UDPRN"].astype("Int64").astype(str)
udprn_data["Address ID"] = udprn_data["Address ID"].astype(str)
asset_list = asset_list.merge(udprn_data, how="inner", on="Address ID")
asset_list = asset_list.rename(columns={"UDPRN": "udprn"})
asset_list = asset_list.rename(
columns={
"Osm. ID": "internal_id",
"Org. ref.": "customer_asset_id",
"Postcode": "postcode",
"House no": "house_number",
"Name": "address1",
"Address line 2": "address2",
"City/Town": "city_town",
"County": "county",
"Address ID": "external_address_id",
"Owning body": "owner"
}
)
asset_list["full_address"] = np.where(
~pd.isnull(asset_list["address2"]),
(
asset_list["address1"] + ", " +
asset_list["address2"] + ", " +
asset_list["city_town"].str.title() + ", " +
asset_list["postcode"]
),
asset_list["address1"] + ", " +
asset_list["city_town"].str.title() + ", " +
asset_list["postcode"]
)
return asset_list
def merge_uprn_to_asset_list(asset_list):
# Read in the lookups
uprn_lookup_1 = pd.DataFrame(json.loads(read_from_s3(
bucket_name="retrofit-data-dev",
s3_file_name="scustomers/Stonewater/clustering/address_uprn_udprn_lookup.json"
)))
uprn_lookup_1["match_type"] = "Exact"
uprn_lookup_2 = pd.DataFrame(json.loads(read_from_s3(
bucket_name="retrofit-data-dev",
s3_file_name="scustomers/Stonewater/clustering/address_uprn_udprn_lookup_2.json"
)))
uprn_lookup_2 = uprn_lookup_2.rename(
columns={
"epc_address": "standardised_address",
"epc_postcode": "standardised_postcode"
}
)
uprn_lookup_2["match_type"] = "EPC"
uprn_lookup_2["uprn"] = np.where(
uprn_lookup_2["internal_id"] == 1091,
83143766,
uprn_lookup_2["uprn"]
)
uprn_lookup_3 = pd.DataFrame(json.loads(read_from_s3(
bucket_name="retrofit-data-dev",
s3_file_name="customers/Stonewater/clustering/ideal-postcodes_pull_2.json"
)))
uprn_lookup_3["standardised_address"] = uprn_lookup_3[["line_1", "line_2", "line_3", "district", "postcode"]].apply(
concatenate_row, axis=1
)
uprn_lookup_3 = uprn_lookup_3[
["udprn", "uprn", "standardised_address", "postcode"]
].rename(columns={"postcode": "standardised_postcode"})
uprn_lookup_3["match_type"] = "Exact"
uprn_lookup_4_basis = pd.read_csv("manual_fix_uprns-populated.csv", index_col=False)
uprn_lookup_4_basis["os_option_1_uprn"] = uprn_lookup_4_basis["os_option_1_uprn"].astype(str)
uprn_lookup_4_basis["os_option_2_uprn"] = uprn_lookup_4_basis["os_option_2_uprn"].astype("Int64").astype(str)
# prepare lookup 4
uprn_lookup_4 = []
for _, x in uprn_lookup_4_basis.iterrows():
property_type = None
built_form = None
if x["option"] == 1:
uprn = x["os_option_1_uprn"]
standardised_address = x["os_option_1_address"]
postcode = x["os_option_1_postcode"]
elif x["option"] == 2:
uprn = x["os_option_2_uprn"]
standardised_address = x["os_option_2_address"]
postcode = x["os_option_2_address"].split(", ")[-1]
else:
uprn = x["manual_uprn"]
standardised_address = x["manual_address"]
postcode = x["manual_postcode"]
uprn_lookup_4.append(
{
"internal_id": x["internal_id"],
"external_address_id": x["external_address_id"],
"uprn": uprn,
"standardised_address": standardised_address,
"standardised_postcode": postcode,
"property_type": property_type,
"built_form": built_form
}
)
uprn_lookup_4 = pd.DataFrame(uprn_lookup_4)
uprn_lookup_4["match_type"] = "Fuzzy"
# concat
uprn_lookup = pd.concat([uprn_lookup_1, uprn_lookup_2])
assert len(uprn_lookup) + len(uprn_lookup_3) + len(uprn_lookup_4) == len(asset_list)
# Final preps of lookups
uprn_lookup_3["udprn"] = uprn_lookup_3["udprn"].astype(str)
uprn_lookup_3 = uprn_lookup_3.merge(
asset_list[["udprn", "internal_id", "external_address_id"]], how="left", on="udprn"
)
uprn_lookup = pd.concat([
uprn_lookup,
uprn_lookup_3,
uprn_lookup_4
])
uprn_lookup["external_address_id"] = uprn_lookup["external_address_id"].astype(str)
asset_list = asset_list.merge(
uprn_lookup.drop(columns=["udprn"]),
how="inner",
on=["internal_id", "external_address_id"]
)
return asset_list, uprn_lookup_2
def read_omosis_wave_2_1():
osmosis_wave_2_1 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Stonewater/Stonewater Osmosis SHDF 2.1.xlsx",
header=4,
)
# Remove double spaces from "Name"
osmosis_wave_2_1["Name"] = osmosis_wave_2_1["Name"].str.replace(" ", " ")
osmosis_wave_2_1 = osmosis_wave_2_1.rename(columns={"Unnamed: 1": "Location"})
osmosis_wave_2_1 = osmosis_wave_2_1[osmosis_wave_2_1["Location"] != "Removed from program"]
# We produce a cleaned list of asset ids from osmosis_wave_2_1
osmosis_wave_2_1_asset_ids = [x for x in osmosis_wave_2_1["Asset ID"].values if not pd.isnull(x)]
# We have some ids that are in the form 'id1, id2' so we split them
osmosis_wave_2_1_asset_ids = [int(x.strip()) for id_str in osmosis_wave_2_1_asset_ids for x in id_str.split(",")]
return osmosis_wave_2_1_asset_ids, osmosis_wave_2_1
def read_stonewater_asset_data():
master_sheet = pd.read_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Stonewater/Osmosis Reviewed - Parity Download 18.7 - master "
"sheet.csv",
encoding='latin1'
)
master_sheet["Address ID"] = master_sheet["Address ID"].astype(str)
previous_waves = master_sheet[
(master_sheet["In Osmosis W2.1"] == "Yes") |
(master_sheet["In Wates Wave 2.1"] == "Yes") |
(master_sheet["In Liv Green Wave 2.1"] == "Yes") |
(master_sheet["In CCS Wave 2.1"] == "Yes")
].copy()
previous_waves_address_id = [str(x) for x in previous_waves["Address ID"].values if not pd.isnull(x)]
# We also read the priority postcodes
priority_postcodes = pd.read_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Stonewater/Osmosis Reviewed - Parity Download 18.7 - priority "
"postcodes.csv",
header=17
)
priority_postcodes = priority_postcodes["Postcode"].tolist()
return priority_postcodes, previous_waves_address_id, master_sheet
def read_epc_data(uprn_lookup_2):
epc_data = json.loads(
read_from_s3(
bucket_name="retrofit-data-dev",
s3_file_name="customers/Stonewater/clustering/epc_data.json"
)
)
epc_data = pd.DataFrame(epc_data)
epc_data["uprn"] = np.where(
epc_data["internal_id"] == 1091,
83143766,
epc_data["uprn"]
)
# We drop come EPCS
epc_data = epc_data[epc_data["internal_id"].isin(uprn_lookup_2["internal_id"].values)]
epc_data_batch_2 = read_pickle_from_s3(
s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.pkl",
bucket_name="retrofit-data-dev"
)
epc_data_batch_2 = pd.DataFrame(epc_data_batch_2)
complete_epcs = pd.concat([epc_data, epc_data_batch_2])
return complete_epcs

View file

@ -0,0 +1,80 @@
import os
from tqdm import tqdm
import pandas as pd
from dotenv import load_dotenv
from backend.SearchEpc import SearchEpc
from backend.app.utils import sap_to_epc
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
def app():
"""
This script will retrieve EPC data, for postcodes and produce statistics on the SAP Score
:return:
"""
source_file = pd.read_excel("/Users/khalimconn-kowlessar/Downloads/Addresses - SFR rents.xlsx")
source_file["row_id"] = source_file.index
# Split out the town, which is the final portion of the string, separated by commas
source_file["Town"] = source_file["Address"].apply(lambda x: x.split(" ")[-1].strip() if not pd.isnull(x) else None)
source_file["Address"] = source_file["Address"].apply(
lambda x: " ".join(x.split(" ")[:-1]).strip() if not pd.isnull(x) else None
)
unique_postcodes = source_file[["Address", "Postcode"]].drop_duplicates()
# for each postcode, pull EPC data
collected_data = []
no_data_found = []
no_data_after_filters = []
for _, config in tqdm(unique_postcodes.iterrows(), total=len(unique_postcodes)):
address1 = config["Address"] if not pd.isnull(config["Address"]) else ""
searcher = SearchEpc(
postcode=config["Postcode"],
address1=address1,
auth_token=EPC_AUTH_TOKEN,
os_api_key=""
)
while True:
params = {
"postcode": config["Postcode"],
"address": address1,
}
results = searcher.client.domestic.search(params=params, size=10000)
if not results:
# We strip back address1
address1 = " ".join(address1.split(" ")[:-1])
if not address1:
break
else:
break
if not results:
no_data_found.append(config)
continue
data = pd.DataFrame(results["rows"])
data["current-energy-efficiency"] = data["current-energy-efficiency"].astype(int)
# Take EPCs post 2023
data["lodgement-date"] = pd.to_datetime(data["lodgement-date"], errors="coerce")
data = data[data["lodgement-date"] >= "2023-01-01"]
# Take private nrentals
data = data[data["tenure"].isin(["rental (private)", "Rented (private)"])]
if data.empty:
no_data_after_filters.append(config)
continue
agg = data.groupby(["property-type", "built-form"])["current-energy-efficiency"].mean().reset_index()
agg = agg.rename(columns={"current-energy-efficiency": "Average SAP"})
agg["Average EPC"] = agg["Average SAP"].apply(sap_to_epc)
agg.insert(0, "Postcode", config["Postcode"])
agg.insert(0, "Address", address1)
collected_data.append(agg)
collected_df = pd.concat(collected_data)
collected_df.to_csv("EPC Averages SFR.csv", index=False)

View file

@ -0,0 +1,37 @@
import pandas as pd
from utils.s3 import save_csv_to_s3
PORTFOLIO_ID = 85
USER_ID = 8
def app():
asset_list = [
{
"address": "120 Yarningale Road",
"postcode": "B14 6NB",
"uprn": 100070575194
}
]
asset_list = pd.DataFrame(asset_list)
filename = f"{USER_ID}/{PORTFOLIO_ID}/sample.csv"
save_csv_to_s3(
dataframe=asset_list,
bucket_name="retrofit-plan-inputs-dev",
file_name=filename
)
body = {
"portfolio_id": str(PORTFOLIO_ID),
"housing_type": "Private",
"goal": "Increase EPC",
"goal_value": "C",
"trigger_file_path": filename,
"already_installed_file_path": "",
"patches_file_path": "",
"non_invasive_recommendations_file_path": "",
"budget": None,
}
print(body)

View file

@ -0,0 +1,3 @@
# Survey Extraction App
This app is responsible survey data from energy assessment XMLs

View file

@ -0,0 +1,9 @@
def main():
"""
This function executes the main process, which will retrieve data from the specified locations, extract the data
fields and store them to our database
:return:
"""
# TODO: Build solution to get this data from Onedrive and store what we need in S3
# In s3, we have a bucket called retrofit-energy-assessments-{stage} which

View file

@ -175,6 +175,12 @@ module "retrofit_hotwater_kwh_predictions" {
allowed_origins = var.allowed_origins
}
module "retrofit_energy_assessments" {
source = "./modules/s3"
bucketname = "retrofit-energy-assessments-${var.stage}"
allowed_origins = var.allowed_origins
}
# Set up the route53 record for the API
module "route53" {
source = "./modules/route53"

View file

@ -100,9 +100,10 @@ class HeatingControlRecommender:
We can then consider the heating system itself
:return:
"""
new_description = "Controls for high heat retention storage heaters"
# We recommend upgrading to Celect type controls
ending_config = MainheatControlAttributes("Controls for high heat retention storage heaters").process()
ending_config = MainheatControlAttributes(new_description).process()
# We look at what has changed in the ending config, and compare it to the current config
simulation_config = check_simulation_difference(
new_config=ending_config, old_config=self.property.main_heating_controls
@ -110,11 +111,17 @@ class HeatingControlRecommender:
# This upgrade will only take the heating system to average energy efficiency
simulation_config["mainheatc_energy_eff_ending"] = "Good"
description_simulation = {
"mainheatcont-description": new_description,
"mainheatc-energy-eff": simulation_config["mainheatc_energy_eff_ending"]
}
self.recommendation.append(
{
"description": "upgrade heating controls to High Heat Retention Storage Heater Controls",
**self.costs.celect_type_controls(),
"simulation_config": simulation_config
"simulation_config": simulation_config,
"description_simulation": description_simulation
}
)
@ -152,7 +159,9 @@ class HeatingControlRecommender:
if not can_recommend:
return
ending_config = MainheatControlAttributes("Programmer, room thermostat and TRVS").process()
new_controls_description = "Programmer, room thermostat and TRVS"
ending_config = MainheatControlAttributes(new_controls_description).process()
# We use this to determine how we should be updating the config
simulation_config = check_simulation_difference(
new_config=ending_config, old_config=self.property.main_heating_controls
@ -161,6 +170,13 @@ class HeatingControlRecommender:
# If the current system is below good, we make it good
if self.property.data["mainheatc-energy-eff"] in ["Poor", "Very Poor", "Average"]:
simulation_config["mainheatc_energy_eff_ending"] = "Good"
else:
simulation_config["mainheatc_energy_eff_ending"] = self.property.data["mainheatc-energy-eff"]
description_simulation = {
"mainheatcont-description": new_controls_description,
"mainheatc-energy-eff": simulation_config["mainheatc_energy_eff_ending"]
}
has_programmer = not needs_programmer
has_room_thermostat = not needs_room_thermostat
@ -191,10 +207,7 @@ class HeatingControlRecommender:
"sap_points": None,
"already_installed": already_installed,
"simulation_config": simulation_config,
"description_simulation": {
"mainheatcont-description": "Programmer, room thermostat and TRVS",
"mainheatc-energy-eff": "Good"
}
"description_simulation": description_simulation
}
)
@ -221,7 +234,9 @@ class HeatingControlRecommender:
# No recommendation needed
return
ending_config = MainheatControlAttributes("Time and temperature zone control").process()
new_controls_description = "Time and temperature zone control"
ending_config = MainheatControlAttributes(new_controls_description).process()
# We use this to determine how we should be updating the config
simulation_config = check_simulation_difference(
@ -231,7 +246,13 @@ class HeatingControlRecommender:
# If the current system is below very good, we make it very good
if self.property.data["mainheatc-energy-eff"] in ["Poor", "Very Poor", "Average", "Good"]:
simulation_config["mainheatc_energy_eff_ending"] = "Very Good"
else:
simulation_config["mainheatc_energy_eff_ending"] = self.property.data["mainheatc-energy-eff"]
description_simulation = {
"mainheatcont-description": new_controls_description,
"mainheatc-energy-eff": simulation_config["mainheatc_energy_eff_ending"]
}
cost_result = self.costs.time_and_temperature_zone_control(
number_heated_rooms=int(self.property.data["number-heated-rooms"])
)
@ -255,9 +276,6 @@ class HeatingControlRecommender:
"sap_points": None,
"already_installed": already_installed,
"simulation_config": simulation_config,
"description_simulation": {
"mainheatcont-description": "Time and temperature zone control",
"mainheatc-energy-eff": "Very Good"
}
"description_simulation": description_simulation
}
)

View file

@ -55,7 +55,7 @@ class HeatingRecommender:
# TODO: We could have a system flush recommendation for an existing boiler, where there is no need to replace
# the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this
# in the Costs class, stored as SYSTEM_FLUSH_COST
exclusions = [] if exclusions is None else exclusions
self.heating_recommendations = []
@ -86,7 +86,8 @@ class HeatingRecommender:
electic_heating_has_mains = self.has_electric_heating_description and self.property.data["mains-gas-flag"]
portable_heaters_has_mains = (
self.property.main_heating["clean_description"] in ["Portable electric heaters assumed for most rooms"] and
self.property.main_heating["clean_description"] in ["Portable electric heaters assumed for most rooms"]
and
self.property.data["mains-gas-flag"]
)
@ -238,28 +239,31 @@ class HeatingRecommender:
description = description + (f" The cost includes the £"
f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant")
new_heating_description = "Air source heat pump, radiators, electric"
new_hot_water_description = "From main system"
simulation_config = {
"mainheat_energy_eff_ending": "Good",
"hot_water_energy_eff_ending": "Good"
}
description_simulation = {
"mainheat-description": "Air source heat pump, radiators, electric",
"mainheat-energy-eff": "Good",
"hot-water-energy-eff": "Good",
"hotwater-description": "From main system",
"mainheat-description": new_heating_description,
"mainheat-energy-eff": simulation_config["mainheat_energy_eff_ending"],
"hot-water-energy-eff": simulation_config["hot_water_energy_eff_ending"],
"hotwater-description": new_hot_water_description,
}
# Installation of a boiler improves the hot water system so we need to reflect this in
# the outcome of the recommendation
heating_ending_config = MainHeatAttributes("Air source heat pump, radiators, electric").process()
hotwater_ending_config = HotWaterAttributes("From main system").process()
heating_ending_config = MainHeatAttributes(new_heating_description).process()
hotwater_ending_config = HotWaterAttributes(new_hot_water_description).process()
# If the property does not currently have electric main fuel, we'll simulate the change
fuel_ending_config = {}
if self.property.main_fuel["fuel_type"] != "electricity":
fuel_ending_config = MainFuelAttributes("electricity (not community)").process()
new_fuel_description = "electricity (not community)"
fuel_ending_config = MainFuelAttributes(new_fuel_description).process()
description_simulation = {
**description_simulation,
"main-fuel": "electricity (not community)"
"main-fuel": new_fuel_description
}
# Check the simulation differences
@ -292,8 +296,7 @@ class HeatingRecommender:
description_simulation = {
**description_simulation,
"mainheatcont-description": "time and temperature zone control",
"mainheatc-energy-eff": "Very Good"
**controls_recommender.recommendation[0]["description_simulation"]
}
ashp_recommendation = {
@ -330,7 +333,14 @@ class HeatingRecommender:
return differences
def combine_heating_and_controls(
self, controls_recommendations, heating_simulation_config, costs, description, phase, heating_controls_only,
self,
controls_recommendations,
heating_simulation_config,
heating_description_simulation,
costs,
description,
phase,
heating_controls_only,
system_change
):
"""
@ -338,6 +348,7 @@ class HeatingRecommender:
into a single recommendation
:param controls_recommendations: The heating controls recommendations
:param heating_simulation_config: The simulation configuration for the heating system
:param heating_description_simulation: The simulation configuration for the heating description
:param costs: The costs of the heating system
:param description: The description of the recommendation
:param phase: The phase of the recommendation
@ -361,6 +372,7 @@ class HeatingRecommender:
for controls_switch in heating_controls_switch:
total_costs = costs.copy()
recommendation_simulation_config = heating_simulation_config.copy()
recommendation_description_simulation = heating_description_simulation.copy()
recommendation_description = description
if controls_switch:
# We add the costs of the heating controls, onto each key in the costs dictionary
@ -371,6 +383,12 @@ class HeatingRecommender:
**recommendation_simulation_config,
**controls_recommendations[0]["simulation_config"]
}
recommendation_description_simulation = {
**recommendation_description_simulation,
**controls_recommendations[0]["description_simulation"]
}
controls_description = controls_recommendations[0]['description']
# Make the first letter of the description lowercase
controls_description = (
@ -396,7 +414,8 @@ class HeatingRecommender:
"sap_points": None,
"already_installed": already_installed,
**total_costs,
"simulation_config": recommendation_simulation_config
"simulation_config": recommendation_simulation_config,
"description_simulation": recommendation_description_simulation
}
output.append(recommendation)
@ -474,8 +493,10 @@ class HeatingRecommender:
# No recommendation needed
return
new_heating_description = "Electric storage heaters, radiators"
# Set up artefacts, suitable for the simulation and regardless of controls
heating_ending_config = MainHeatAttributes("Electric storage heaters, radiators").process()
heating_ending_config = MainHeatAttributes(new_heating_description).process()
heating_simulation_config = check_simulation_difference(
new_config=heating_ending_config, old_config=self.property.main_heating
)
@ -497,9 +518,15 @@ class HeatingRecommender:
)
description = "Install high heat retention electric storage heaters"
heating_description_simulation = {
"mainheat-description": new_heating_description,
"mainheat-energy-eff": heating_simulation_config["mainheat_energy_eff_ending"],
}
recommendations = self.combine_heating_and_controls(
controls_recommendations=controls_recommender.recommendation,
heating_simulation_config=heating_simulation_config,
heating_description_simulation=heating_description_simulation,
costs=costs,
description=description,
phase=phase,
@ -580,6 +607,7 @@ class HeatingRecommender:
simulation_config = {}
boiler_costs = {}
boiler_recommendation = {}
description_simulation = {}
has_inefficient_space_heating = self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"]
@ -603,12 +631,22 @@ class HeatingRecommender:
"mainheat_energy_eff_ending": "Good",
"hot_water_energy_eff_ending": "Good"
}
description_simulation = {
"mainheat-energy-eff": simulation_config["mainheat_energy_eff_ending"],
"hot-water-energy-eff": simulation_config["hot_water_energy_eff_ending"],
}
if system_change:
# Installation of a boiler improves the hot water system so we need to reflect this in
# the outcome of the recommendation
heating_ending_config = MainHeatAttributes("Boiler and radiators, mains gas").process()
hotwater_ending_config = HotWaterAttributes("From main system").process()
fuel_ending_config = MainFuelAttributes("mains gas (not community)").process()
new_heating_description = "Boiler and radiators, mains gas"
new_hotwater_description = "From main system"
new_fuel_description = "mains gas (not community)"
heating_ending_config = MainHeatAttributes(new_heating_description).process()
hotwater_ending_config = HotWaterAttributes(new_hotwater_description).process()
fuel_ending_config = MainFuelAttributes(new_fuel_description).process()
heating_simulation_config = check_simulation_difference(
new_config=heating_ending_config, old_config=self.property.main_heating
@ -627,6 +665,13 @@ class HeatingRecommender:
**fuel_simulation_config,
}
description_simulation = {
**description_simulation,
"mainheat-description": new_heating_description,
"hotwater-description": new_hotwater_description,
"main-fuel": new_fuel_description
}
boiler_costs = self.costs.boiler(
size=f"{boiler_size}kw",
exising_room_heaters=exising_room_heaters,
@ -652,6 +697,7 @@ class HeatingRecommender:
"sap_points": None,
"already_installed": already_installed,
"simulation_config": simulation_config,
"description_simulation": description_simulation,
**boiler_costs
}
@ -675,6 +721,7 @@ class HeatingRecommender:
combined_recommendation = self.combine_heating_and_controls(
controls_recommendations=[controls_recommendation],
heating_simulation_config=simulation_config,
heating_description_simulation=description_simulation,
costs=boiler_costs,
description=boiler_recommendation["description"],
phase=recommendation_phase,

View file

@ -782,6 +782,11 @@ class Recommendations:
}
}
# Prevent from being negative
predicted_sap_points = 0 if predicted_sap_points < 0 else predicted_sap_points
predicted_co2_savings = 0 if predicted_co2_savings < 0 else predicted_co2_savings
predicted_heat_demand = 0 if predicted_heat_demand < 0 else predicted_heat_demand
if rec["type"] == "low_energy_lighting":
# For the moment, we cap the number of SAP points that can be achieved by ventilation at 2
rec["sap_points"] = min(predicted_sap_points, LightingRecommendations.SAP_LIMIT)

View file

@ -524,6 +524,10 @@ class WallRecommendations(Definitions):
"already_installed": already_installed,
"sap_points": None,
"simulation_config": simulation_config,
"description_simulation": {
"walls-description": new_description,
"walls-energy-eff": simulation_config["walls_energy_eff_ending"]
},
**cost_result
}
)