mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Merge pull request #327 from Hestia-Homes/main
Pausing solar api, working on survey extraction
This commit is contained in:
commit
6e714127c6
19 changed files with 1072 additions and 142 deletions
|
|
@ -350,8 +350,21 @@ class Property:
|
|||
r for r in property_representative_recommendations
|
||||
if r["phase"] <= phase
|
||||
]
|
||||
epc_transformations = [x["description_simulation"] for x in represenative_recs_to_this_phase]
|
||||
|
||||
|
||||
# TODO: This is placeholder, but it's to handle the case of having both internal and external wall
|
||||
# insulation as options. This will cause the process below to fall over, so we take just
|
||||
# external wall insulation in epc_transformations, if we have both
|
||||
types = [
|
||||
x["type"] for x in represenative_recs_to_this_phase
|
||||
]
|
||||
if "external_wall_insulation" in types and "internal_wall_insulation" in types:
|
||||
epc_transformations = [
|
||||
x["description_simulation"] for x in represenative_recs_to_this_phase if
|
||||
x["type"] != "internal_wall_insulation"
|
||||
]
|
||||
else:
|
||||
epc_transformations = [x["description_simulation"] for x in represenative_recs_to_this_phase]
|
||||
|
||||
# It is possible that we could have two simulations applied to the same descriptions
|
||||
# We extract these out
|
||||
phase_epc_transformation = {}
|
||||
|
|
|
|||
|
|
@ -488,13 +488,6 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
"carbon_ending"]
|
||||
)
|
||||
|
||||
from utils.s3 import save_dataframe_to_s3_parquet
|
||||
save_dataframe_to_s3_parquet(
|
||||
bucket_name="retrofit-datalake-dev",
|
||||
file_key="recommendations_scoring_data_11th_july.parquet",
|
||||
df=recommendations_scoring_data
|
||||
)
|
||||
|
||||
model_api = ModelApi(portfolio_id=body.portfolio_id, timestamp=created_at)
|
||||
|
||||
all_predictions = model_api.predictions_template()
|
||||
|
|
@ -510,8 +503,6 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
for key, scored in predictions_dict.items():
|
||||
all_predictions[key] = pd.concat([all_predictions[key], scored])
|
||||
|
||||
prediction_df = all_predictions["heating_cost_predictions"]
|
||||
|
||||
# Insert the predictions into the recommendations and run the optimiser
|
||||
# TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a
|
||||
# possibility with heating system
|
||||
|
|
|
|||
|
|
@ -133,7 +133,7 @@ def app():
|
|||
energy_consumption_data = []
|
||||
for i, directory in tqdm(enumerate(epc_directories), total=len(epc_directories)):
|
||||
# Skip the leading directories; the cut-off is the index threshold checked below (no longer 50)
|
||||
if i < 127:
|
||||
if i < 250:
|
||||
continue
|
||||
|
||||
data = pd.read_csv(directory / "certificates.csv", low_memory=False)
|
||||
|
|
|
|||
|
|
@ -1 +1 @@
|
|||
[{"EPC": "D", "count": 1718}, {"EPC": "C", "count": 1343}, {"EPC": "E", "count": 538}, {"EPC": "F", "count": 80}, {"EPC": "B", "count": 52}, {"EPC": "G", "count": 3}, {"EPC": "A", "count": 2}]
|
||||
[{"EPC": "D", "count": 332}, {"EPC": "C", "count": 68}, {"EPC": "E", "count": 44}, {"EPC": "F", "count": 6}]
|
||||
File diff suppressed because one or more lines are too long
|
|
@ -1 +1 @@
|
|||
[{"is_real_epc": true, "count": 3736}, {"is_real_epc": false, "count": 1509}]
|
||||
[{"index": true, "is_real_epc": 3736}, {"index": false, "is_real_epc": 1509}]
|
||||
|
|
@ -31,7 +31,8 @@ def make_epc_rating_piechart(epc_rating_breakdown):
|
|||
labels = [x["EPC"] for x in epc_rating_breakdown]
|
||||
values = [x["count"] for x in epc_rating_breakdown]
|
||||
|
||||
marker_colors = ["#117d58", "#2da55c", "#8dbd40", "#f7cd14", "#f3a96a", "#ef8026", "#e41e3b"]
|
||||
# marker_colors = ["#117d58", "#2da55c", "#8dbd40", "#f7cd14", "#f3a96a", "#ef8026", "#e41e3b"]
|
||||
marker_colors = ["#8dbd40", "#f7cd14", "#f3a96a", "#ef8026", "#e41e3b"]
|
||||
|
||||
fig = go.Figure(
|
||||
data=[go.Pie(labels=labels, values=values, marker_colors=marker_colors, sort=False)],
|
||||
|
|
@ -53,7 +54,10 @@ def make_map(locations):
|
|||
# Create custom hover text
|
||||
df['hover_text'] = df.apply(
|
||||
lambda row: f"UPRN: {int(row['uprn'])}<br>Address: {row['standardised_address']}<br>Postcode: "
|
||||
f"{row['standardised_postcode']}<br>Latitude: {row['LATITUDE']}<br>Longitude: {row['LONGITUDE']}",
|
||||
f"{row['standardised_postcode']}<br>Latitude: {row['LATITUDE']}<br>Longitude: "
|
||||
f"{row['LONGITUDE']}<br>Walls: {row['Walls']}<br>Roofs: {row['Roofs']}<br>Main Fuel: "
|
||||
f"{row['Main Fuel']}<br>Heating: {row['Heating']}<br>Age: {row['Age']}<br>Property Type: "
|
||||
f"{row['Property Type']}",
|
||||
axis=1)
|
||||
|
||||
data = [
|
||||
|
|
@ -93,8 +97,8 @@ def layout():
|
|||
locations = json.load(file)
|
||||
|
||||
# Get the EPC breakdown data
|
||||
with open("Stonewater real EPC breakdown.json") as file:
|
||||
real_epc_breakdown = json.load(file)
|
||||
# with open("Stonewater real EPC breakdown.json") as file:
|
||||
# real_epc_breakdown = json.load(file)
|
||||
|
||||
# Get the EPC ratings data
|
||||
with open("Stonewater EPC rating breakdown.json") as file:
|
||||
|
|
@ -149,7 +153,8 @@ def layout():
|
|||
style={"font-size": "2.5rem", "font-weight": "bold", "margin-bottom": "20px"}
|
||||
),
|
||||
html.P(
|
||||
"This map shows the location of the properties that are to be surveyed by Osmosis.",
|
||||
"This map shows the location of the properties that are to be surveyed by Osmosis. "
|
||||
"These properties span across 30 counties and 155 postal regions",
|
||||
style={"font-size": "1.25rem", "margin-bottom": "40px"}
|
||||
),
|
||||
],
|
||||
|
|
@ -170,22 +175,22 @@ def layout():
|
|||
),
|
||||
dbc.Row(
|
||||
[
|
||||
dbc.Col(
|
||||
[
|
||||
html.Div(
|
||||
"Breakdown of real EPCs",
|
||||
style={"fontSize": "1.5rem", "fontWeight": "bold", "marginBottom": "1em"},
|
||||
className='text-center'
|
||||
),
|
||||
html.Div(
|
||||
"This pie chart shows the proportion of real EPCs in the asset list. Currently, "
|
||||
"there are EPCs for 3736 of the 5245 properties that have a UPRN in the asset list",
|
||||
style={"marginBottom": "1em"}
|
||||
),
|
||||
make_real_epc_piechart(real_epc_breakdown),
|
||||
],
|
||||
width={"size": 5},
|
||||
),
|
||||
# dbc.Col(
|
||||
# [
|
||||
# html.Div(
|
||||
# "Breakdown of real EPCs",
|
||||
# style={"fontSize": "1.5rem", "fontWeight": "bold", "marginBottom": "1em"},
|
||||
# className='text-center'
|
||||
# ),
|
||||
# html.Div(
|
||||
# "This pie chart shows the proportion of real EPCs in the asset list. Currently, "
|
||||
# "there are EPCs for 3736 of the 5245 properties that have a UPRN in the asset list",
|
||||
# style={"marginBottom": "1em"}
|
||||
# ),
|
||||
# make_real_epc_piechart(real_epc_breakdown),
|
||||
# ],
|
||||
# width={"size": 5},
|
||||
# ),
|
||||
dbc.Col(
|
||||
[
|
||||
html.Div(
|
||||
|
|
@ -195,22 +200,9 @@ def layout():
|
|||
),
|
||||
html.Div(
|
||||
[
|
||||
"This pie chart shows the breakdown of EPC ratings, for properties that currently "
|
||||
"have an EPC. "
|
||||
"The ratings range from A to G, where surprisingly, there are two EPC properties "
|
||||
"that were initially "
|
||||
"expected by Parity's modelled SAP, to be EPC D or below. These properties can be"
|
||||
" seen ",
|
||||
html.A("here",
|
||||
href="https://find-energy-certificate.service.gov.uk/energy-certificate"
|
||||
"/2708-5001-7327-6090-7284",
|
||||
target="_blank"),
|
||||
" and ",
|
||||
html.A("here",
|
||||
href="https://find-energy-certificate.service.gov.uk/energy-certificate"
|
||||
"/1037-4032-1009-0361-7292",
|
||||
target="_blank"),
|
||||
"."
|
||||
"This pie chart shows the breakdown of expected and real EPC ratings, "
|
||||
"for properties "
|
||||
"that have been selected for sample",
|
||||
],
|
||||
style={"marginBottom": "1em"}
|
||||
),
|
||||
|
|
|
|||
|
|
@ -11,8 +11,9 @@ In this script, we do the following things:
|
|||
import pandas as pd
|
||||
import json
|
||||
from utils.s3 import read_pickle_from_s3
|
||||
from backend.app.utils import sap_to_epc
|
||||
|
||||
stonewater_asset_list = pd.read_csv("Stonewater asset list with archetypes V2.csv")
|
||||
stonewater_asset_list = pd.read_csv("Stonewater asset list with archetypes V3.csv")
|
||||
archetyped_asset_list = stonewater_asset_list[
|
||||
[
|
||||
"internal_id", "customer_asset_id", "external_address_id", "udprn", "uprn", "cluster",
|
||||
|
|
@ -25,28 +26,15 @@ archetyped_asset_list["rank"] = archetyped_asset_list["rank"].astype(int)
|
|||
archetyped_asset_list = archetyped_asset_list.sort_values(by=["cluster", "rank"])
|
||||
|
||||
# Read in and merge on clustering features
|
||||
clustering_features = read_pickle_from_s3(
|
||||
bucket_name="retrofit-data-dev",
|
||||
s3_file_name="customers/Stonewater/clustering/clustering_dataframe.pkl"
|
||||
clustering_features = pd.read_csv(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Stonewater/Stonewater archetyping features V3.csv",
|
||||
)
|
||||
|
||||
# Move property-type and built-form to the first two columns
|
||||
columns_to_move = ['property-type', 'built-form']
|
||||
|
||||
# Get the remaining columns
|
||||
remaining_columns = [col for col in clustering_features.columns if col not in columns_to_move]
|
||||
|
||||
# Create the new column order
|
||||
new_column_order = columns_to_move + remaining_columns
|
||||
|
||||
# Reorder the DataFrame
|
||||
clustering_features = clustering_features[new_column_order]
|
||||
|
||||
archetyped_asset_list = archetyped_asset_list.merge(
|
||||
clustering_features,
|
||||
on="internal_id",
|
||||
how="inner"
|
||||
)
|
||||
clustering_features.drop(columns=['uprn', 'Address ID', "rank", "cluster", "archetype_representative"]),
|
||||
left_on="internal_id",
|
||||
right_on="Osm. ID"
|
||||
).drop(columns=["Osm. ID"])
|
||||
|
||||
archetyped_asset_list = archetyped_asset_list.rename(
|
||||
columns={
|
||||
|
|
@ -82,12 +70,47 @@ archetyped_asset_list["uprn"] = archetyped_asset_list["uprn"].astype('Int64')
|
|||
# archetyped_asset_list.to_excel("Stonewater Archetyping Features.xlsx", index=False)
|
||||
|
||||
# We store the location data, which will be used for the mapping. We just need the longitude and latitude
|
||||
stonewater_asset_list["uprn"] = stonewater_asset_list["uprn"].astype('Int64')
|
||||
|
||||
mapping_data = stonewater_asset_list[
|
||||
stonewater_asset_list["archetype_representative"]
|
||||
][["internal_id", "uprn", "standardised_address", "standardised_postcode"]]
|
||||
][["internal_id", "uprn", "standardised_address", "standardised_postcode"]].merge(
|
||||
archetyped_asset_list[["uprn", "Walls", "Roofs", "Main Fuel", "Heating", "Age", "Property Type"]],
|
||||
how="left",
|
||||
on="uprn"
|
||||
)
|
||||
|
||||
# We need to merge on longitude and latitude
|
||||
spatial_data_to_uprn = read_pickle_from_s3(
|
||||
s3_file_name="scustomers/Stonewater/clustering/spatial_data_to_uprn.pkl",
|
||||
bucket_name="retrofit-data-dev"
|
||||
)
|
||||
|
||||
|
||||
# Function to convert specific columns to bool dtype
|
||||
def convert_specific_columns_to_bool(df, columns):
    """Cast the named columns of ``df`` to plain ``bool`` dtype.

    Columns listed in ``columns`` that are not present in ``df`` are ignored.
    Missing values (NaN / None) are mapped to ``False``: a bare
    ``astype(bool)`` would turn NaN into ``True`` and so flag a property
    whose status is simply unknown.

    :param df: DataFrame to convert; it is modified in place.
    :param columns: iterable of column names to cast.
    :return: the same ``df`` object, to allow use inside comprehensions.
    """
    for column in columns:
        if column in df.columns:
            # Truthiness of the value, but only where a value is actually present
            df[column] = df[column].astype(bool) & df[column].notna()
    return df
|
||||
|
||||
|
||||
spatial_data_to_uprn = [convert_specific_columns_to_bool(
|
||||
df, ['conservation_status', 'is_listed_building', 'is_heritage_building']
|
||||
) for df in spatial_data_to_uprn]
|
||||
|
||||
spatial_data_to_uprn = pd.concat(spatial_data_to_uprn)
|
||||
spatial_data_to_uprn = spatial_data_to_uprn.drop(
|
||||
columns=["partition", "filename"]
|
||||
).rename(columns={"UPRN": "uprn"})
|
||||
spatial_data_to_uprn["uprn"] = spatial_data_to_uprn["uprn"].astype(str).astype("Int64")
|
||||
|
||||
mapping_data = mapping_data.merge(
|
||||
clustering_features[["internal_id", "LONGITUDE", "LATITUDE"]],
|
||||
spatial_data_to_uprn[
|
||||
["uprn", "LONGITUDE", "LATITUDE", "conservation_status", "is_listed_building", "is_heritage_building"]
|
||||
],
|
||||
how="left",
|
||||
on="uprn"
|
||||
)
|
||||
mapping_data = mapping_data.drop(columns=["internal_id"])
|
||||
|
||||
|
|
@ -95,38 +118,28 @@ with open("etl/customers/stonewater/map_app/Stonewater Mapping Data.json", "w")
|
|||
f.write(json.dumps(mapping_data.to_dict(orient="records")))
|
||||
|
||||
# We also include some data for visualising the breakdown of EPCS
|
||||
proportion_of_real_epcs = clustering_features["estimated"].value_counts().to_frame().reset_index()
|
||||
# Invert the true and false
|
||||
proportion_of_real_epcs["estimated"] = ~proportion_of_real_epcs["estimated"]
|
||||
proportion_of_real_epcs = proportion_of_real_epcs.rename(
|
||||
columns={"estimated": "is_real_epc"}
|
||||
)
|
||||
# proportion_of_real_epcs = (~clustering_features["estimated"]).value_counts().to_frame().reset_index()
|
||||
# proportion_of_real_epcs = proportion_of_real_epcs.rename(
|
||||
# columns={"estimated": "is_real_epc"}
|
||||
# )
|
||||
#
|
||||
# with open("etl/customers/stonewater/map_app/Stonewater real EPC breakdown.json", "w") as f:
|
||||
# f.write(json.dumps(proportion_of_real_epcs.to_dict(orient="records")))
|
||||
|
||||
with open("etl/customers/stonewater/map_app/Stonewater real EPC breakdown.json", "w") as f:
|
||||
f.write(json.dumps(proportion_of_real_epcs.to_dict(orient="records")))
|
||||
# Produce the breakdown of EPC ratings for properties to be surveyed
|
||||
clustering_features["representative_epc"] = clustering_features["representative_sap"].apply(sap_to_epc)
|
||||
|
||||
# Produce the breakdown of EPC ratings
|
||||
epc_rating_breakdown = (
|
||||
clustering_features[~clustering_features["estimated"]]["current-energy-rating"]
|
||||
clustering_features[clustering_features["archetype_representative"]]["representative_epc"]
|
||||
.value_counts()
|
||||
.to_frame()
|
||||
.reset_index()
|
||||
)
|
||||
|
||||
epc_rating_breakdown = epc_rating_breakdown.rename(
|
||||
columns={"current-energy-rating": "EPC"}
|
||||
columns={"index": "EPC", "representative_epc": "count"}
|
||||
)
|
||||
|
||||
with open("etl/customers/stonewater/map_app/Stonewater EPC rating breakdown.json", "w") as f:
|
||||
f.write(json.dumps(epc_rating_breakdown.to_dict(orient="records")))
|
||||
|
||||
epc_a_properties = clustering_features[
|
||||
(clustering_features["current-energy-rating"] == "A")
|
||||
& (~clustering_features["estimated"])
|
||||
]
|
||||
|
||||
epc_a_properties = epc_a_properties.merge(
|
||||
stonewater_asset_list,
|
||||
on="internal_id",
|
||||
how="inner"
|
||||
)
|
||||
|
|
|
|||
|
|
@ -13,12 +13,13 @@ import numpy as np
|
|||
import pandas as pd
|
||||
import time
|
||||
from utils.s3 import save_data_to_s3, read_excel_from_s3, read_from_s3, read_dataframe_from_s3_parquet, \
|
||||
save_dataframe_to_s3_parquet, save_pickle_to_s3
|
||||
save_dataframe_to_s3_parquet, save_pickle_to_s3, read_pickle_from_s3
|
||||
from sklearn.cluster import KMeans
|
||||
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
||||
from sklearn.compose import ColumnTransformer
|
||||
from sklearn.pipeline import Pipeline
|
||||
from scipy.spatial.distance import cdist
|
||||
from sklearn.metrics import pairwise_distances_argmin_min
|
||||
|
||||
load_dotenv(dotenv_path="backend/.env")
|
||||
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
||||
|
|
@ -1083,11 +1084,11 @@ def compile_data():
|
|||
spatial_data_to_uprn = pd.concat(spatial_data_to_uprn)
|
||||
|
||||
# TODO: Let's store this in s3
|
||||
save_data_to_s3(
|
||||
data=json.dumps(spatial_data_to_uprn.to_dict("records")),
|
||||
s3_file_name="scustomers/Stonewater/clustering/spatial_data_to_uprn.json",
|
||||
bucket_name="retrofit-data-dev"
|
||||
)
|
||||
# save_data_to_s3(
|
||||
# data=json.dumps(spatial_data_to_uprn.to_dict("records")),
|
||||
# s3_file_name="scustomers/Stonewater/clustering/spatial_data_to_uprn.json",
|
||||
# bucket_name="retrofit-data-dev"
|
||||
# )
|
||||
|
||||
# We merge this spatial data onto final EPCS
|
||||
|
||||
|
|
@ -1429,17 +1430,17 @@ def compile_data_final():
|
|||
older_epcs_batch_2[property["internal_id"]] = searcher.older_epcs
|
||||
# Store in S3
|
||||
# TODO - read in instead of running
|
||||
save_pickle_to_s3(
|
||||
data=epc_data_batch_2,
|
||||
s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.pkl",
|
||||
bucket_name="retrofit-data-dev"
|
||||
)
|
||||
|
||||
save_pickle_to_s3(
|
||||
data=older_epcs_batch_2,
|
||||
s3_file_name="customers/Stonewater/clustering/older_epcs_batch_2.pkl",
|
||||
bucket_name="retrofit-data-dev"
|
||||
)
|
||||
# save_pickle_to_s3(
|
||||
# data=epc_data_batch_2,
|
||||
# s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.pkl",
|
||||
# bucket_name="retrofit-data-dev"
|
||||
# )
|
||||
#
|
||||
# save_pickle_to_s3(
|
||||
# data=older_epcs_batch_2,
|
||||
# s3_file_name="customers/Stonewater/clustering/older_epcs_batch_2.pkl",
|
||||
# bucket_name="retrofit-data-dev"
|
||||
# )
|
||||
|
||||
epc_data_batch_2 = pd.DataFrame(epc_data_batch_2)
|
||||
complete_epcs = pd.concat([epc_data, epc_data_batch_2])
|
||||
|
|
@ -1799,6 +1800,10 @@ def compile_data_final():
|
|||
'is_cavity_wall', 'is_solid_brick', 'property-type', 'is_pitched', 'is_flat', 'has_dwelling_above'
|
||||
]
|
||||
|
||||
additional_features = [
|
||||
|
||||
]
|
||||
|
||||
# Define the preprocessing for numerical and categorical features
|
||||
numerical_features = property_attributes.select_dtypes(include=['int64', 'float64']).columns.tolist()
|
||||
categorical_features = property_attributes.select_dtypes(include=['object', 'category']).columns.tolist()
|
||||
|
|
@ -1957,3 +1962,710 @@ def pull_ideal_postcodes(missing_uprn_with_udprn):
|
|||
result["result"]
|
||||
)
|
||||
completed_id += 1
|
||||
|
||||
|
||||
def updated_version():
|
||||
"""
|
||||
This version of the clustering factors in the updates received from Stonewater to simplify the archetyping process
|
||||
using fewer variables and also factoring in their internal data sources
|
||||
|
||||
This work began on the 23rd July 2024
|
||||
:return:
|
||||
"""
|
||||
|
||||
########################################################################
|
||||
# Read in data
|
||||
########################################################################
|
||||
asset_list = read_asset_list()
|
||||
asset_list, uprn_lookup_2 = merge_uprn_to_asset_list(asset_list)
|
||||
|
||||
# Read in the properties that have been included in Osmosis' wave 2.1
|
||||
osmosis_wave_2_1_asset_ids, osmosis_wave_2_1 = read_omosis_wave_2_1()
|
||||
|
||||
asset_list["In Osmosis Wave 2.1"] = asset_list["customer_asset_id"].isin(osmosis_wave_2_1_asset_ids)
|
||||
|
||||
# We also check the address & postcode
|
||||
asset_list["In Osmosis Wave 2.1"] = np.where(
|
||||
asset_list["address1"].isin(osmosis_wave_2_1["Name"]),
|
||||
True,
|
||||
asset_list["In Osmosis Wave 2.1"]
|
||||
)
|
||||
|
||||
priority_postcodes, previous_waves_address_id, master_sheet = read_stonewater_asset_data()
|
||||
|
||||
# Pull in the EPC data
|
||||
epc_data = read_epc_data(uprn_lookup_2)
|
||||
|
||||
# Pull in the spatial data to UPRN
|
||||
spatial_data_to_uprn = read_pickle_from_s3(
|
||||
s3_file_name="scustomers/Stonewater/clustering/spatial_data_to_uprn.pkl",
|
||||
bucket_name="retrofit-data-dev"
|
||||
)
|
||||
|
||||
# Function to convert specific columns to bool dtype
|
||||
def convert_specific_columns_to_bool(df, columns):
    """Cast the named columns of ``df`` to plain ``bool`` dtype.

    Columns listed in ``columns`` that are not present in ``df`` are ignored.
    Missing values (NaN / None) are mapped to ``False``: a bare
    ``astype(bool)`` would turn NaN into ``True`` and so flag a property
    whose status is simply unknown.

    :param df: DataFrame to convert; it is modified in place.
    :param columns: iterable of column names to cast.
    :return: the same ``df`` object, to allow use inside comprehensions.
    """
    for column in columns:
        if column in df.columns:
            # Truthiness of the value, but only where a value is actually present
            df[column] = df[column].astype(bool) & df[column].notna()
    return df
|
||||
|
||||
spatial_data_to_uprn = [convert_specific_columns_to_bool(
|
||||
df, ['conservation_status', 'is_listed_building', 'is_heritage_building']
|
||||
) for df in spatial_data_to_uprn]
|
||||
|
||||
spatial_data_to_uprn = pd.concat(spatial_data_to_uprn)
|
||||
spatial_data_to_uprn = spatial_data_to_uprn.drop(
|
||||
columns=["partition", "filename"]
|
||||
).rename(columns={"UPRN": "uprn"})
|
||||
spatial_data_to_uprn["uprn"] = spatial_data_to_uprn["uprn"].astype(str)
|
||||
|
||||
########################################################################
|
||||
# Prepare the data
|
||||
########################################################################
|
||||
|
||||
# Filter the asset list down to the priority postcodes
|
||||
asset_list["is_priority_postcode"] = asset_list["postcode"].isin(priority_postcodes)
|
||||
|
||||
master_sheet = master_sheet[
|
||||
master_sheet["Address ID"].isin(
|
||||
asset_list["external_address_id"].values
|
||||
)
|
||||
]
|
||||
|
||||
master_sheet["days_since_lodgement"] = (
|
||||
datetime.now() - pd.to_datetime(master_sheet["Lodgement Date"], errors="coerce", dayfirst=True)
|
||||
).dt.days
|
||||
|
||||
asset_list = asset_list.drop(columns=["Lodgement Date"]).merge(
|
||||
master_sheet[["Address ID", "days_since_lodgement", "Lodgement Date", "EPC Rating"]],
|
||||
how="left",
|
||||
left_on="external_address_id",
|
||||
right_on="Address ID"
|
||||
)
|
||||
|
||||
asset_list = asset_list.merge(
|
||||
epc_data[["internal_id", "current-energy-efficiency", "lodgement-date", "estimated"]],
|
||||
how="left",
|
||||
on="internal_id"
|
||||
)
|
||||
asset_list["days_since_lodgement_epc"] = (
|
||||
datetime.now() - pd.to_datetime(asset_list["lodgement-date"], errors="coerce", dayfirst=True)
|
||||
).dt.days
|
||||
|
||||
# Flag properties that were surveyed within the last 5 years
|
||||
asset_list["epc_within_5_years"] = asset_list["days_since_lodgement_epc"] < 5 * 365
|
||||
|
||||
# Identify properties where they've had an EPC done within the last 5 years, where the SAP rating is already
|
||||
# an EPC C. Alternatively, any property with an EPC rating of 80 or above is also considered, regardless of when
|
||||
# the EPC is done
|
||||
asset_list["is_epc_c_or_above"] = (
|
||||
((asset_list["EPC Rating"] >= 69) & asset_list["epc_within_5_years"]) |
|
||||
(asset_list["EPC Rating"] >= 80)
|
||||
)
|
||||
|
||||
clustering_features = asset_list[
|
||||
asset_list["is_priority_postcode"] & ~asset_list["In Osmosis Wave 2.1"] & ~asset_list["is_epc_c_or_above"] &
|
||||
~pd.isnull(asset_list["uprn"])
|
||||
][
|
||||
[
|
||||
"internal_id", "uprn", "udprn", "customer_asset_id", "postcode", "house_number", "address1", "address2",
|
||||
"city_town", "county", "external_address_id", "owner", "days_since_lodgement", "Lodgement Date",
|
||||
"epc_within_5_years", "EPC Rating", "estimated", "current-energy-efficiency", "lodgement-date",
|
||||
]
|
||||
]
|
||||
|
||||
# Merge on the SAP data
|
||||
clustering_features = clustering_features.merge(
|
||||
master_sheet[
|
||||
["Address ID", "SAP"]
|
||||
].rename(columns={"SAP": "parity_modelled_sap"}),
|
||||
how="left",
|
||||
left_on="external_address_id",
|
||||
right_on="Address ID"
|
||||
)
|
||||
|
||||
# For SAP, we use the most recent EPC if epc_within_5_years is True, otherwise we use the parity modelled sap
|
||||
clustering_features["current-energy-efficiency"] = clustering_features["current-energy-efficiency"].astype(float)
|
||||
clustering_features["representative_sap"] = np.where(
|
||||
clustering_features["epc_within_5_years"],
|
||||
clustering_features["current-energy-efficiency"],
|
||||
clustering_features["parity_modelled_sap"]
|
||||
)
|
||||
|
||||
# We remove the final three characters from postcode to give us postal region. Removing two gives us 415 values which
|
||||
# is too many
|
||||
clustering_features["postal_region"] = clustering_features["postcode"].str[:-3]
|
||||
|
||||
# Merge on spatial features
|
||||
clustering_features = clustering_features.merge(
|
||||
spatial_data_to_uprn[["uprn", "conservation_status", "is_listed_building", "is_heritage_building"]],
|
||||
how="left",
|
||||
on="uprn"
|
||||
)
|
||||
|
||||
# incorect_epcs = clustering_features[
|
||||
# clustering_features["EPC Rating"] != clustering_features["current-energy-efficiency"]]
|
||||
# incorect_epcs = incorect_epcs[
|
||||
# ~pd.isnull(incorect_epcs["current-energy-efficiency"]) & pd.isnull(incorect_epcs["estimated"])
|
||||
# ]
|
||||
# incorect_epcs = incorect_epcs.rename(columns={"current-energy-efficiency": "Current SAP Rating"})
|
||||
# # Store data
|
||||
# incorect_epcs.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Stonewater/Incorrect EPCs.csv", index=False)
|
||||
|
||||
# We add in the key features, which are used for clustering
|
||||
master_sheet_clustering_features = master_sheet[
|
||||
["Address ID", "Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age", "Total Floor Area"]
|
||||
].copy()
|
||||
|
||||
# Step 1: Remap walls - we end up with 11 types
|
||||
master_sheet_clustering_features["walls_reduced"] = master_sheet_clustering_features["Walls"].replace(
|
||||
{
|
||||
"TimberFrame: AsBuilt": "Other wall type, as built",
|
||||
"SystemBuilt: AsBuilt": "Other wall type, as built",
|
||||
"Sandstone: AsBuilt": "Other wall type, as built",
|
||||
"Sandstone: Internal": "Other wall type, internal or external",
|
||||
"SystemBuilt: External": "Other wall type, internal or external",
|
||||
"GraniteOrWhinstone: AsBuilt": "Other wall type, as built",
|
||||
"TimberFrame: Internal": "Other wall type, internal or external",
|
||||
"Cavity: FilledCavityPlusInternal": "Cavity: FilledCavity",
|
||||
"SystemBuilt: Internal": "Other wall type, internal or external",
|
||||
"Cavity: Internal": "Other wall type, internal or external",
|
||||
}
|
||||
)
|
||||
|
||||
# Step 2: Remap roofs - we split on the : where the first part of the string gives us the roof type, the second
|
||||
# gives us the insulation thickness
|
||||
|
||||
# Clean an incorrect value
|
||||
master_sheet_clustering_features["Roofs"] = master_sheet_clustering_features["Roofs"].replace(
|
||||
{
|
||||
"PitchedWithSlopingCeiling: mm250": "PitchedWithSlopingCeiling: 250mm",
|
||||
"PitchedWithSlopingCeiling: 150mm+": "PitchedWithSlopingCeiling: 150mm",
|
||||
'PitchedWithSlopingCeiling: mm25': "PitchedWithSlopingCeiling: 25mm",
|
||||
'PitchedWithSlopingCeiling: mm200': "PitchedWithSlopingCeiling: 200mm",
|
||||
'AnotherDwellingAbove: 50mm': 'PitchedNormalLoftAccess: 50mm',
|
||||
}
|
||||
)
|
||||
|
||||
master_sheet_clustering_features[['roof_type', 'roof_insulation_thickness']] = (
|
||||
master_sheet_clustering_features['Roofs'].apply(
|
||||
lambda x: pd.Series(x.split(':', 1) if ':' in x else [x, ''])
|
||||
)
|
||||
)
|
||||
|
||||
# Strip any extra whitespace
|
||||
master_sheet_clustering_features['roof_type'] = master_sheet_clustering_features['roof_type'].str.strip()
|
||||
master_sheet_clustering_features['roof_insulation_thickness'] = (
|
||||
master_sheet_clustering_features['roof_insulation_thickness'].str.strip()
|
||||
)
|
||||
|
||||
def map_thickness(thickness):
    """Bucket a roof-insulation thickness label around the 250mm mark.

    The label is stripped of ``mm``, ``+`` and spaces and parsed as a number.
    Values strictly greater than 250 become ``"Above 250mm"``; everything
    else that parses (including exactly 250) becomes ``"Below 250mm"``.

    :param thickness: thickness label, e.g. ``"150mm"`` or ``"300mm+"``.
    :return: the bucket label, or ``thickness`` unchanged when it cannot be
        interpreted as a number (empty string, free text, or a non-string
        value such as NaN/None).
    """
    try:
        value = float(thickness.replace('mm', '').replace('+', '').replace(' ', ''))
    except (AttributeError, ValueError):
        # AttributeError: not a string at all (e.g. NaN/None from missing data)
        # ValueError: a string that is not numeric once the units are removed
        return thickness  # Return the original value if it cannot be converted to a float
    return "Above 250mm" if value > 250 else "Below 250mm"
|
||||
|
||||
master_sheet_clustering_features['roof_insulation_category'] = (
|
||||
master_sheet_clustering_features['roof_insulation_thickness'].apply(map_thickness)
|
||||
)
|
||||
|
||||
# Ideas
|
||||
# 1) We might need to remap the roof type to pitched, flat or another dwelling above and then have the access
|
||||
# as a secondary category
|
||||
# 2) Split out the (community) tag in the fuel as a secondary feature, which isn't strictly split
|
||||
# (could split on :, take first part)
|
||||
|
||||
clustering_features = clustering_features.merge(
|
||||
master_sheet_clustering_features,
|
||||
how="left",
|
||||
on="Address ID"
|
||||
)
|
||||
|
||||
# Reduce down to the final set of features we need
|
||||
clustering_features = clustering_features[
|
||||
[
|
||||
"internal_id",
|
||||
"Property Type",
|
||||
# Location
|
||||
"postal_region",
|
||||
'conservation_status',
|
||||
'is_listed_building',
|
||||
'is_heritage_building',
|
||||
"county",
|
||||
# Walls
|
||||
"walls_reduced",
|
||||
# Roof
|
||||
"roof_type",
|
||||
"roof_insulation_category",
|
||||
# Heating
|
||||
"Heating",
|
||||
# Fuel
|
||||
"Main Fuel",
|
||||
"Age",
|
||||
"Total Floor Area",
|
||||
"representative_sap",
|
||||
"days_since_lodgement",
|
||||
]
|
||||
]
|
||||
|
||||
clustering_features["days_since_lodgement"] = clustering_features["days_since_lodgement"].fillna(99999)
|
||||
|
||||
def split_property_type(row):
    """Split a colon-delimited property-type label into three components.

    The label is split on ``:`` and each part is stripped of surrounding
    whitespace. Components that are absent are returned as empty strings and
    any parts beyond the third are ignored.

    :param row: a single property-type value, e.g. ``"House: Detached"``.
    :return: ``pd.Series`` of ``[property_type, built_form,
        property_extended_feature]``. A non-string value (e.g. NaN left by an
        unmatched merge) yields three empty strings rather than raising.
    """
    if not isinstance(row, str):
        # Missing values cannot be split; emit the same "absent" marker used
        # for missing trailing components
        return pd.Series(['', '', ''])

    parts = [part.strip() for part in row.split(':')]
    # Pad so that the three positional components always exist
    parts += [''] * (3 - len(parts))
    property_type, built_form, property_extended_feature = parts[:3]
    return pd.Series([property_type, built_form, property_extended_feature])
|
||||
|
||||
clustering_features[['property_type', 'built_form', 'property_extended_feature']] = (
|
||||
clustering_features['Property Type'].apply(split_property_type)
|
||||
)
|
||||
clustering_features = clustering_features.drop(columns=["Property Type"])
|
||||
|
||||
# These are the variables we MUST split by
|
||||
grouping_columns = [
|
||||
"property_type",
|
||||
"walls_reduced",
|
||||
"roof_type",
|
||||
"Main Fuel",
|
||||
"county",
|
||||
]
|
||||
|
||||
def combine_small_groups(clustering_features, grouping_columns, threshold=2):
    """
    Fold groups that are too small to cluster into their most similar larger group.

    Rows are grouped by grouping_columns. Every group with at most `threshold` rows is "small". Each
    small group has its grouping column values overwritten with those of the nearest row (one-hot encoded
    categoricals plus raw numeric columns, Euclidean distance) that belongs to a non-small group.

    :param clustering_features: DataFrame of features; must contain "internal_id" and the grouping columns
    :param grouping_columns: List of column names that define a group
    :param threshold: Groups with this many rows or fewer are merged into another group
    :return: The input unchanged when there is nothing to merge (or nothing to merge into), otherwise the
        non-small rows followed by the re-labelled small-group rows
    """
    # Identify small groups
    group_sizes = clustering_features.groupby(grouping_columns).size()
    small_groups = group_sizes[group_sizes <= threshold].index.tolist()
    if not small_groups:
        return clustering_features

    # Split the data once into rows from small groups and rows from groups that are large enough.
    # The small-group rows are copied because they are modified below.
    is_small = clustering_features.set_index(grouping_columns).index.isin(small_groups)
    small_group_data = clustering_features[is_small].copy()
    clustering_features_ok = clustering_features[~is_small]

    # Nothing to move, or no large group to move anything into
    if small_group_data.empty or clustering_features_ok.empty:
        return clustering_features

    # One-Hot Encode categorical variables, fitted on the large groups only. Categories that only appear
    # in small groups are encoded as all zeros (handle_unknown='ignore').
    categorical_features = (
        clustering_features_ok.drop(columns=["internal_id"])
        .select_dtypes(include=['object', 'category']).columns.tolist()
    )
    numerical_features = clustering_features_ok.select_dtypes(include=['int64', 'float64']).columns.tolist()

    ohe = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
    ohe.fit(clustering_features_ok[categorical_features])

    # Concatenate one-hot encoded categorical and numerical features
    small_group_features = np.hstack([
        ohe.transform(small_group_data[categorical_features]),
        small_group_data[numerical_features].values
    ])
    large_group_features = np.hstack([
        ohe.transform(clustering_features_ok[categorical_features]),
        clustering_features_ok[numerical_features].values
    ])

    # For every small-group ROW: position of, and distance to, the nearest large-group row
    nearest_rows, nearest_distances = pairwise_distances_argmin_min(small_group_features, large_group_features)

    replacement_values = clustering_features_ok[grouping_columns].to_numpy()
    grouping_column_positions = [small_group_data.columns.get_loc(column) for column in grouping_columns]

    # The nearest-neighbour results are per row, so they are resolved per group explicitly: each small
    # group adopts the grouping values of the large-group row closest to any of its members.
    # The positions are captured before any values are overwritten.
    members_by_group = small_group_data.groupby(grouping_columns).indices
    for member_rows in members_by_group.values():
        if len(member_rows) == 0:
            continue
        best_member = member_rows[np.argmin(nearest_distances[member_rows])]
        new_values = replacement_values[nearest_rows[best_member]]
        for column_position, new_value in zip(grouping_column_positions, new_values):
            small_group_data.iloc[member_rows, column_position] = new_value

    return pd.concat([clustering_features_ok, small_group_data])
|
||||
|
||||
clustering_features_combined = combine_small_groups(clustering_features, grouping_columns)
|
||||
|
||||
########################################################################
|
||||
# Clustering
|
||||
########################################################################
|
||||
numerical_features = clustering_features_combined.select_dtypes(include=['int64', 'float64']).columns.tolist()
|
||||
categorical_features = clustering_features_combined.select_dtypes(include=['object', 'category']).columns.tolist()
|
||||
categorical_features = [c for c in categorical_features if c not in ["internal_id", grouping_columns]]
|
||||
|
||||
for col in categorical_features:
|
||||
clustering_features_combined[col] = clustering_features_combined[col].astype(str)
|
||||
|
||||
id_column = 'internal_id'
|
||||
n_clusters = 450
|
||||
random_state = 0
|
||||
|
||||
training_data_grouped = clustering_features_combined.groupby(grouping_columns)
|
||||
group_sizes = {name: len(group) for name, group in training_data_grouped}
|
||||
total_size = sum(group_sizes.values())
|
||||
cluster_allocation = {
|
||||
name: max(1, int(round(n_clusters * (size / total_size)))) for name, size in group_sizes.items()
|
||||
}
|
||||
|
||||
# Adjust cluster allocation to ensure total clusters sum to 450
|
||||
cluster_allocation = adjust_clusters(cluster_allocation, n_clusters)
|
||||
|
||||
final_clusters = []
|
||||
for group_variables, group_data in tqdm(training_data_grouped, total=len(training_data_grouped)):
|
||||
|
||||
group_n_clusters = cluster_allocation[group_variables]
|
||||
group_data.set_index(id_column, inplace=True)
|
||||
|
||||
preprocessor = ColumnTransformer(
|
||||
transformers=[
|
||||
('num', StandardScaler(), numerical_features),
|
||||
('cat', OneHotEncoder(), categorical_features)
|
||||
]
|
||||
)
|
||||
|
||||
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
|
||||
('kmeans', KMeans(n_clusters=group_n_clusters, random_state=random_state))])
|
||||
|
||||
# Fit the pipeline to the data
|
||||
pipeline.fit(group_data)
|
||||
|
||||
# Transform the data using the fitted pipeline
|
||||
processed_data = pipeline.named_steps['preprocessor'].transform(group_data)
|
||||
|
||||
# Get cluster labels
|
||||
group_data['cluster'] = pipeline.named_steps['kmeans'].labels_
|
||||
|
||||
# Get centroids (already in the same transformed space)
|
||||
centroids = pipeline.named_steps['kmeans'].cluster_centers_
|
||||
|
||||
# if the data isn't an array, make it one
|
||||
if not isinstance(processed_data, np.ndarray):
|
||||
processed_data = processed_data.toarray()
|
||||
|
||||
# Calculate distances from each point to the centroid of its cluster
|
||||
distances_to_centroids = [
|
||||
cdist(processed_data[i].reshape(1, -1), centroids[label].reshape(1, -1)).flatten()[0]
|
||||
for i, label in enumerate(group_data['cluster'])
|
||||
]
|
||||
|
||||
group_data['distance_to_centroid'] = distances_to_centroids
|
||||
|
||||
# Ranking rows by distance within each cluster
|
||||
group_data['rank'] = group_data.groupby('cluster')['distance_to_centroid'].rank(method='first')
|
||||
|
||||
# Sorting to verify
|
||||
group_data.sort_values(by=['cluster', 'rank'], inplace=True)
|
||||
group_data.reset_index(inplace=True)
|
||||
|
||||
to_append = group_data[["internal_id", "cluster", "rank"]].copy()
|
||||
to_append["cluster"] = to_append["cluster"].astype(str) + str(group_variables)
|
||||
final_clusters.append(to_append)
|
||||
|
||||
final_clusters = pd.concat(final_clusters)
|
||||
# remap the clusters from the current names to 1 -> n_clusters
|
||||
cluster_mapping = {cluster: i for i, cluster in enumerate(final_clusters["cluster"].unique())}
|
||||
final_clusters["cluster"] = final_clusters["cluster"].map(cluster_mapping)
|
||||
final_clusters["cluster"] = final_clusters["cluster"].astype(str)
|
||||
|
||||
assigned_clusters = clustering_features_combined.merge(
|
||||
final_clusters, how="left", on="internal_id"
|
||||
)
|
||||
|
||||
assigned_clusters["archetype_representative"] = assigned_clusters["rank"] == 1
|
||||
|
||||
asset_list_with_archetypes = asset_list.merge(
|
||||
assigned_clusters[["internal_id", "cluster", "archetype_representative", "rank"]], how="left",
|
||||
on="internal_id"
|
||||
).merge(
|
||||
master_sheet_clustering_features[["Address ID", "Property Type", "Walls", "Roofs", "Heating"]],
|
||||
how="left",
|
||||
on="Address ID"
|
||||
)
|
||||
|
||||
# We populate the reasons for no archetype
|
||||
# 1) If it's not a priority postcode
|
||||
asset_list_with_archetypes["cluster"] = np.where(
|
||||
~asset_list_with_archetypes["is_priority_postcode"],
|
||||
"NOT PRIORITY POSTCODE",
|
||||
asset_list_with_archetypes["cluster"]
|
||||
)
|
||||
|
||||
# 2) If it's EPC C or above
|
||||
asset_list_with_archetypes["cluster"] = np.where(
|
||||
asset_list_with_archetypes["is_epc_c_or_above"],
|
||||
"EPC C OR ABOVE",
|
||||
asset_list_with_archetypes["cluster"]
|
||||
)
|
||||
|
||||
# If it's in Wave 2.1
|
||||
asset_list_with_archetypes["cluster"] = np.where(
|
||||
asset_list_with_archetypes["In Osmosis Wave 2.1"],
|
||||
"IN WAVE 2.1",
|
||||
asset_list_with_archetypes["cluster"]
|
||||
)
|
||||
|
||||
# Has missing uprn
|
||||
asset_list_with_archetypes["cluster"] = np.where(
|
||||
pd.isnull(asset_list_with_archetypes["uprn"]),
|
||||
"MISSING UPRN",
|
||||
asset_list_with_archetypes["cluster"]
|
||||
)
|
||||
|
||||
asset_list_with_archetypes["rank"] = asset_list_with_archetypes["rank"].fillna(-999)
|
||||
asset_list_with_archetypes["rank"] = asset_list_with_archetypes["rank"].astype(int).astype(str)
|
||||
asset_list_with_archetypes["rank"] = asset_list_with_archetypes["rank"].replace("-999", "NO ARCHETYPE")
|
||||
|
||||
asset_list_with_archetypes["archetype_representative"] = (
|
||||
asset_list_with_archetypes["archetype_representative"].fillna(False)
|
||||
)
|
||||
|
||||
asset_list_with_archetypes.to_csv("Stonewater asset list with archetypes V3.1.csv", index=False)
|
||||
|
||||
# Produce the archetyping features
|
||||
archetyping_features_csv = assigned_clusters[
|
||||
[
|
||||
"internal_id", "cluster", "archetype_representative", "rank", "conservation_status", "is_listed_building",
|
||||
"is_heritage_building", "postal_region", "county", "representative_sap", "days_since_lodgement"
|
||||
]
|
||||
].merge(
|
||||
asset_list[
|
||||
["internal_id", "uprn", "external_address_id"]
|
||||
],
|
||||
how="left",
|
||||
on="internal_id"
|
||||
).merge(
|
||||
master_sheet_clustering_features,
|
||||
how="left",
|
||||
right_on="Address ID",
|
||||
left_on="external_address_id"
|
||||
).drop(columns=["Address ID"]).rename(
|
||||
columns={
|
||||
"internal_id": "Osm. ID",
|
||||
"external_address_id": "Address ID",
|
||||
}
|
||||
)
|
||||
|
||||
archetyping_features_csv = archetyping_features_csv.sort_values(["cluster", "rank"], ascending=True)
|
||||
archetyping_features_csv.to_csv(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Stonewater/Stonewater archetyping features V3.csv", index=False
|
||||
)
|
||||
|
||||
representatives = archetyping_features_csv[archetyping_features_csv["archetype_representative"]]
|
||||
print(representatives["postal_region"].nunique())
|
||||
print(representatives["county"].nunique())
|
||||
|
||||
|
||||
def read_asset_list():
    """
    Load the Stonewater triage asset list, attach UDPRNs and standardise the column names.

    The triage workbook is inner-joined to the UDPRN workbook on "Address ID", the customer column
    names are mapped onto our internal names, and a single-string "full_address" is built.

    :return: DataFrame of assets with internal column names plus "udprn" and "full_address"
    """
    triage = pd.read_excel(
        "/Users/khalimconn-kowlessar/Documents/hestia/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24.xlsx",
        header=4
    )

    udprn_lookup = pd.read_excel(
        "/Users/khalimconn-kowlessar/Downloads/UDPRN updated RA Sample for 5 year programme.xlsx", header=0
    )
    udprn_lookup = udprn_lookup[["AddressId", "UDPRN"]].rename(columns={"AddressId": "Address ID"})
    # Both identifiers are carried as strings; the nullable integer step removes any ".0" float suffix
    udprn_lookup["UDPRN"] = udprn_lookup["UDPRN"].astype("Int64").astype(str)
    udprn_lookup["Address ID"] = udprn_lookup["Address ID"].astype(str)

    # Only assets that have a UDPRN record are kept
    triage = triage.merge(udprn_lookup, how="inner", on="Address ID")

    column_names = {
        "UDPRN": "udprn",
        "Osm. ID": "internal_id",
        "Org. ref.": "customer_asset_id",
        "Postcode": "postcode",
        "House no": "house_number",
        "Name": "address1",
        "Address line 2": "address2",
        "City/Town": "city_town",
        "County": "county",
        "Address ID": "external_address_id",
        "Owning body": "owner"
    }
    triage = triage.rename(columns=column_names)

    # "<address1>, [<address2>, ]<Town>, <postcode>" - the second line is only included when present
    town_and_postcode = triage["city_town"].str.title() + ", " + triage["postcode"]
    has_second_line = ~pd.isnull(triage["address2"])
    triage["full_address"] = np.where(
        has_second_line,
        triage["address1"] + ", " + triage["address2"] + ", " + town_and_postcode,
        triage["address1"] + ", " + town_and_postcode
    )
    return triage
|
||||
|
||||
|
||||
def merge_uprn_to_asset_list(asset_list):
    """
    Attach UPRNs and standardised addresses to the asset list.

    Four lookups are stacked into one: two JSON lookups from S3 (tagged "Exact" and "EPC"), an
    ideal-postcodes pull from S3 that is joined back to the asset list on udprn (tagged "Exact"), and a
    local CSV of manual fixes (tagged "Fuzzy"). The asset list is then inner-joined to the combined
    lookup on internal_id and external_address_id.

    :param asset_list: DataFrame containing at least udprn, internal_id and external_address_id
    :return: Tuple of (asset list with the lookup columns merged on, the "EPC" lookup)
    """
    bucket = "retrofit-data-dev"

    def load_lookup(s3_key):
        # Each lookup is stored as a JSON document that loads straight into a DataFrame
        return pd.DataFrame(json.loads(read_from_s3(bucket_name=bucket, s3_file_name=s3_key)))

    # NOTE(review): the first two keys start with "scustomers/" while the third starts with
    # "customers/" - confirm against the bucket whether the leading "s" is intentional
    exact_lookup = load_lookup("scustomers/Stonewater/clustering/address_uprn_udprn_lookup.json")
    exact_lookup["match_type"] = "Exact"

    epc_lookup = load_lookup("scustomers/Stonewater/clustering/address_uprn_udprn_lookup_2.json")
    epc_lookup = epc_lookup.rename(
        columns={"epc_address": "standardised_address", "epc_postcode": "standardised_postcode"}
    )
    epc_lookup["match_type"] = "EPC"
    # Hard-coded UPRN override for internal_id 1091
    epc_lookup["uprn"] = np.where(epc_lookup["internal_id"] == 1091, 83143766, epc_lookup["uprn"])

    postcode_lookup = load_lookup("customers/Stonewater/clustering/ideal-postcodes_pull_2.json")
    address_parts = ["line_1", "line_2", "line_3", "district", "postcode"]
    postcode_lookup["standardised_address"] = postcode_lookup[address_parts].apply(concatenate_row, axis=1)
    postcode_lookup = postcode_lookup[["udprn", "uprn", "standardised_address", "postcode"]].rename(
        columns={"postcode": "standardised_postcode"}
    )
    postcode_lookup["match_type"] = "Exact"

    manual_fixes = pd.read_csv("manual_fix_uprns-populated.csv", index_col=False)
    manual_fixes["os_option_1_uprn"] = manual_fixes["os_option_1_uprn"].astype(str)
    manual_fixes["os_option_2_uprn"] = manual_fixes["os_option_2_uprn"].astype("Int64").astype(str)

    # Each manual fix row records which candidate was chosen: OS option 1, OS option 2, or (any other
    # value) a manually entered UPRN and address
    manual_records = []
    for _, fix in manual_fixes.iterrows():
        if fix["option"] == 1:
            chosen = (fix["os_option_1_uprn"], fix["os_option_1_address"], fix["os_option_1_postcode"])
        elif fix["option"] == 2:
            # The postcode for option 2 is taken from the last comma-separated part of its address
            option_2_address = fix["os_option_2_address"]
            chosen = (fix["os_option_2_uprn"], option_2_address, option_2_address.split(", ")[-1])
        else:
            chosen = (fix["manual_uprn"], fix["manual_address"], fix["manual_postcode"])

        chosen_uprn, chosen_address, chosen_postcode = chosen
        manual_records.append(
            {
                "internal_id": fix["internal_id"],
                "external_address_id": fix["external_address_id"],
                "uprn": chosen_uprn,
                "standardised_address": chosen_address,
                "standardised_postcode": chosen_postcode,
                "property_type": None,
                "built_form": None
            }
        )
    manual_lookup = pd.DataFrame(manual_records)
    manual_lookup["match_type"] = "Fuzzy"

    combined_lookup = pd.concat([exact_lookup, epc_lookup])

    # Between them the four lookups should account for every asset exactly once
    assert len(combined_lookup) + len(postcode_lookup) + len(manual_lookup) == len(asset_list)

    # The ideal-postcodes pull carries no ids of its own, so pick them up from the asset list via udprn
    postcode_lookup["udprn"] = postcode_lookup["udprn"].astype(str)
    postcode_lookup = postcode_lookup.merge(
        asset_list[["udprn", "internal_id", "external_address_id"]], how="left", on="udprn"
    )

    combined_lookup = pd.concat([combined_lookup, postcode_lookup, manual_lookup])
    combined_lookup["external_address_id"] = combined_lookup["external_address_id"].astype(str)

    # udprn is dropped from the lookup so it does not collide with the asset list's own column
    merged_assets = asset_list.merge(
        combined_lookup.drop(columns=["udprn"]),
        how="inner",
        on=["internal_id", "external_address_id"]
    )

    return merged_assets, epc_lookup
|
||||
|
||||
|
||||
def read_omosis_wave_2_1():
    """
    Load the Osmosis SHDF Wave 2.1 workbook and extract the asset ids that are in the programme.

    Rows marked "Removed from program" are dropped before the ids are collected.

    :return: Tuple of (list of integer asset ids, the filtered Wave 2.1 DataFrame)
    """
    osmosis_wave_2_1 = pd.read_excel(
        "/Users/khalimconn-kowlessar/Documents/hestia/Stonewater/Stonewater Osmosis SHDF 2.1.xlsx",
        header=4,
    )
    # Remove double spaces from "Name" (a literal two-space -> one-space replacement)
    osmosis_wave_2_1["Name"] = osmosis_wave_2_1["Name"].str.replace("  ", " ", regex=False)

    # The location column has no header in the workbook, so pandas names it "Unnamed: 1"
    osmosis_wave_2_1 = osmosis_wave_2_1.rename(columns={"Unnamed: 1": "Location"})
    osmosis_wave_2_1 = osmosis_wave_2_1[osmosis_wave_2_1["Location"] != "Removed from program"]

    # We produce a cleaned list of asset ids from osmosis_wave_2_1
    osmosis_wave_2_1_asset_ids = []
    for raw_id in osmosis_wave_2_1["Asset ID"].values:
        if pd.isnull(raw_id):
            continue
        if isinstance(raw_id, str):
            # We have some ids that are in the form 'id1, id2' so we split them; empty tokens (e.g. from
            # a trailing comma) are skipped
            osmosis_wave_2_1_asset_ids.extend(
                int(token.strip()) for token in raw_id.split(",") if token.strip()
            )
        else:
            # Cells holding a single id may be read as numbers rather than text
            osmosis_wave_2_1_asset_ids.append(int(raw_id))

    return osmosis_wave_2_1_asset_ids, osmosis_wave_2_1
|
||||
|
||||
|
||||
def read_stonewater_asset_data():
    """
    Load the Stonewater master sheet and the priority postcode list.

    :return: Tuple of (list of priority postcodes, list of Address IDs flagged "Yes" for any previous
        Wave 2.1 programme, the full master sheet with "Address ID" as a string)
    """
    stonewater_directory = "/Users/khalimconn-kowlessar/Documents/hestia/Stonewater/"

    master_sheet = pd.read_csv(
        stonewater_directory + "Osmosis Reviewed - Parity Download 18.7 - master sheet.csv",
        encoding='latin1'
    )
    master_sheet["Address ID"] = master_sheet["Address ID"].astype(str)

    # A property counts as being in a previous wave if any one of these flags is "Yes"
    wave_flags = ["In Osmosis W2.1", "In Wates Wave 2.1", "In Liv Green Wave 2.1", "In CCS Wave 2.1"]
    in_previous_wave = (master_sheet[wave_flags] == "Yes").any(axis=1)
    previous_waves_address_id = [
        str(address_id)
        for address_id in master_sheet.loc[in_previous_wave, "Address ID"].values
        if not pd.isnull(address_id)
    ]

    # We also read the priority postcodes
    priority_postcode_sheet = pd.read_csv(
        stonewater_directory + "Osmosis Reviewed - Parity Download 18.7 - priority postcodes.csv",
        header=17
    )
    priority_postcodes = priority_postcode_sheet["Postcode"].tolist()

    return priority_postcodes, previous_waves_address_id, master_sheet
|
||||
|
||||
|
||||
def read_epc_data(uprn_lookup_2):
    """
    Load both batches of EPC data from S3 and stack them into one DataFrame.

    The first batch (JSON) is restricted to properties present in uprn_lookup_2; the second batch
    (pickle) is appended as-is.

    :param uprn_lookup_2: DataFrame with an "internal_id" column listing the properties whose
        first-batch EPCs should be kept
    :return: DataFrame of first-batch (filtered) and second-batch EPC records
    """
    bucket = "retrofit-data-dev"

    first_batch_raw = read_from_s3(
        bucket_name=bucket,
        s3_file_name="customers/Stonewater/clustering/epc_data.json"
    )
    first_batch = pd.DataFrame(json.loads(first_batch_raw))

    # Hard-coded UPRN override for internal_id 1091
    is_overridden_property = first_batch["internal_id"] == 1091
    first_batch["uprn"] = np.where(is_overridden_property, 83143766, first_batch["uprn"])

    # We drop some EPCs: only properties that appear in the lookup are kept
    wanted_ids = uprn_lookup_2["internal_id"].values
    first_batch = first_batch[first_batch["internal_id"].isin(wanted_ids)]

    second_batch = pd.DataFrame(
        read_pickle_from_s3(
            s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.pkl",
            bucket_name=bucket
        )
    )

    return pd.concat([first_batch, second_batch])
|
||||
|
|
|
|||
80
etl/sfr/epc_average_by_postcode.py
Normal file
80
etl/sfr/epc_average_by_postcode.py
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
import os
|
||||
from tqdm import tqdm
|
||||
import pandas as pd
|
||||
from dotenv import load_dotenv
|
||||
from backend.SearchEpc import SearchEpc
|
||||
from backend.app.utils import sap_to_epc
|
||||
|
||||
load_dotenv(dotenv_path="backend/.env")
|
||||
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
||||
|
||||
|
||||
def app():
    """
    This script will retrieve EPC data, for postcodes and produce statistics on the SAP Score

    For every unique (address, postcode) pair in the source workbook the EPC API is searched, the results
    are restricted to privately rented EPCs lodged from 2023 onwards, and the mean SAP score per
    property type / built form is written to "EPC Averages SFR.csv".
    :return:
    """

    source_file = pd.read_excel("/Users/khalimconn-kowlessar/Downloads/Addresses - SFR rents.xlsx")
    source_file["row_id"] = source_file.index
    # Split out the town, which is taken as the final space-separated token of the address; the
    # remainder is kept as the address
    source_file["Town"] = source_file["Address"].apply(lambda x: x.split(" ")[-1].strip() if not pd.isnull(x) else None)
    source_file["Address"] = source_file["Address"].apply(
        lambda x: " ".join(x.split(" ")[:-1]).strip() if not pd.isnull(x) else None
    )

    unique_postcodes = source_file[["Address", "Postcode"]].drop_duplicates()

    # for each postcode, pull EPC data
    collected_data = []
    no_data_found = []
    no_data_after_filters = []
    for _, config in tqdm(unique_postcodes.iterrows(), total=len(unique_postcodes)):
        address1 = config["Address"] if not pd.isnull(config["Address"]) else ""
        searcher = SearchEpc(
            postcode=config["Postcode"],
            address1=address1,
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=""
        )
        # Retry with a progressively shorter address until something is found or the address runs out
        while True:
            params = {
                "postcode": config["Postcode"],
                "address": address1,
            }
            results = searcher.client.domestic.search(params=params, size=10000)
            if results:
                break
            # We strip back address1 by dropping its last word
            address1 = " ".join(address1.split(" ")[:-1])
            if not address1:
                break

        if not results:
            no_data_found.append(config)
            continue

        data = pd.DataFrame(results["rows"])

        data["current-energy-efficiency"] = data["current-energy-efficiency"].astype(int)
        # Take EPCs lodged from 2023 onwards
        data["lodgement-date"] = pd.to_datetime(data["lodgement-date"], errors="coerce")
        data = data[data["lodgement-date"] >= "2023-01-01"]
        # Take private rentals
        data = data[data["tenure"].isin(["rental (private)", "Rented (private)"])]

        if data.empty:
            no_data_after_filters.append(config)
            continue

        agg = data.groupby(["property-type", "built-form"])["current-energy-efficiency"].mean().reset_index()
        agg = agg.rename(columns={"current-energy-efficiency": "Average SAP"})
        agg["Average EPC"] = agg["Average SAP"].apply(sap_to_epc)
        agg.insert(0, "Postcode", config["Postcode"])
        # Note this is the address that actually matched, which may be shorter than the source address
        agg.insert(0, "Address", address1)

        collected_data.append(agg)

    print(
        f"{len(no_data_found)} addresses returned no EPC data, "
        f"{len(no_data_after_filters)} had no EPCs left after filtering"
    )

    if collected_data:
        collected_df = pd.concat(collected_data)
    else:
        # pd.concat raises on an empty list, so write a header-only file when nothing was collected
        collected_df = pd.DataFrame(
            columns=["Address", "Postcode", "property-type", "built-form", "Average SAP", "Average EPC"]
        )
    collected_df.to_csv("EPC Averages SFR.csv", index=False)
|
||||
37
etl/sfr/example_retrofit_plan.py
Normal file
37
etl/sfr/example_retrofit_plan.py
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
import pandas as pd
|
||||
from utils.s3 import save_csv_to_s3
|
||||
|
||||
PORTFOLIO_ID = 85
|
||||
USER_ID = 8
|
||||
|
||||
|
||||
def app():
    """
    Upload a one-property sample asset list to S3 and print the matching plan trigger request body.

    The file is stored under "<USER_ID>/<PORTFOLIO_ID>/sample.csv" in the plan inputs bucket, and the
    printed body points its trigger_file_path at that same key.
    :return:
    """
    sample_assets = pd.DataFrame(
        [
            {"address": "120 Yarningale Road", "postcode": "B14 6NB", "uprn": 100070575194}
        ]
    )

    trigger_file_key = f"{USER_ID}/{PORTFOLIO_ID}/sample.csv"
    save_csv_to_s3(
        dataframe=sample_assets,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=trigger_file_key
    )

    # Request body for triggering a plan: private housing, target EPC C, no optional input files
    request_body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Private",
        "goal": "Increase EPC",
        "goal_value": "C",
        "trigger_file_path": trigger_file_key,
        "already_installed_file_path": "",
        "patches_file_path": "",
        "non_invasive_recommendations_file_path": "",
        "budget": None,
    }
    print(request_body)
|
||||
3
etl/xml_survey_extraction/README.md
Normal file
3
etl/xml_survey_extraction/README.md
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
# Survey Extraction App
|
||||
|
||||
This app is responsible for extracting survey data from energy assessment XMLs
|
||||
9
etl/xml_survey_extraction/app.py
Normal file
9
etl/xml_survey_extraction/app.py
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
def main():
|
||||
"""
|
||||
This function executes the main process, which will retrieve data from the specified locations, extract the data
|
||||
fields and store them to our database
|
||||
:return:
|
||||
"""
|
||||
|
||||
# TODO: Build solution to get this data from Onedrive and store what we need in S3
|
||||
# In s3, we have a bucket called retrofit-energy-assessments-{stage} which
|
||||
|
|
@ -175,6 +175,12 @@ module "retrofit_hotwater_kwh_predictions" {
|
|||
allowed_origins = var.allowed_origins
|
||||
}
|
||||
|
||||
module "retrofit_energy_assessments" {
|
||||
source = "./modules/s3"
|
||||
bucketname = "retrofit-energy-assessments-${var.stage}"
|
||||
allowed_origins = var.allowed_origins
|
||||
}
|
||||
|
||||
# Set up the route53 record for the API
|
||||
module "route53" {
|
||||
source = "./modules/route53"
|
||||
|
|
|
|||
|
|
@ -100,9 +100,10 @@ class HeatingControlRecommender:
|
|||
We can then consider the heating system itself
|
||||
:return:
|
||||
"""
|
||||
new_description = "Controls for high heat retention storage heaters"
|
||||
|
||||
# We recommend upgrading to Celect type controls
|
||||
ending_config = MainheatControlAttributes("Controls for high heat retention storage heaters").process()
|
||||
ending_config = MainheatControlAttributes(new_description).process()
|
||||
# We look at what has changed in the ending config, and compare it to the current config
|
||||
simulation_config = check_simulation_difference(
|
||||
new_config=ending_config, old_config=self.property.main_heating_controls
|
||||
|
|
@ -110,11 +111,17 @@ class HeatingControlRecommender:
|
|||
# This upgrade will only take the heating system to average energy efficiency
|
||||
simulation_config["mainheatc_energy_eff_ending"] = "Good"
|
||||
|
||||
description_simulation = {
|
||||
"mainheatcont-description": new_description,
|
||||
"mainheatc-energy-eff": simulation_config["mainheatc_energy_eff_ending"]
|
||||
}
|
||||
|
||||
self.recommendation.append(
|
||||
{
|
||||
"description": "upgrade heating controls to High Heat Retention Storage Heater Controls",
|
||||
**self.costs.celect_type_controls(),
|
||||
"simulation_config": simulation_config
|
||||
"simulation_config": simulation_config,
|
||||
"description_simulation": description_simulation
|
||||
}
|
||||
)
|
||||
|
||||
|
|
@ -152,7 +159,9 @@ class HeatingControlRecommender:
|
|||
if not can_recommend:
|
||||
return
|
||||
|
||||
ending_config = MainheatControlAttributes("Programmer, room thermostat and TRVS").process()
|
||||
new_controls_description = "Programmer, room thermostat and TRVS"
|
||||
|
||||
ending_config = MainheatControlAttributes(new_controls_description).process()
|
||||
# We use this to determine how we should be updating the config
|
||||
simulation_config = check_simulation_difference(
|
||||
new_config=ending_config, old_config=self.property.main_heating_controls
|
||||
|
|
@ -161,6 +170,13 @@ class HeatingControlRecommender:
|
|||
# If the current system is below good, we make it good
|
||||
if self.property.data["mainheatc-energy-eff"] in ["Poor", "Very Poor", "Average"]:
|
||||
simulation_config["mainheatc_energy_eff_ending"] = "Good"
|
||||
else:
|
||||
simulation_config["mainheatc_energy_eff_ending"] = self.property.data["mainheatc-energy-eff"]
|
||||
|
||||
description_simulation = {
|
||||
"mainheatcont-description": new_controls_description,
|
||||
"mainheatc-energy-eff": simulation_config["mainheatc_energy_eff_ending"]
|
||||
}
|
||||
|
||||
has_programmer = not needs_programmer
|
||||
has_room_thermostat = not needs_room_thermostat
|
||||
|
|
@ -191,10 +207,7 @@ class HeatingControlRecommender:
|
|||
"sap_points": None,
|
||||
"already_installed": already_installed,
|
||||
"simulation_config": simulation_config,
|
||||
"description_simulation": {
|
||||
"mainheatcont-description": "Programmer, room thermostat and TRVS",
|
||||
"mainheatc-energy-eff": "Good"
|
||||
}
|
||||
"description_simulation": description_simulation
|
||||
}
|
||||
)
|
||||
|
||||
|
|
@ -221,7 +234,9 @@ class HeatingControlRecommender:
|
|||
# No recommendation needed
|
||||
return
|
||||
|
||||
ending_config = MainheatControlAttributes("Time and temperature zone control").process()
|
||||
new_controls_description = "Time and temperature zone control"
|
||||
|
||||
ending_config = MainheatControlAttributes(new_controls_description).process()
|
||||
|
||||
# We use this to determine how we should be updating the config
|
||||
simulation_config = check_simulation_difference(
|
||||
|
|
@ -231,7 +246,13 @@ class HeatingControlRecommender:
|
|||
# If the current system is below very good, we make it very good
|
||||
if self.property.data["mainheatc-energy-eff"] in ["Poor", "Very Poor", "Average", "Good"]:
|
||||
simulation_config["mainheatc_energy_eff_ending"] = "Very Good"
|
||||
else:
|
||||
simulation_config["mainheatc_energy_eff_ending"] = self.property.data["mainheatc-energy-eff"]
|
||||
|
||||
description_simulation = {
|
||||
"mainheatcont-description": new_controls_description,
|
||||
"mainheatc-energy-eff": simulation_config["mainheatc_energy_eff_ending"]
|
||||
}
|
||||
cost_result = self.costs.time_and_temperature_zone_control(
|
||||
number_heated_rooms=int(self.property.data["number-heated-rooms"])
|
||||
)
|
||||
|
|
@ -255,9 +276,6 @@ class HeatingControlRecommender:
|
|||
"sap_points": None,
|
||||
"already_installed": already_installed,
|
||||
"simulation_config": simulation_config,
|
||||
"description_simulation": {
|
||||
"mainheatcont-description": "Time and temperature zone control",
|
||||
"mainheatc-energy-eff": "Very Good"
|
||||
}
|
||||
"description_simulation": description_simulation
|
||||
}
|
||||
)
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ class HeatingRecommender:
|
|||
# TODO: We could have a system flush recommendation for an existing boiler, where there is no need to replace
|
||||
# the boiler, but instead flushing the system will make it run more efficiently. There is a cost for this
|
||||
# in the Costs class, stored as SYSTEM_FLUSH_COST
|
||||
|
||||
|
||||
exclusions = [] if exclusions is None else exclusions
|
||||
|
||||
self.heating_recommendations = []
|
||||
|
|
@ -86,7 +86,8 @@ class HeatingRecommender:
|
|||
electic_heating_has_mains = self.has_electric_heating_description and self.property.data["mains-gas-flag"]
|
||||
|
||||
portable_heaters_has_mains = (
|
||||
self.property.main_heating["clean_description"] in ["Portable electric heaters assumed for most rooms"] and
|
||||
self.property.main_heating["clean_description"] in ["Portable electric heaters assumed for most rooms"]
|
||||
and
|
||||
self.property.data["mains-gas-flag"]
|
||||
)
|
||||
|
||||
|
|
@ -238,28 +239,31 @@ class HeatingRecommender:
|
|||
description = description + (f" The cost includes the £"
|
||||
f"{BOILER_UPGRADE_SCHEME_ASHP_VALUE} boiler upgrade scheme grant")
|
||||
|
||||
new_heating_description = "Air source heat pump, radiators, electric"
|
||||
new_hot_water_description = "From main system"
|
||||
simulation_config = {
|
||||
"mainheat_energy_eff_ending": "Good",
|
||||
"hot_water_energy_eff_ending": "Good"
|
||||
}
|
||||
description_simulation = {
|
||||
"mainheat-description": "Air source heat pump, radiators, electric",
|
||||
"mainheat-energy-eff": "Good",
|
||||
"hot-water-energy-eff": "Good",
|
||||
"hotwater-description": "From main system",
|
||||
"mainheat-description": new_heating_description,
|
||||
"mainheat-energy-eff": simulation_config["mainheat_energy_eff_ending"],
|
||||
"hot-water-energy-eff": simulation_config["hot_water_energy_eff_ending"],
|
||||
"hotwater-description": new_hot_water_description,
|
||||
}
|
||||
# Installation of a boiler improves the hot water system so we need to reflect this in
|
||||
# the outcome of the recommendation
|
||||
heating_ending_config = MainHeatAttributes("Air source heat pump, radiators, electric").process()
|
||||
hotwater_ending_config = HotWaterAttributes("From main system").process()
|
||||
heating_ending_config = MainHeatAttributes(new_heating_description).process()
|
||||
hotwater_ending_config = HotWaterAttributes(new_hot_water_description).process()
|
||||
|
||||
# If the property does not currently have electric main fuel, we'll simulate the change
|
||||
fuel_ending_config = {}
|
||||
if self.property.main_fuel["fuel_type"] != "electricity":
|
||||
fuel_ending_config = MainFuelAttributes("electricity (not community)").process()
|
||||
new_fuel_description = "electricity (not community)"
|
||||
fuel_ending_config = MainFuelAttributes(new_fuel_description).process()
|
||||
description_simulation = {
|
||||
**description_simulation,
|
||||
"main-fuel": "electricity (not community)"
|
||||
"main-fuel": new_fuel_description
|
||||
}
|
||||
|
||||
# Check the simulation differences
|
||||
|
|
@ -292,8 +296,7 @@ class HeatingRecommender:
|
|||
|
||||
description_simulation = {
|
||||
**description_simulation,
|
||||
"mainheatcont-description": "time and temperature zone control",
|
||||
"mainheatc-energy-eff": "Very Good"
|
||||
**controls_recommender.recommendation[0]["description_simulation"]
|
||||
}
|
||||
|
||||
ashp_recommendation = {
|
||||
|
|
@ -330,7 +333,14 @@ class HeatingRecommender:
|
|||
return differences
|
||||
|
||||
def combine_heating_and_controls(
|
||||
self, controls_recommendations, heating_simulation_config, costs, description, phase, heating_controls_only,
|
||||
self,
|
||||
controls_recommendations,
|
||||
heating_simulation_config,
|
||||
heating_description_simulation,
|
||||
costs,
|
||||
description,
|
||||
phase,
|
||||
heating_controls_only,
|
||||
system_change
|
||||
):
|
||||
"""
|
||||
|
|
@ -338,6 +348,7 @@ class HeatingRecommender:
|
|||
into a single recommendation
|
||||
:param controls_recommendations: The heating controls recommendations
|
||||
:param heating_simulation_config: The simulation configuration for the heating system
|
||||
:param heating_description_simulation: The simulation configuration for the heating description
|
||||
:param costs: The costs of the heating system
|
||||
:param description: The description of the recommendation
|
||||
:param phase: The phase of the recommendation
|
||||
|
|
@ -361,6 +372,7 @@ class HeatingRecommender:
|
|||
for controls_switch in heating_controls_switch:
|
||||
total_costs = costs.copy()
|
||||
recommendation_simulation_config = heating_simulation_config.copy()
|
||||
recommendation_description_simulation = heating_description_simulation.copy()
|
||||
recommendation_description = description
|
||||
if controls_switch:
|
||||
# We add the costs of the heating controls, onto each key in the costs dictionary
|
||||
|
|
@ -371,6 +383,12 @@ class HeatingRecommender:
|
|||
**recommendation_simulation_config,
|
||||
**controls_recommendations[0]["simulation_config"]
|
||||
}
|
||||
|
||||
recommendation_description_simulation = {
|
||||
**recommendation_description_simulation,
|
||||
**controls_recommendations[0]["description_simulation"]
|
||||
}
|
||||
|
||||
controls_description = controls_recommendations[0]['description']
|
||||
# Make the first letter of the description lowercase
|
||||
controls_description = (
|
||||
|
|
@ -396,7 +414,8 @@ class HeatingRecommender:
|
|||
"sap_points": None,
|
||||
"already_installed": already_installed,
|
||||
**total_costs,
|
||||
"simulation_config": recommendation_simulation_config
|
||||
"simulation_config": recommendation_simulation_config,
|
||||
"description_simulation": recommendation_description_simulation
|
||||
}
|
||||
|
||||
output.append(recommendation)
|
||||
|
|
@ -474,8 +493,10 @@ class HeatingRecommender:
|
|||
# No recommendation needed
|
||||
return
|
||||
|
||||
new_heating_description = "Electric storage heaters, radiators"
|
||||
|
||||
# Set up artefacts, suitable for the simulation and regardless of controls
|
||||
heating_ending_config = MainHeatAttributes("Electric storage heaters, radiators").process()
|
||||
heating_ending_config = MainHeatAttributes(new_heating_description).process()
|
||||
heating_simulation_config = check_simulation_difference(
|
||||
new_config=heating_ending_config, old_config=self.property.main_heating
|
||||
)
|
||||
|
|
@ -497,9 +518,15 @@ class HeatingRecommender:
|
|||
)
|
||||
description = "Install high heat retention electric storage heaters"
|
||||
|
||||
heating_description_simulation = {
|
||||
"mainheat-description": new_heating_description,
|
||||
"mainheat-energy-eff": heating_simulation_config["mainheat_energy_eff_ending"],
|
||||
}
|
||||
|
||||
recommendations = self.combine_heating_and_controls(
|
||||
controls_recommendations=controls_recommender.recommendation,
|
||||
heating_simulation_config=heating_simulation_config,
|
||||
heating_description_simulation=heating_description_simulation,
|
||||
costs=costs,
|
||||
description=description,
|
||||
phase=phase,
|
||||
|
|
@ -580,6 +607,7 @@ class HeatingRecommender:
|
|||
simulation_config = {}
|
||||
boiler_costs = {}
|
||||
boiler_recommendation = {}
|
||||
description_simulation = {}
|
||||
|
||||
has_inefficient_space_heating = self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"]
|
||||
|
||||
|
|
@ -603,12 +631,22 @@ class HeatingRecommender:
|
|||
"mainheat_energy_eff_ending": "Good",
|
||||
"hot_water_energy_eff_ending": "Good"
|
||||
}
|
||||
|
||||
description_simulation = {
|
||||
"mainheat-energy-eff": simulation_config["mainheat_energy_eff_ending"],
|
||||
"hot-water-energy-eff": simulation_config["hot_water_energy_eff_ending"],
|
||||
}
|
||||
|
||||
if system_change:
|
||||
# Installation of a boiler improves the hot water system so we need to reflect this in
|
||||
# the outcome of the recommendation
|
||||
heating_ending_config = MainHeatAttributes("Boiler and radiators, mains gas").process()
|
||||
hotwater_ending_config = HotWaterAttributes("From main system").process()
|
||||
fuel_ending_config = MainFuelAttributes("mains gas (not community)").process()
|
||||
new_heating_description = "Boiler and radiators, mains gas"
|
||||
new_hotwater_description = "From main system"
|
||||
new_fuel_description = "mains gas (not community)"
|
||||
|
||||
heating_ending_config = MainHeatAttributes(new_heating_description).process()
|
||||
hotwater_ending_config = HotWaterAttributes(new_hotwater_description).process()
|
||||
fuel_ending_config = MainFuelAttributes(new_fuel_description).process()
|
||||
|
||||
heating_simulation_config = check_simulation_difference(
|
||||
new_config=heating_ending_config, old_config=self.property.main_heating
|
||||
|
|
@ -627,6 +665,13 @@ class HeatingRecommender:
|
|||
**fuel_simulation_config,
|
||||
}
|
||||
|
||||
description_simulation = {
|
||||
**description_simulation,
|
||||
"mainheat-description": new_heating_description,
|
||||
"hotwater-description": new_hotwater_description,
|
||||
"main-fuel": new_fuel_description
|
||||
}
|
||||
|
||||
boiler_costs = self.costs.boiler(
|
||||
size=f"{boiler_size}kw",
|
||||
exising_room_heaters=exising_room_heaters,
|
||||
|
|
@ -652,6 +697,7 @@ class HeatingRecommender:
|
|||
"sap_points": None,
|
||||
"already_installed": already_installed,
|
||||
"simulation_config": simulation_config,
|
||||
"description_simulation": description_simulation,
|
||||
**boiler_costs
|
||||
}
|
||||
|
||||
|
|
@ -675,6 +721,7 @@ class HeatingRecommender:
|
|||
combined_recommendation = self.combine_heating_and_controls(
|
||||
controls_recommendations=[controls_recommendation],
|
||||
heating_simulation_config=simulation_config,
|
||||
heating_description_simulation=description_simulation,
|
||||
costs=boiler_costs,
|
||||
description=boiler_recommendation["description"],
|
||||
phase=recommendation_phase,
|
||||
|
|
|
|||
|
|
@ -782,6 +782,11 @@ class Recommendations:
|
|||
}
|
||||
}
|
||||
|
||||
# Prevent the predicted SAP points, CO2 savings and heat demand from being negative
|
||||
predicted_sap_points = 0 if predicted_sap_points < 0 else predicted_sap_points
|
||||
predicted_co2_savings = 0 if predicted_co2_savings < 0 else predicted_co2_savings
|
||||
predicted_heat_demand = 0 if predicted_heat_demand < 0 else predicted_heat_demand
|
||||
|
||||
if rec["type"] == "low_energy_lighting":
|
||||
# For the moment, we cap the number of SAP points that can be achieved by low energy lighting at LightingRecommendations.SAP_LIMIT
|
||||
rec["sap_points"] = min(predicted_sap_points, LightingRecommendations.SAP_LIMIT)
|
||||
|
|
|
|||
|
|
@ -524,6 +524,10 @@ class WallRecommendations(Definitions):
|
|||
"already_installed": already_installed,
|
||||
"sap_points": None,
|
||||
"simulation_config": simulation_config,
|
||||
"description_simulation": {
|
||||
"walls-description": new_description,
|
||||
"walls-energy-eff": simulation_config["walls_energy_eff_ending"]
|
||||
},
|
||||
**cost_result
|
||||
}
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue