mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
328 lines
18 KiB
Python
328 lines
18 KiB
Python
from tqdm import tqdm
|
|
import os
|
|
import pandas as pd
|
|
import msgpack
|
|
|
|
from model_data.EpcClean import EpcClean
|
|
from model_data.analysis.UvalueEstimations import UvalueEstimations
|
|
from model_data.simulation_system.core.Settings import EARLIEST_EPC_DATE
|
|
from pathlib import Path
|
|
from utils.s3 import save_data_to_s3
|
|
|
|
# Absolute directory containing this module; local data paths are built from it.
_MODULE_DIR = os.path.abspath(os.path.dirname(__file__))

# Land Registry CSV files under model_data/local_data, listed from the monthly
# update file back to the 2017 parts. Kept as plain strings.
LAND_REGISTRY_PATHS = [
    f"{_MODULE_DIR}/model_data/local_data/{file_name}"
    for file_name in (
        "pp-monthly-update-new-version.csv",
        "pp-2022 (1).csv",
        "pp-2021.csv",
        "pp-2020.csv",
        "pp-2019.csv",
        "pp-2018.csv",
        "pp-2017-part1.csv",
        "pp-2017-part2.csv",
    )
]

# Directory whose sub-directories each hold a certificates.csv file.
EPC_DIRECTORY = Path(__file__).parent.joinpath(
    "model_data", "simulation_system", "data", "all-domestic-certificates"
)

# Deployment environment name; falls back to "dev" when the variable is unset.
ENVIRONMENT = os.environ.get("ENVIRONMENT", "dev")
|
|
|
|
|
|
# Age-band letters, in the same order as the columns of _WALL_U_VALUES.
_AGE_BANDS = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L"]

# Wall constructions, in the same order as the rows of _WALL_U_VALUES.
_WALL_TYPES = [
    "Stone: granite or whinstone as built",
    "Stone: sandstone or limestone as built",
    "Solid brick as built",
    "Stone/solid brick with 50 mm external or internal insulation",
    "Stone/solid brick with 100 mm external or internal insulation",
    "Stone/solid brick with 150 mm external or internal insulation",
    "Stone/solid brick with 200 mm external or internal insulation",
    "Cob as built",
    "Cob with 50 mm external or internal insulation",
    "Cob with 100 mm external or internal insulation",
    "Cob with 150 mm external or internal insulation",
    "Cob with 200 mm external or internal insulation",
    "Cavity as built",
    "Unfilled cavity with 50 mm external or internal insulation",
    "Unfilled cavity with 100 mm external or internal insulation",
    "Unfilled cavity with 150 mm external or internal insulation",
    "Unfilled cavity with 200 mm external or internal insulation",
    "Filled cavity",
    "Filled cavity with 50 mm external or internal insulation",
    "Filled cavity with 100 mm external or internal insulation",
    "Filled cavity with 150 mm external or internal insulation",
    "Filled cavity with 200 mm external or internal insulation",
    "Timber frame as built",
    "Timber frame with internal insulation",
    "System build as built",
    "System build with 50 mm external or internal insulation",
    "System build with 100 mm external or internal insulation",
    "System build with 150 mm external or internal insulation",
    "System build with 200 mm external or internal insulation",
]

# One row per entry of _WALL_TYPES, one column per entry of _AGE_BANDS.
# Values are kept as strings because some cells are markers rather than numbers:
#   "a"     -> the value comes from apply_formula_s_5_1_1
#   "<n>b"  -> the smaller of <n> and the apply_formula_s_5_1_1 value is used
_WALL_U_VALUES = [
    ["a", "a", "a", "a", "1.7b", "1.0", "0.6", "0.60", "0.45", "0.35", "0.30", "0.28"],
    ["a", "a", "a", "a", "1.7b", "1.0", "0.6", "0.60", "0.45", "0.35", "0.30", "0.28"],
    ["1.7", "1.7", "1.7", "1.7", "1.7", "1.0", "0.60", "0.60", "0.45", "0.35", "0.30", "0.28"],
    ["0.55", "0.55", "0.55", "0.55", "0.55", "0.45", "0.35", "0.35", "0.30", "0.25", "0.21", "0.21"],
    ["0.32", "0.32", "0.32", "0.32", "0.32", "0.28", "0.24", "0.24", "0.21", "0.19", "0.17", "0.16"],
    ["0.23", "0.23", "0.23", "0.23", "0.23", "0.21", "0.18", "0.18", "0.17", "0.15", "0.14", "0.14"],
    ["0.18", "0.18", "0.18", "0.18", "0.18", "0.17", "0.15", "0.15", "0.14", "0.13", "0.12", "0.12"],
    ["0.80", "0.80", "0.80", "0.80", "0.80", "0.80", "0.60", "0.60", "0.45", "0.35", "0.30", "0.28"],
    ["0.40", "0.40", "0.40", "0.40", "0.40", "0.40", "0.35", "0.35", "0.30", "0.25", "0.21", "0.21"],
    ["0.26", "0.26", "0.26", "0.26", "0.26", "0.26", "0.24", "0.24", "0.21", "0.19", "0.17", "0.16"],
    ["0.20", "0.20", "0.20", "0.20", "0.20", "0.20", "0.18", "0.18", "0.17", "0.15", "0.14", "0.14"],
    ["0.16", "0.16", "0.16", "0.16", "0.16", "0.16", "0.15", "0.15", "0.14", "0.13", "0.12", "0.12"],
    ["1.5", "1.5", "1.5", "1.5", "1.5", "1.0", "0.60", "0.60", "0.45", "0.35", "0.30", "0.28"],
    ["0.53", "0.53", "0.53", "0.53", "0.53", "0.45", "0.35", "0.35", "0.30", "0.25", "0.21", "0.21"],
    ["0.32", "0.32", "0.32", "0.32", "0.32", "0.30", "0.24", "0.24", "0.21", "0.19", "0.17", "0.16"],
    ["0.23", "0.23", "0.23", "0.23", "0.23", "0.21", "0.18", "0.18", "0.17", "0.15", "0.14", "0.14"],
    ["0.18", "0.18", "0.18", "0.18", "0.18", "0.17", "0.15", "0.15", "0.14", "0.13", "0.12", "0.12"],
    ["0.7", "0.7", "0.7", "0.7", "0.7", "0.40", "0.35", "0.35", "0.45", "0.35", "0.30", "0.28"],
    ["0.37", "0.37", "0.37", "0.37", "0.37", "0.27", "0.25", "0.25", "0.25", "0.25", "0.21", "0.21"],
    ["0.25", "0.25", "0.25", "0.25", "0.25", "0.20", "0.19", "0.19", "0.19", "0.19", "0.17", "0.16"],
    ["0.19", "0.19", "0.19", "0.19", "0.19", "0.16", "0.15", "0.15", "0.15", "0.15", "0.14", "0.14"],
    ["0.16", "0.16", "0.16", "0.16", "0.16", "0.13", "0.13", "0.13", "0.13", "0.13", "0.12", "0.12"],
    ["2.5", "1.9", "1.9", "1.0", "0.80", "0.45", "0.40", "0.40", "0.40", "0.35", "0.30", "0.28"],
    ["0.60", "0.55", "0.55", "0.40", "0.40", "0.40", "0.40", "0.40", "0.40", "0.35", "0.30", "0.28"],
    ["2.0", "2.0", "2.0", "2.0", "1.7", "1.0", "0.60", "0.60", "0.45", "0.35", "0.30", "0.28"],
    ["0.60", "0.60", "0.60", "0.60", "0.55", "0.45", "0.35", "0.35", "0.30", "0.25", "0.21", "0.21"],
    ["0.35", "0.35", "0.35", "0.35", "0.35", "0.32", "0.24", "0.24", "0.21", "0.19", "0.17", "0.16"],
    ["0.25", "0.25", "0.25", "0.25", "0.25", "0.21", "0.18", "0.18", "0.17", "0.15", "0.14", "0.14"],
    ["0.18", "0.18", "0.18", "0.18", "0.18", "0.17", "0.15", "0.15", "0.14", "0.13", "0.12", "0.12"],
]

# Park home rows only define a value for some age bands (or none at all);
# the bands that are absent are skipped when u-values are resolved.
_PARK_HOME_WALL_UVALUES = [
    {"Wall_type": "Park home as built", "F": "1.7", "G": "1.2", "I": "0.7", "K": "0.6"},
    {"Wall_type": "Park home with additional insulation"},
]

# Frequently reused table row names, spelled once so a mapping can never drift
# from the table through a broken multi-line string concatenation.
_STONE_BRICK_50MM = "Stone/solid brick with 50 mm external or internal insulation"
_STONE_BRICK_100MM = "Stone/solid brick with 100 mm external or internal insulation"
_UNFILLED_CAVITY_100MM = "Unfilled cavity with 100 mm external or internal insulation"
_FILLED_CAVITY_100MM = "Filled cavity with 100 mm external or internal insulation"
_SYSTEM_BUILD_100MM = "System build with 100 mm external or internal insulation"
_COB_100MM = "Cob with 100 mm external or internal insulation"

# Maps the cleaned EPC wall descriptions onto the row names of the u-value table.
_EPC_WALL_DESCRIPTION_MAP = {
    ############################
    # Cavity wall mappings
    ############################
    "Cavity wall, as built, partial insulation": "Filled cavity",
    "Cavity wall, filled cavity": "Filled cavity",
    "Cavity wall, as built, no insulation": "Cavity as built",
    "Cavity wall, as built, insulated": _UNFILLED_CAVITY_100MM,
    "Cavity wall, with external insulation": _UNFILLED_CAVITY_100MM,
    "Cavity wall,": "Cavity as built",  # General case of cavity wall without further details
    "Cavity wall, filled cavity and external insulation": _FILLED_CAVITY_100MM,
    "Cavity wall, filled cavity and internal insulation": _FILLED_CAVITY_100MM,
    "Cavity wall, with internal insulation": _UNFILLED_CAVITY_100MM,
    ############################
    # Solid brick wall mappings
    ############################
    "Solid brick, as built, no insulation": "Solid brick as built",
    "Solid brick, with internal insulation": _STONE_BRICK_100MM,
    "Solid brick, as built, insulated": _STONE_BRICK_100MM,
    "Solid brick, with external insulation": _STONE_BRICK_100MM,
    "Solid brick, as built, partial insulation": _STONE_BRICK_50MM,
    ############################
    # Timber frame wall mappings
    ############################
    # These mappings are perhaps the most dubious due to the lack of timber options in the RdSAP table
    "Timber frame, as built, insulated": "Timber frame with internal insulation",
    "Timber frame, with additional insulation": "Timber frame with internal insulation",
    "Timber frame, as built, partial insulation": "Timber frame as built",
    "Timber frame, as built, no insulation": "Timber frame as built",
    "Timber frame, with external insulation": "Timber frame with internal insulation",
    ############################
    # Sandstone/limestone wall mappings
    ############################
    "Sandstone or limestone, as built, no insulation": "Stone: sandstone or limestone as built",
    "Sandstone or limestone, with internal insulation": _STONE_BRICK_100MM,
    "Sandstone or limestone, as built, partial insulation": _STONE_BRICK_50MM,
    "Sandstone, as built, no insulation": "Stone: sandstone or limestone as built",
    "Sandstone or limestone, as built, insulated": _STONE_BRICK_100MM,
    "Sandstone, as built, insulated": _STONE_BRICK_100MM,
    "Sandstone, with internal insulation": _STONE_BRICK_100MM,
    "Sandstone or limestone, with external insulation": _STONE_BRICK_100MM,
    "Sandstone, with external insulation": _STONE_BRICK_100MM,
    "Sandstone, as built, partial insulation": _STONE_BRICK_50MM,
    ############################
    # Granite/whinstone wall mappings
    ############################
    "Granite or whinstone, as built, no insulation": "Stone: granite or whinstone as built",
    "Granite or whinstone, with internal insulation": _STONE_BRICK_100MM,
    "Granite or whinstone, as built, partial insulation": _STONE_BRICK_50MM,
    "Granite or whinstone, as built, insulated": _STONE_BRICK_100MM,
    "Granite or whinstone, with external insulation": _STONE_BRICK_100MM,
    ############################
    # System built wall mappings
    ############################
    "System built, as built, no insulation": "System build as built",
    "System built, as built, partial insulation": "System build with 50 mm external or internal insulation",
    "System built, with internal insulation": _SYSTEM_BUILD_100MM,
    "System built, with external insulation": _SYSTEM_BUILD_100MM,
    "System built, as built, insulated": _SYSTEM_BUILD_100MM,
    ############################
    # Cob wall mappings
    ############################
    "Cob, as built": "Cob as built",
    "Cob, with external insulation": _COB_100MM,
    "Cob, with internal insulation": _COB_100MM,
    ############################
    # Park home mappings
    ############################
    "Park home wall, as built": "Park home as built",
    "Park home wall, with external insulation": "Park home with additional insulation",
    "Park home wall, with internal insulation": "Park home with additional insulation",
}


def apply_formula_s_5_1_1(
    is_granite_or_whinstone, is_sandstone_or_limestone, age_band, wall_thickness_table=None
):
    """
    Compute a stone wall u-value from the default wall thickness for an age band.

    In the u-value table in https://bregroup.com/wp-content/uploads/2019/09/RdSAP_2012_9.94-20-09-2019.pdf
    on page 19, certain u-values are marked with an "a" and should be populated using the
    formula defined in section S.5.1.1.

    :param is_granite_or_whinstone: truthy when the wall is granite or whinstone
        (checked first; uses 3.3 - 0.002 * thickness)
    :param is_sandstone_or_limestone: truthy when the wall is sandstone or limestone
        (uses 3 - 0.002 * thickness)
    :param age_band: age band letter; "J", "K" and "L" share the "J_K_L" thickness entry
    :param wall_thickness_table: optional list of dicts with a "type" key and one
        thickness per age band; defaults to recommendations.rdsap_tables.default_wall_thickness
    :return: the u-value as a float
    :raises ValueError: if neither stone flag is set, since no formula applies
    """
    if wall_thickness_table is None:
        # Imported lazily so the project table is only needed when no table is supplied
        from recommendations.rdsap_tables import default_wall_thickness

        wall_thickness_table = default_wall_thickness

    stone_wall_thickness = next(x for x in wall_thickness_table if x["type"] == "stone")

    # Bands J, K and L are stored under a single combined key
    thickness_key = "J_K_L" if age_band in ("J", "K", "L") else age_band
    thickness = stone_wall_thickness[thickness_key]

    if is_granite_or_whinstone:
        return 3.3 - 0.002 * thickness

    if is_sandstone_or_limestone:
        return 3 - 0.002 * thickness

    raise ValueError("Formula S.5.1.1 only applies to granite/whinstone or sandstone/limestone walls")


def _build_wall_uvalue_rows():
    """Return a dict of wall type -> {"Wall_type": ..., <age band>: <table value>, ...}."""
    rows = [
        {"Wall_type": wall_type, **dict(zip(_AGE_BANDS, band_values))}
        for wall_type, band_values in zip(_WALL_TYPES, _WALL_U_VALUES)
    ]
    rows.extend(_PARK_HOME_WALL_UVALUES)
    return {row["Wall_type"]: row for row in rows}


def _resolve_wall_uvalue(table_value, wall, age_band, wall_thickness_table=None):
    """Turn one table cell ("a", "<n>b" or a plain number string) into a float u-value."""
    if table_value == "a":
        # The RdSAP documentation indicates we should use a formula to calculate the u-value
        return float(
            apply_formula_s_5_1_1(
                is_granite_or_whinstone=wall["is_granite_or_whinstone"],
                is_sandstone_or_limestone=wall["is_sandstone_or_limestone"],
                age_band=age_band,
                wall_thickness_table=wall_thickness_table,
            )
        )

    if "b" in table_value:
        # Use whichever is smaller: the tabulated number or the formula value
        tabulated_uvalue = float(table_value.replace("b", ""))
        formula_uvalue = float(
            apply_formula_s_5_1_1(
                is_granite_or_whinstone=wall["is_granite_or_whinstone"],
                is_sandstone_or_limestone=wall["is_sandstone_or_limestone"],
                age_band=age_band,
                wall_thickness_table=wall_thickness_table,
            )
        )
        return min(tabulated_uvalue, formula_uvalue)

    return float(table_value)


def app():
    """
    Build the cleaned EPC description dataset and upload it to S3.

    Every sub-directory of EPC_DIRECTORY is expected to contain a certificates.csv file.
    Each file is read, its column names are normalised (underscores to hyphens, lower case),
    rows lodged before EARLIEST_EPC_DATE are dropped and the remainder is passed through
    EpcClean. The cleaned entries are merged per component, keeping one entry per
    original description, so that when we get new properties we can quickly sanitise
    any description data.

    Wall descriptions without a thermal transmittance are then resolved against the
    wall u-value table for every age band, and the merged dataset is written to S3
    as a single msgpack payload.

    Currently, this application is just run on a local machine.

    :raises ValueError: if a component ends up with duplicated original descriptions
    :raises KeyError: if a cleaned wall description has no entry in the description map
    """
    # Function-scope import (as in the original), done up front so a missing
    # module fails before any data is processed
    from recommendations.rdsap_tables import default_wall_thickness

    cleaned_data = {}
    epc_directories = [entry for entry in EPC_DIRECTORY.iterdir() if entry.is_dir()]
    for directory in tqdm(epc_directories):
        certificates = pd.read_csv(directory / "certificates.csv", low_memory=False)
        # Rename the columns to the same format as the api returns
        certificates.columns = [c.replace("_", "-").lower() for c in certificates.columns]
        # Keep only certificates lodged on or after the date threshold
        certificates = certificates[certificates["lodgement-date"] >= EARLIEST_EPC_DATE]

        # Convert to list of dictionaries as returned by the api and clean them
        cleaner = EpcClean(certificates.to_dict("records"))
        cleaner.clean()

        # Extend cleaned_data with descriptions that have not been seen before
        for component, entries in cleaner.cleaned.items():
            if component not in cleaned_data:
                cleaned_data[component] = entries
            else:
                existing_descriptions = {x["original_description"] for x in cleaned_data[component]}
                cleaned_data[component].extend(
                    x for x in entries if x["original_description"] not in existing_descriptions
                )

        # TODO: Add property age band into this, compute UvalueEstimations
        # (walls, floors, roofs) per directory and store them to s3

    # Basic check to make sure all descriptions are unique
    for cleaned in cleaned_data.values():
        descriptions = [x["original_description"] for x in cleaned]
        if len(descriptions) != len(set(descriptions)):
            raise ValueError("Duplicated descriptions found, check me")

    # Finally, we resolve u-values for the wall descriptions
    wall_uvalue_rows = _build_wall_uvalue_rows()

    for wall in cleaned_data["walls-description"]:
        if wall["thermal_transmittance"]:
            # The description already carries its own value
            continue

        # Remove (assumed)
        description = wall["clean_description"].replace("(assumed)", "").rstrip()
        table_row = wall_uvalue_rows[_EPC_WALL_DESCRIPTION_MAP[description]]

        for age_band in _AGE_BANDS:
            table_value = table_row.get(age_band)
            if table_value is None:
                # Park home rows do not define every age band
                continue

            # NOTE(review): the resolved u-value is not stored anywhere yet, exactly as
            # before; for now this pass only proves every description can be resolved.
            # Confirm where the value should be attached before persisting it.
            _resolve_wall_uvalue(table_value, wall, age_band, default_wall_thickness)

    # We store a singular file however we could store the data under the following file path:
    # cleaned_epc_data/{component}/{original_description}/cleaned.bson
    # where component is one of the keys of cleaned_data. If we store it against the original data, this
    # data being read in will be extremely small, meaning quicker load times. We'll begin by storing as a single
    # file and monitor usage patterns to see if it makes sense to split the data up
    save_data_to_s3(
        data=msgpack.packb(cleaned_data, use_bin_type=True),
        s3_file_name="cleaned_epc_data/cleaned.bson",
        bucket_name=f"retrofit-data-{ENVIRONMENT}",
    )
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: announce the run, then execute the cleaning job.
    print("Initialising cleaner app run")
    app()
|