Adding recommendation materials details to recommendations outputs

This commit is contained in:
Khalim Conn-Kowlessar 2023-08-21 14:39:34 +01:00
parent f2fc921bc5
commit 6ac397b565
28 changed files with 206 additions and 137 deletions

View file

@ -2,10 +2,10 @@ from datetime import datetime
import re
from epc_api.client import EpcClient
from model_data.config import EPC_AUTH_TOKEN
from model_data.BaseUtility import BaseUtility
from model_data.BaseUtility import Definitions
class Property(BaseUtility):
class Property(Definitions):
ATTRIBUTE_MAP = {
"floor-description": "floor",
"hotwater-description": "hotwater",

View file

@ -96,6 +96,9 @@ def upload_recommendations(session, recommendations_to_upload, property_id):
"recommendation_id": recommendation_id,
"material_id": part["id"],
"depth": part["depths"][0] if part["depths"] else None,
"quantity": part["quantity"],
"quantity_unit": part["quantity_unit"],
"estimated_cost": part["estimated_cost"],
}
for rec, recommendation_id in zip(recommendations_to_upload, uploaded_recommendation_ids)
for part in rec["parts"]

View file

@ -1,8 +1,9 @@
from sqlalchemy import Column, BigInteger, String, Float, Boolean, TIMESTAMP, ForeignKey
from sqlalchemy.orm import declarative_base, relationship
from sqlalchemy import Column, BigInteger, String, Float, Boolean, TIMESTAMP, ForeignKey, Enum
from sqlalchemy.orm import declarative_base
from sqlalchemy.sql import func
from backend.app.db.models.portfolio import Portfolio, PropertyModel
from backend.app.db.models.materials import Material
from datatypes.enums import QuantityUnits
Base = declarative_base()
@ -37,6 +38,9 @@ class RecommendationMaterials(Base):
material_id = Column(BigInteger, ForeignKey(Material.id), nullable=False)
created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
depth = Column(Float, nullable=False)
quantity = Column(Float, nullable=False)
quantity_unit = Column(Enum(QuantityUnits, values_callable=lambda x: [e.value for e in x]), nullable=False)
estimated_cost = Column(Float, nullable=False)
class Plan(Base):

View file

@ -115,7 +115,7 @@ def insert_temp_recommendation_id(property_recommendations):
Creates a temporary recommendation id which is needed for
filtering recommendations between default and no, after the optimiser has been
run
:param property_recommendations: nested list of recommendations, grouped by types
:param property_recommendations: nested list of recommendations, grouped by types
:return: Updated recommendations_to_upload, where each recommendation has a "recommendation_id"
integer inserted
"""

5
datatypes/enums.py Normal file
View file

@ -0,0 +1,5 @@
import enum
# Closed set of units in which a recommended material quantity is expressed.
class QuantityUnits(enum.Enum):
# Square metres; the member's value is the plain string "m2".
m2 = "m2"

View file

@ -1,4 +1,4 @@
class BaseUtility:
class Definitions:
"""
This class contains some base attributes which are used across multiple other classes
"""
@ -38,7 +38,7 @@ class BaseUtility:
# addresses will take time to develop to deal with these and future anomalies.
#
# There are several fields within the lodged data where it is possible to enter multiple entries to cater for
# different types of build within a single property, i.e. extensions. This results in multiple entries for
# different types of build within a single property, i.e. extensions. This results in multiple entries for
# the description fields for floor, roof and wall. For the purposes of this data release only the information
# contained within the first of these multiple entries is being provided. As there are no restrictions on the
# value in this first field it means that sometimes the first field in a multiple entry description field may

View file

@ -22,7 +22,7 @@ LAND_REGISTRY_PATHS = [
def app():
"""
For a pre-defined list of constituencies and property types, we'll download EPC data from the API
For a pre-defined list of constituencies and property types, we'll download EPC data from the API
and produce a dataset of cleaned fields so that when we get new properties, we can quickly
sanitise any description data
:return:

View file

@ -1,9 +1,9 @@
from typing import Dict, Union
from model_data.BaseUtility import BaseUtility
from model_data.BaseUtility import Definitions
from model_data.epc_attributes.attribute_utils import extract_thermal_transmittance, extract_component_types
class FloorAttributes(BaseUtility):
class FloorAttributes(Definitions):
DWELLING_BELOW = ["another dwelling below", "other premises below"]
FLOOR_TYPES = ["assumed", "to unheated space", "to external air", "suspended", "solid"]

View file

@ -1,9 +1,9 @@
from typing import Dict, Union
from model_data.BaseUtility import BaseUtility
from model_data.BaseUtility import Definitions
from model_data.epc_attributes.attribute_utils import clean_description, find_keyword
class HotWaterAttributes(BaseUtility):
class HotWaterAttributes(Definitions):
# HEATER_TYPES refer to the main devices used for heating water. These devices can be powered by different energy
# sources.
HEATER_TYPES = [

View file

@ -1,9 +1,9 @@
from typing import Dict, Union
from model_data.BaseUtility import BaseUtility
from model_data.BaseUtility import Definitions
from model_data.epc_attributes.attribute_utils import clean_description, remove_punctuation, find_keyword
class MainFuelAttributes(BaseUtility):
class MainFuelAttributes(Definitions):
FUEL_KEYWORDS = [
'heat network',
'mains gas',
@ -96,7 +96,7 @@ class MainFuelAttributes(BaseUtility):
if not result["fuel_type"]:
result["fuel_type"] = self.UNKNOWN_FUEL
# We'll do checks on unknown fuel types to ensure we don't miss anything
# We'll do checks on unknown fuel types to ensure we don't miss anything
self.is_unknown = True
return result

View file

@ -1,9 +1,9 @@
from model_data.BaseUtility import BaseUtility
from model_data.BaseUtility import Definitions
from model_data.epc_attributes.attribute_utils import clean_description, process_part
from typing import Dict, Union
class MainHeatAttributes(BaseUtility):
class MainHeatAttributes(Definitions):
HEAT_SYSTEMS = [
"boiler", "air source heat pump", "room heaters", "electric storage heaters", "warm air",
"electric underfloor heating", "electric ceiling heating", "community scheme",

View file

@ -1,9 +1,9 @@
from typing import Dict, Union
from model_data.BaseUtility import BaseUtility
from model_data.BaseUtility import Definitions
from model_data.epc_attributes.attribute_utils import clean_description, find_keyword
class MainheatControlAttributes(BaseUtility):
class MainheatControlAttributes(Definitions):
# These systems allow for the automatic regulation of temperature
THERMOSTATIC_CONTROL_KEYWORDS = [
'room thermostats',

View file

@ -1,10 +1,10 @@
import re
from typing import Dict, Union
from model_data.BaseUtility import BaseUtility
from model_data.BaseUtility import Definitions
from model_data.epc_attributes.attribute_utils import extract_component_types, extract_thermal_transmittance
class RoofAttributes(BaseUtility):
class RoofAttributes(Definitions):
ROOF_TYPES = ['pitched', 'roof room', 'loft', 'flat', 'thatched', 'at rafters', 'assumed']
DWELLING_ABOVE = ["another dwelling above", "other premises above"]

View file

@ -1,9 +1,9 @@
from typing import Dict, Union
from model_data.BaseUtility import BaseUtility
from model_data.BaseUtility import Definitions
from model_data.epc_attributes.attribute_utils import extract_component_types, extract_thermal_transmittance
class WallAttributes(BaseUtility):
class WallAttributes(Definitions):
WALL_TYPES = ['cavity wall', 'filled cavity', 'solid brick', 'system built', 'timber frame', 'granite or whinstone',
'as built', 'cob', 'assumed', 'sandstone or limestone']

View file

@ -1,9 +1,9 @@
from typing import Dict, Union
from model_data.BaseUtility import BaseUtility
from model_data.BaseUtility import Definitions
from model_data.epc_attributes.attribute_utils import clean_description
class WindowAttributes(BaseUtility):
class WindowAttributes(Definitions):
GLAZING_KEYWORDS = ["glazing", "glazed", "glaze"]
GLAZING_COVERAGE = ["fully", "mostly", "partial", "some", "full", "thoughout"]
GLAZING_TYPES = ["double", "triple", "secondary", "multiple", "high performance", "single"]

View file

@ -36,13 +36,13 @@ def extract_component_types(result: dict, description: str, list_of_components:
Dict[str, Union[None, str, float]], str
]:
"""
Extracts component types from the description, updates the result dictionary, and removes the matched component
types from the description.
Extracts component types from the description, updates the result dictionary, and removes the matched component
types from the description.
:param result: Dictionary to store the results in.
:param description: Lowercase description string.
:param list_of_components: List of component types to extract from the description.
:return: A tuple containing the updated result dictionary and the description with the matched component types
:param list_of_components: List of component types to extract from the description.
:return: A tuple containing the updated result dictionary and the description with the matched component types
removed.
"""
for component in list_of_components:

View file

@ -1,7 +1,7 @@
from pathlib import Path
import numpy as np
import pandas as pd
from model_data.BaseUtility import BaseUtility
from model_data.BaseUtility import Definitions
from simulation_system.Settings import (
DATA_PROCESSOR_SETTINGS,
EARLIEST_EPC_DATE,
@ -12,7 +12,7 @@ from simulation_system.Settings import (
FLOOR_LEVEL_MAP,
BUILT_FORM_REMAP,
COLUMNS_TO_MERGE_ON
)
)
from typing import List
@ -43,11 +43,11 @@ class DataProcessor:
if DATA_PROCESSOR_SETTINGS['epc_minimum_count'] >= 1:
# If we have multiple EPC records, we can try and do filling
self.fill_na_fields()
self.data = self.data.sort_values(["UPRN", "LODGEMENT_DATE"], ascending=True)
return self.data
def fill_na_fields(self, columns_to_fill: List = COLUMNS_TO_MERGE_ON):
"""
If we have a minimum of 2 epcs, we can do back fill and forward fill on certain data fields
@ -56,35 +56,33 @@ class DataProcessor:
# The groupby changes the row order, so we re-index on the original index (level_1) and sort to restore it
filled_data = self.data.groupby("UPRN", group_keys=True)[columns_to_fill].apply(
lambda group: group.fillna(method='bfill').fillna(method='ffill')
).reset_index().set_index('level_1').sort_index()
).reset_index().set_index('level_1').sort_index()
self.data[columns_to_fill] = filled_data[columns_to_fill]
self.data[columns_to_fill] = filled_data[columns_to_fill]
def remap_columns(self):
    """
    Replace anomalous placeholder values with None and remap selected columns.

    Reads ``self.data``, maps every value listed in ``Definitions.DATA_ANOMALY_MATCHES``
    (and every NaN) to None, applies ``FLOOR_LEVEL_MAP`` to FLOOR_LEVEL and
    ``BUILT_FORM_REMAP`` to BUILT_FORM, then rebinds ``self.data`` to the result.

    :return: None
    """
    # Map all anomaly values to None
    data_anomaly_map = dict.fromkeys(Definitions.DATA_ANOMALY_MATCHES)

    # Use replace function to map data (if exists in key), to corresponding value - i.e. Remove invalid values
    data = self.data.replace(data_anomaly_map)
    # np.nan, not np.NAN: the upper-case alias was removed in NumPy 2.0
    data = data.replace(np.nan, None)

    # Remap certain columns
    data['FLOOR_LEVEL'] = data['FLOOR_LEVEL'].replace(FLOOR_LEVEL_MAP)
    # Write the remapped values back to BUILT_FORM. The previous code assigned them to a
    # misspelled 'BUILT_FROM' column, which left BUILT_FORM itself un-remapped.
    data['BUILT_FORM'] = data['BUILT_FORM'].replace(BUILT_FORM_REMAP)

    self.data = data
def make_cleaning_averages(self) -> pd.DataFrame:
# Define a custom function to calculate the median, excluding missing values
def median_without_missing(group):
return group[AVERAGE_FIXED_FEATURES].median(skipna=True)
cleaning_averages = self.data.groupby(
["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
observed=True,
@ -93,41 +91,58 @@ class DataProcessor:
general_averages = self.data.groupby(["PROPERTY_TYPE", "BUILT_FORM"], observed=True).apply(
median_without_missing).reset_index()
property_averages = self.data.groupby(["PROPERTY_TYPE"], observed=True).apply(
median_without_missing).reset_index()
built_form_averages = self.data.groupby(["BUILT_FORM"], observed=True).apply(
median_without_missing).reset_index()
# We can clean up any NA's in the cleaning averages with the general averages here
cleaning_averages_filled = pd.merge(cleaning_averages, general_averages, on=['PROPERTY_TYPE', 'BUILT_FORM'], suffixes=['', '_AVERAGE'])
cleaning_averages_filled = pd.merge(cleaning_averages_filled, property_averages, on=['PROPERTY_TYPE'], suffixes=['', '_PROPERTY_AVERAGE'])
cleaning_averages_filled = pd.merge(cleaning_averages_filled, built_form_averages, on=['BUILT_FORM'], suffixes=['', '_BUILT_FORM_AVERAGE'])
cleaning_averages_filled = pd.merge(cleaning_averages, general_averages, on=['PROPERTY_TYPE', 'BUILT_FORM'],
suffixes=['', '_AVERAGE'])
cleaning_averages_filled = pd.merge(cleaning_averages_filled, property_averages, on=['PROPERTY_TYPE'],
suffixes=['', '_PROPERTY_AVERAGE'])
cleaning_averages_filled = pd.merge(cleaning_averages_filled, built_form_averages, on=['BUILT_FORM'],
suffixes=['', '_BUILT_FORM_AVERAGE'])
# Replace any missing NAN values with averages for the same Property type and built form
cleaning_averages_filled['TOTAL_FLOOR_AREA'] = cleaning_averages_filled['TOTAL_FLOOR_AREA'].fillna(cleaning_averages_filled['TOTAL_FLOOR_AREA_AVERAGE'])
cleaning_averages_filled['FLOOR_HEIGHT'] = cleaning_averages_filled['FLOOR_HEIGHT'].fillna(cleaning_averages_filled['FLOOR_HEIGHT_AVERAGE'])
cleaning_averages_filled = cleaning_averages_filled.drop(columns=['TOTAL_FLOOR_AREA_AVERAGE', 'FLOOR_HEIGHT_AVERAGE'])
cleaning_averages_filled['TOTAL_FLOOR_AREA'] = cleaning_averages_filled['TOTAL_FLOOR_AREA'].fillna(
cleaning_averages_filled['TOTAL_FLOOR_AREA_AVERAGE'])
cleaning_averages_filled['FLOOR_HEIGHT'] = cleaning_averages_filled['FLOOR_HEIGHT'].fillna(
cleaning_averages_filled['FLOOR_HEIGHT_AVERAGE'])
cleaning_averages_filled = cleaning_averages_filled.drop(
columns=['TOTAL_FLOOR_AREA_AVERAGE', 'FLOOR_HEIGHT_AVERAGE'])
# If there are still NA values i.e. the averages do not have values for a specific group of property type and built form
# If there are still NA values i.e. the averages do not have values for a specific group of property type
# and built form
# We can use just the property type average and replace
cleaning_averages_filled['TOTAL_FLOOR_AREA'] = cleaning_averages_filled['TOTAL_FLOOR_AREA'].fillna(cleaning_averages_filled['TOTAL_FLOOR_AREA_PROPERTY_AVERAGE'])
cleaning_averages_filled['FLOOR_HEIGHT'] = cleaning_averages_filled['FLOOR_HEIGHT'].fillna(cleaning_averages_filled['FLOOR_HEIGHT_PROPERTY_AVERAGE'])
cleaning_averages_filled = cleaning_averages_filled.drop(columns=['TOTAL_FLOOR_AREA_PROPERTY_AVERAGE', 'FLOOR_HEIGHT_PROPERTY_AVERAGE'])
cleaning_averages_filled['TOTAL_FLOOR_AREA'] = cleaning_averages_filled['TOTAL_FLOOR_AREA'].fillna(
cleaning_averages_filled['TOTAL_FLOOR_AREA_PROPERTY_AVERAGE'])
cleaning_averages_filled['FLOOR_HEIGHT'] = cleaning_averages_filled['FLOOR_HEIGHT'].fillna(
cleaning_averages_filled['FLOOR_HEIGHT_PROPERTY_AVERAGE'])
cleaning_averages_filled = cleaning_averages_filled.drop(
columns=['TOTAL_FLOOR_AREA_PROPERTY_AVERAGE', 'FLOOR_HEIGHT_PROPERTY_AVERAGE'])
# If there are still NA values, use BUILT FORM averages
cleaning_averages_filled['TOTAL_FLOOR_AREA'] = cleaning_averages_filled['TOTAL_FLOOR_AREA'].fillna(cleaning_averages_filled['TOTAL_FLOOR_AREA_BUILT_FORM_AVERAGE'])
cleaning_averages_filled['FLOOR_HEIGHT'] = cleaning_averages_filled['FLOOR_HEIGHT'].fillna(cleaning_averages_filled['FLOOR_HEIGHT_BUILT_FORM_AVERAGE'])
cleaning_averages_filled = cleaning_averages_filled.drop(columns=['TOTAL_FLOOR_AREA_BUILT_FORM_AVERAGE', 'FLOOR_HEIGHT_BUILT_FORM_AVERAGE'])
cleaning_averages_filled['TOTAL_FLOOR_AREA'] = cleaning_averages_filled['TOTAL_FLOOR_AREA'].fillna(
cleaning_averages_filled['TOTAL_FLOOR_AREA_BUILT_FORM_AVERAGE'])
cleaning_averages_filled['FLOOR_HEIGHT'] = cleaning_averages_filled['FLOOR_HEIGHT'].fillna(
cleaning_averages_filled['FLOOR_HEIGHT_BUILT_FORM_AVERAGE'])
cleaning_averages_filled = cleaning_averages_filled.drop(
columns=['TOTAL_FLOOR_AREA_BUILT_FORM_AVERAGE', 'FLOOR_HEIGHT_BUILT_FORM_AVERAGE'])
# If there are still NA values, use the average across all properties in the constituency
cleaning_averages_filled['TOTAL_FLOOR_AREA'] = cleaning_averages_filled['TOTAL_FLOOR_AREA'].fillna(cleaning_averages_filled['TOTAL_FLOOR_AREA'].mean())
cleaning_averages_filled['FLOOR_HEIGHT'] = cleaning_averages_filled['FLOOR_HEIGHT'].fillna(cleaning_averages_filled['FLOOR_HEIGHT'].mean())
cleaning_averages_filled['TOTAL_FLOOR_AREA'] = cleaning_averages_filled['TOTAL_FLOOR_AREA'].fillna(
cleaning_averages_filled['TOTAL_FLOOR_AREA'].mean())
cleaning_averages_filled['FLOOR_HEIGHT'] = cleaning_averages_filled['FLOOR_HEIGHT'].fillna(
cleaning_averages_filled['FLOOR_HEIGHT'].mean())
# If the constituency is all NA values, then take UK AVERAGE VALUES
cleaning_averages_filled['TOTAL_FLOOR_AREA'] = cleaning_averages_filled['TOTAL_FLOOR_AREA'].fillna(TOTAL_FLOOR_AREA_NATIONAL_AVERAGE)
cleaning_averages_filled['FLOOR_HEIGHT'] = cleaning_averages_filled['FLOOR_HEIGHT'].fillna(FLOOR_HEIGHT_NATIONAL_AVERAGE)
cleaning_averages_filled['TOTAL_FLOOR_AREA'] = cleaning_averages_filled['TOTAL_FLOOR_AREA'].fillna(
TOTAL_FLOOR_AREA_NATIONAL_AVERAGE)
cleaning_averages_filled['FLOOR_HEIGHT'] = cleaning_averages_filled['FLOOR_HEIGHT'].fillna(
FLOOR_HEIGHT_NATIONAL_AVERAGE)
return cleaning_averages_filled
@ -143,7 +158,6 @@ class DataProcessor:
counts = counts[counts["count"] > epc_minimum_count]
self.data = pd.merge(self.data, counts, on='UPRN')
def recast_df_columns(self, column_mappings: dict) -> None:
"""
Recast columns from the dataframe to ensure the behaviour we want
@ -156,7 +170,6 @@ class DataProcessor:
for value in values:
self.data[key] = self.data[key].astype(value)
def confine_data(self) -> None:
"""
Include all step to reduce down the data based on assumptions
@ -177,12 +190,11 @@ class DataProcessor:
self.data = self.data[self.data["TRANSACTION_TYPE"] != "new dwelling"]
self.data = self.data[~self.data["FLOOR_LEVEL"].isin(["top floor", "mid floor"])]
def clean_multi_glaze_proportion(self) -> None:
"""
Default a missing multi-glaze proportion to 100 when the windows are described as fully glazed.

A row is updated only when its MULTI_GLAZE_PROPORTION is null and its WINDOWS_DESCRIPTION is one of
FULLY_GLAZED_DESCRIPTIONS. The value is written directly into self.data; nothing is returned.
"""
# Boolean row mask: proportion is missing AND the description is in the fully-glazed list
no_multi_glaze_proportion_index = pd.isnull(self.data["MULTI_GLAZE_PROPORTION"]) & (self.data["WINDOWS_DESCRIPTION"].isin(FULLY_GLAZED_DESCRIPTIONS))
no_multi_glaze_proportion_index = pd.isnull(self.data["MULTI_GLAZE_PROPORTION"]) & (
self.data["WINDOWS_DESCRIPTION"].isin(FULLY_GLAZED_DESCRIPTIONS))
# Only the masked rows are touched; rows that already have a proportion keep it
self.data.loc[no_multi_glaze_proportion_index, 'MULTI_GLAZE_PROPORTION'] = 100

View file

@ -1,13 +1,13 @@
import numpy as np
import pandas as pd
from tqdm import tqdm
from model_data.BaseUtility import BaseUtility
from model_data.BaseUtility import Definitions
from pathlib import Path
from model_data.simulation_system.Settings import (
MANDATORY_FIXED_FEATURES,
AVERAGE_FIXED_FEATURES,
LATEST_FIELD,
COMPONENT_FEATURES,
AVERAGE_FIXED_FEATURES,
LATEST_FIELD,
COMPONENT_FEATURES,
RDSAP_RESPONSE,
HEAT_DEMAND_RESPONSE,
COLUMNS_TO_MERGE_ON,
@ -18,6 +18,7 @@ from DataProcessor import DataProcessor
DATA_DIRECTORY = Path(__file__).parent / 'data' / 'all-domestic-certificates'
def app():
# Get all the files in the directory
@ -30,8 +31,9 @@ def app():
dataset = []
# 116
# 128048706
# PosixPath('/home/ubuntu/Documents/python/hestia/Model/model_data/simulation_system/data/all-domestic-certificates/domestic-E09000021-Kingston-upon-Thames')
for directory in tqdm(directories):
# PosixPath('/home/ubuntu/Documents/python/hestia/Model/model_data/simulation_system/data/all-domestic
# -certificates/domestic-E09000021-Kingston-upon-Thames')
for directory in tqdm(directories):
filepath = directory / "certificates.csv"
@ -45,7 +47,7 @@ def app():
# Fixed features - these are property attributes that shouldn't change over time
fixed_data = {}
# If a property has changed building type, we can ignore the epc rating i.e. this should be 1 unique row
# If a property has changed building type, we can ignore the epc rating i.e. this should be 1 unique row
if max(property_data[MANDATORY_FIXED_FEATURES].nunique()) > 1:
continue
@ -61,16 +63,21 @@ def app():
cleaned_columns_to_merge_on = na_columns.index[~na_columns].to_list()
# Get the corresponding groupby and merge, and fill in NA values
cleaning_averages_to_merge = cleaning_averages.groupby(cleaned_columns_to_merge_on)[['TOTAL_FLOOR_AREA', 'FLOOR_HEIGHT']].mean()
modified_property_data = pd.merge(property_data, cleaning_averages_to_merge, on=cleaned_columns_to_merge_on, suffixes=['', '_AVERAGE'])
modified_property_data['TOTAL_FLOOR_AREA'] = modified_property_data['TOTAL_FLOOR_AREA'].fillna(modified_property_data['TOTAL_FLOOR_AREA_AVERAGE'])
modified_property_data['FLOOR_HEIGHT'] = modified_property_data['FLOOR_HEIGHT'].fillna(modified_property_data['FLOOR_HEIGHT_AVERAGE'])
modified_property_data = modified_property_data.drop(columns=['TOTAL_FLOOR_AREA_AVERAGE', 'FLOOR_HEIGHT_AVERAGE'])
cleaning_averages_to_merge = cleaning_averages.groupby(cleaned_columns_to_merge_on)[
['TOTAL_FLOOR_AREA', 'FLOOR_HEIGHT']].mean()
modified_property_data = pd.merge(property_data, cleaning_averages_to_merge, on=cleaned_columns_to_merge_on,
suffixes=['', '_AVERAGE'])
modified_property_data['TOTAL_FLOOR_AREA'] = modified_property_data['TOTAL_FLOOR_AREA'].fillna(
modified_property_data['TOTAL_FLOOR_AREA_AVERAGE'])
modified_property_data['FLOOR_HEIGHT'] = modified_property_data['FLOOR_HEIGHT'].fillna(
modified_property_data['FLOOR_HEIGHT_AVERAGE'])
modified_property_data = modified_property_data.drop(
columns=['TOTAL_FLOOR_AREA_AVERAGE', 'FLOOR_HEIGHT_AVERAGE'])
for field in AVERAGE_FIXED_FEATURES:
vals = list(modified_property_data[field].dropna().unique())
vals = list(modified_property_data[field].dropna().unique())
if len(vals) > 1:
# Check the values are too far apart
# TODO: we could have multiple values here, why only use the first two?
@ -80,10 +87,10 @@ def app():
if len(vals) == 0:
wrong_var
fixed_data[field] = np.mean(vals)
#Combine all fields together
# Combine all fields together
fixed_data.update(mandatory_field_data)
fixed_data.update(latest_field_data)
@ -132,4 +139,4 @@ def app():
if __name__ == "__main__":
app()
app()

View file

@ -1,15 +1,15 @@
from pathlib import Path
from Settings import (
RDSAP_RESPONSE,
FLOOR_LEVEL_MAP,
RDSAP_RESPONSE,
FLOOR_LEVEL_MAP,
BUILT_FORM_REMAP,
EARLIEST_EPC_DATE,
EARLIEST_EPC_DATE,
FULLY_GLAZED_DESCRIPTIONS,
FIXED_FEATURES,
LATEST_FIELD,
COMPONENT_FEATURES
)
from model_data.BaseUtility import BaseUtility
)
from model_data.BaseUtility import Definitions
from tqdm import tqdm
import pandas as pd
import numpy as np
@ -21,17 +21,18 @@ RANDOM_SEED = 0
DATA_DIRECTORY = Path(__file__).parent / 'data' / 'all-domestic-certificates'
FLOAT_COLUMNS = [
'NUMBER_OPEN_FIREPLACES',
'EXTENSION_COUNT',
'TOTAL_FLOOR_AREA',
'PHOTO_SUPPLY',
'FIXED_LIGHTING_OUTLETS_COUNT',
'FLOOR_HEIGHT',
'NUMBER_HABITABLE_ROOMS',
'LOW_ENERGY_LIGHTING',
'MULTI_GLAZE_PROPORTION',
'NUMBER_HEATED_ROOMS'
]
'NUMBER_OPEN_FIREPLACES',
'EXTENSION_COUNT',
'TOTAL_FLOOR_AREA',
'PHOTO_SUPPLY',
'FIXED_LIGHTING_OUTLETS_COUNT',
'FLOOR_HEIGHT',
'NUMBER_HABITABLE_ROOMS',
'LOW_ENERGY_LIGHTING',
'MULTI_GLAZE_PROPORTION',
'NUMBER_HEATED_ROOMS'
]
def create_raw_data():
"""
@ -40,7 +41,7 @@ def create_raw_data():
directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()]
# directories = directories[0:10]
dfs = []
dfs = []
for directory in tqdm(directories):
filepath = directory / "certificates.csv"
df = pd.read_csv(filepath, low_memory=False)
@ -52,7 +53,8 @@ def create_raw_data():
df = df[~df["FLOOR_LEVEL"].isin(["top floor", "mid floor"])]
# Change multi glaze proportion
no_multi_glaze_proportion_index = pd.isnull(df["MULTI_GLAZE_PROPORTION"]) & (df["WINDOWS_DESCRIPTION"].isin(FULLY_GLAZED_DESCRIPTIONS))
no_multi_glaze_proportion_index = pd.isnull(df["MULTI_GLAZE_PROPORTION"]) & (
df["WINDOWS_DESCRIPTION"].isin(FULLY_GLAZED_DESCRIPTIONS))
df.loc[no_multi_glaze_proportion_index, 'MULTI_GLAZE_PROPORTION'] = 100
# Recast
@ -63,12 +65,12 @@ def create_raw_data():
df = df.sort_values(["UPRN", "LODGEMENT_DATE"], ascending=True)
# Map all anomaly values to None
data_anomaly_map = dict(zip(BaseUtility.DATA_ANOMALY_MATCHES, [None]*len(BaseUtility.DATA_ANOMALY_MATCHES)))
data_anomaly_map = dict(zip(Definitions.DATA_ANOMALY_MATCHES, [None] * len(Definitions.DATA_ANOMALY_MATCHES)))
# Use replace function to map data (if exists in key), to corresponding value - i.e. Remove invalid values
df = df.replace(data_anomaly_map)
df = df.replace(np.NAN, None)
# Remap certain columns
df['FLOOR_LEVEL'] = df['FLOOR_LEVEL'].replace(FLOOR_LEVEL_MAP)
df['BUILT_FROM'] = df['BUILT_FORM'].replace(BUILT_FORM_REMAP)
@ -83,7 +85,6 @@ def create_raw_data():
df[RDSAP_RESPONSE] = pd.to_numeric(df[RDSAP_RESPONSE], downcast='unsigned')
df[FLOAT_COLUMNS] = df[FLOAT_COLUMNS].apply(pd.to_numeric, downcast='float')
dfs.append(df)
data = pd.concat(dfs)
@ -95,23 +96,23 @@ def create_raw_data():
def main():
"""
Fit a regression predictor for RDSAP_RESPONSE on a subsample of the training data and run it on the test set.

Loads the train/validation parquet file, keeps a random sample of roughly 1/100 of its rows
(seeded with RANDOM_SEED), fits the predictor, then evaluates, predicts and computes feature
importance on the test parquet file.
"""
# NOTE(review): TabularDataset / TabularPredictor are imported outside the lines visible here -
# presumably AutoGluon; confirm against the file's imports.
data = TabularDataset(data='./model_build_data/energy_data/cleaned_data/train_validation_data.parquet')
# Subsample size is the row count divided by 100, rounded to the nearest integer
subsample_size = round(len(data)/100)
subsample_size = round(len(data) / 100)
data = data.sample(subsample_size, random_state=RANDOM_SEED)
# Fitted models are stored under the "agModels-predictENERGY" path; 'KNN' and 'CAT' model types are excluded
predictor_RDSAP = TabularPredictor(
label=RDSAP_RESPONSE,
path="agModels-predictENERGY",
label=RDSAP_RESPONSE,
path="agModels-predictENERGY",
problem_type="regression",
eval_metric='mean_absolute_error'
).fit(data, time_limit=800, presets='high_quality', excluded_model_types=['KNN', 'CAT'])
).fit(data, time_limit=800, presets='high_quality', excluded_model_types=['KNN', 'CAT'])
test_data = TabularDataset('./model_build_data/energy_data/cleaned_data/test_data.parquet')
# NOTE(review): the three results below are not returned, printed or saved in the lines visible
# here, so they are only observable when the function is run interactively or under a debugger.
performance = predictor_RDSAP.evaluate(test_data)
predictions = predictor_RDSAP.predict(test_data)
predictor_RDSAP.feature_importance(test_data)
if __name__ == "__main__":
main()
main()

View file

@ -36,7 +36,7 @@ class TestCleanFloor:
# Test that invalid descriptions raise a ValueError
invalid_descriptions = [
"invalid description",
"description with no known floor types or thermal transmittance",
"description with no known floor data_types or thermal transmittance",
]
for description in invalid_descriptions:

View file

@ -29,7 +29,7 @@ class TestHotWaterAttributes:
# Test that invalid descriptions raise a ValueError
invalid_descriptions = [
"invalid description",
"description with no known hotwater types",
"description with no known hotwater data_types",
""
]

View file

@ -29,7 +29,7 @@ class TestMainHeatControlAttributes:
# Test that invalid descriptions raise a ValueError
invalid_descriptions = [
"invalid description",
"description with no known fuel types",
"description with no known fuel data_types",
]
for description in invalid_descriptions:

View file

@ -34,7 +34,7 @@ class TestMainHeatAttributes:
invalid_descriptions = [
"",
"invalid description",
"description with no known heating types",
"description with no known heating data_types",
]
for description in invalid_descriptions:

View file

@ -29,7 +29,7 @@ class TestMainHeatControlAttributes:
# Test that invalid descriptions raise a ValueError
invalid_descriptions = [
"invalid description",
"description with no known heating control types",
"description with no known heating control data_types",
]
for description in invalid_descriptions:

View file

@ -1,6 +1,7 @@
import math
from typing import List
from model_data.BaseUtility import BaseUtility
from model_data.BaseUtility import Definitions
from datatypes.enums import QuantityUnits
from backend.Property import Property
from recommendations.rdsap_tables import default_wall_thickness, age_band_data
from recommendations.recommendation_utils import (
@ -13,7 +14,7 @@ suspended_floor_insulation_parts = [
# Example product
# https://www.insulationsuperstore.co.uk/product/recticel-eurothane-general-purpose-pir-insulation-board-2400
# -x-1200-x-100mm.html
# All product types here:
# All product types here:
# https://www.insulationsuperstore.co.uk/browse/insulation/brand/recticel/filterby/application/floors.html
"type": "suspended_floor_insulation",
"description": "Rigid Insulation Foam Boards",
@ -29,7 +30,7 @@ suspended_floor_insulation_parts = [
{
# Example product
# https://www.insulationsuperstore.co.uk/product/rockwool-rwa45-acoustic-insulation-slab-100mm-2-88m2-pack.html
# All product types here:
# All product types here:
# https://www.insulationsuperstore.co.uk/browse/insulation/brand/rockwool/filterby/application/floors
# /material/mineral-wool.html
"type": "suspended_floor_insulation",
@ -49,7 +50,7 @@ solid_floor_insulation_parts = [
{
# Example product
# https://www.insulationexpress.co.uk/floor-insulation/solid-floor-insulation/k103-100mm
# All product types here:
# All product types here:
# https://www.insulationexpress.co.uk/floor-insulation/solid-floor-insulation?brand=7015&p=1
# Example screed https://www.screwfix.com/p/mapei-ultraplan-3240-self-levelling-compound-25kg/4959f
"type": "solid_floor_insulation",
@ -69,7 +70,7 @@ solid_floor_insulation_parts = [
parts = suspended_floor_insulation_parts + solid_floor_insulation_parts
class FloorRecommendations(BaseUtility):
class FloorRecommendations(Definitions):
# Part L building regulations indicate that any renovations on an existing property's floors should
# achieve a U-value of no higher than 0.25
BUILDING_REGULATIONS_PART_L_MAX_U_VALUE = 0.25
@ -305,17 +306,25 @@ class FloorRecommendations(BaseUtility):
if new_u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
lowest_selected_u_value = update_lowest_selected_u_value(lowest_selected_u_value, new_u_value)
estimated_cost = cost_per_unit * self.property.floor_area
self.recommendations.append(
{
"parts": [
get_recommended_part(part, depth, cost_per_unit),
get_recommended_part(
part=part,
selected_depth=depth,
quantity=self.property.floor_area,
quantity_unit=QuantityUnits.m2.value,
selected_total_cost=estimated_cost
),
],
"type": "floor_insulation",
"description": self._make_floor_description(part, depth),
"starting_u_value": u_value,
"new_u_value": new_u_value,
"sap_points": estimate_sap_points(),
"cost": cost_per_unit * self.property.floor_area,
"cost": estimated_cost,
}
)

View file

@ -1,8 +1,9 @@
import itertools
import math
from datatypes.enums import QuantityUnits
from backend.Property import Property
from model_data.BaseUtility import BaseUtility
from model_data.BaseUtility import Definitions
from recommendations.recommendation_utils import (
r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value,
get_recommended_part, get_uvalue_estimate, estimate_sap_points
@ -184,7 +185,7 @@ internal_wall_insulation_parts = [
wall_parts = external_wall_insulation_parts + internal_wall_insulation_parts
class WallRecommendations(BaseUtility):
class WallRecommendations(Definitions):
YEAR_WALLS_BUILT_WITH_INSULATION = 1990
# After 1930, solid brick walls became less popular and, instead, cavity walls became a
# more popular choice
@ -332,15 +333,25 @@ class WallRecommendations(BaseUtility):
if new_u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
lowest_selected_u_value = update_lowest_selected_u_value(lowest_selected_u_value, new_u_value)
estimated_cost = cost_per_unit * self.property.insulation_wall_area
recommendations.append(
{
"parts": [get_recommended_part(part, depth, cost_per_unit)],
"parts": [
get_recommended_part(
part=part,
selected_depth=depth,
quantity=self.property.insulation_wall_area,
quantity_unit=QuantityUnits.m2.value,
selected_total_cost=estimated_cost
)
],
"type": "wall_insulation",
"description": "Install " + self._make_description(part, depth),
"starting_u_value": u_value,
"new_u_value": new_u_value,
"sap_points": estimate_sap_points(),
"cost": cost_per_unit * self.property.insulation_wall_area,
"cost": estimated_cost,
}
)
@ -394,10 +405,25 @@ class WallRecommendations(BaseUtility):
if combined_new_u_value - self.U_VALUE_ERROR <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
# Here you might want to define a way to add both recommendations together.
# For now, I'm adding them as separate items in the list
ewi_esimtated_cost = ewi_cost_per_unit * self.property.insulation_wall_area
iwi_esimtated_cost = iwi_cost_per_unit * self.property.insulation_wall_area
recommendation = {
"parts": [
get_recommended_part(ewi_part, ewi_depth, ewi_cost_per_unit),
get_recommended_part(iwi_part, iwi_depth, iwi_cost_per_unit)
get_recommended_part(
part=ewi_part,
selected_depth=ewi_depth,
quantity=self.property.insulation_wall_area,
quantity_unit=QuantityUnits.m2.value,
selected_total_cost=ewi_esimtated_cost
),
get_recommended_part(
part=iwi_part,
selected_depth=iwi_depth,
quantity=self.property.insulation_wall_area,
quantity_unit=QuantityUnits.m2.value,
selected_total_cost=iwi_esimtated_cost
)
],
"type": "wall_insulation",
"description": (
@ -407,10 +433,7 @@ class WallRecommendations(BaseUtility):
"starting_u_value": u_value,
"new_u_value": combined_new_u_value,
"sap_points": estimate_sap_points(),
"cost": (
ewi_cost_per_unit * self.property.insulation_wall_area + iwi_cost_per_unit *
self.property.insulation_wall_area
),
"cost": ewi_esimtated_cost + iwi_esimtated_cost,
}
self.recommendations.append(recommendation)

View file

@ -110,17 +110,21 @@ def update_lowest_selected_u_value(lowest_selected_u_value, new_u_value):
return lowest_selected_u_value
def get_recommended_part(part, selected_depth, selected_cost):
def get_recommended_part(part, selected_depth, selected_total_cost, quantity, quantity_unit):
"""
Build the recommended version of a part for one selected depth, without modifying the input part.
:param part: part to be recommended; it is deep-copied, so the caller's object is left untouched
:param selected_depth: depth of the selected part; stored as the only element of "depths"
:param selected_cost: cost of the selected depth
:param selected_total_cost: Total cost of the selected part; stored under "estimated_cost"
:param quantity: Quantity of the selected part; stored under "quantity"
:param quantity_unit: Unit of the quantity; stored under "quantity_unit"
:return: a deep copy of ``part`` with "depths" replaced by ``[selected_depth]`` and the
cost/quantity entries assigned below
"""
# Deep copy so that nested values of the original part are never mutated
recommended_part = deepcopy(part)
# Keep "depths" a list, narrowed to the single chosen depth
recommended_part["depths"] = [selected_depth]
recommended_part["cost"] = [selected_cost]
recommended_part["estimated_cost"] = selected_total_cost
recommended_part["quantity"] = quantity
recommended_part["quantity_unit"] = quantity_unit
return recommended_part

View file

@ -46,6 +46,7 @@ package:
- 'model_data/EpcClean.py'
- 'model_data/utils.py'
- 'model_data/epc_attributes/**'
- 'datatypes/**'
- '!infrastructure/**'
- '!data_collection/**'
- '!node_modules/**'