Adding recommendation materials details to recommendations outputs

2026-07-27 23:35:01 +00:00 · 2023-08-21 14:39:34 +01:00 · 2023-08-21 14:39:34 +01:00 · 6ac397b565
commit 6ac397b565
parent f2fc921bc5
28 changed files with 206 additions and 137 deletions
--- a/backend/Property.py
+++ b/backend/Property.py
@ -2,10 +2,10 @@ from datetime import datetime
 import re
 from epc_api.client import EpcClient
 from model_data.config import EPC_AUTH_TOKEN
-from model_data.BaseUtility import BaseUtility
+from model_data.BaseUtility import Definitions


-class Property(BaseUtility):
+class Property(Definitions):
    ATTRIBUTE_MAP = {
        "floor-description": "floor",
        "hotwater-description": "hotwater",
--- a/backend/app/db/functions/recommendations_functions.py
+++ b/backend/app/db/functions/recommendations_functions.py
@ -96,6 +96,9 @@ def upload_recommendations(session, recommendations_to_upload, property_id):
            "recommendation_id": recommendation_id,
            "material_id": part["id"],
            "depth": part["depths"][0] if part["depths"] else None,
+            "quantity": part["quantity"],
+            "quantity_unit": part["quantity_unit"],
+            "estimated_cost": part["estimated_cost"],
        }
        for rec, recommendation_id in zip(recommendations_to_upload, uploaded_recommendation_ids)
        for part in rec["parts"]
--- a/backend/app/db/models/recommendations.py
+++ b/backend/app/db/models/recommendations.py
@ -1,8 +1,9 @@
-from sqlalchemy import Column, BigInteger, String, Float, Boolean, TIMESTAMP, ForeignKey
-from sqlalchemy.orm import declarative_base, relationship
+from sqlalchemy import Column, BigInteger, String, Float, Boolean, TIMESTAMP, ForeignKey, Enum
+from sqlalchemy.orm import declarative_base
 from sqlalchemy.sql import func
 from backend.app.db.models.portfolio import Portfolio, PropertyModel
 from backend.app.db.models.materials import Material
+from datatypes.enums import QuantityUnits

 Base = declarative_base()

@ -37,6 +38,9 @@ class RecommendationMaterials(Base):
    material_id = Column(BigInteger, ForeignKey(Material.id), nullable=False)
    created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
    depth = Column(Float, nullable=False)
+    quantity = Column(Float, nullable=False)
+    quantity_unit = Column(Enum(QuantityUnits, values_callable=lambda x: [e.value for e in x]), nullable=False)
+    estimated_cost = Column(Float, nullable=False)


 class Plan(Base):
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@ -115,7 +115,7 @@ def insert_temp_recommendation_id(property_recommendations):
    Creates a temporary recommendation id which is needed for
    filtering recommendations between default and no, after the optimiser has been
    run
-    :param property_recommendations:  nested list of recommendations, grouped by types
+    :param property_recommendations:  nested list of recommendations, grouped by data_types
    :return: Updated recommendations_to_upload, where where recommendation has a "recommendation_id"
             integer inserted
    """
--- a/datatypes/enums.py
+++ b/datatypes/enums.py
@ -0,0 +1,5 @@
+import enum
+
+
+class QuantityUnits(enum.Enum):
+    m2 = "m2"
--- a/model_data/BaseUtility.py
+++ b/model_data/BaseUtility.py
@ -1,4 +1,4 @@
-class BaseUtility:
+class Definitions:
    """
    This class contains some base attributes which are used across multiple other classes
    """
@ -38,7 +38,7 @@ class BaseUtility:
        # addresses will take time to develop to deal with these and future anomalies.
        #
        # There are several fields within the lodged data where it is possible to enter multiple entries to cater for 
-        # different types of build within a single property, i.e. extensions. This results in multiple entries for 
+        # different data_types of build within a single property, i.e. extensions. This results in multiple entries for 
        # the description fields for floor, roof and wall. For the purposes of this data release only the information 
        # contained within the first of these multiple entries is being provided. As there are no restrictions on the 
        # value in this first field it means that sometimes the first field in a multiple entry description field may 
--- a/model_data/app.py
+++ b/model_data/app.py
@ -22,7 +22,7 @@ LAND_REGISTRY_PATHS = [

 def app():
    """
-    For a pre-defined list of constituencies and property types, we'll download EPC data from the API
+    For a pre-defined list of constituencies and property data_types, we'll download EPC data from the API
    and produce a dataset of cleaned fields so that when we get new properties, we can quickly
    sanitise any description data
    :return:
--- a/model_data/epc_attributes/FloorAttributes.py
+++ b/model_data/epc_attributes/FloorAttributes.py
@ -1,9 +1,9 @@
 from typing import Dict, Union
-from model_data.BaseUtility import BaseUtility
+from model_data.BaseUtility import Definitions
 from model_data.epc_attributes.attribute_utils import extract_thermal_transmittance, extract_component_types


-class FloorAttributes(BaseUtility):
+class FloorAttributes(Definitions):
    DWELLING_BELOW = ["another dwelling below", "other premises below"]
    FLOOR_TYPES = ["assumed", "to unheated space", "to external air", "suspended", "solid"]

--- a/model_data/epc_attributes/HotWaterAttributes.py
+++ b/model_data/epc_attributes/HotWaterAttributes.py
@ -1,9 +1,9 @@
 from typing import Dict, Union
-from model_data.BaseUtility import BaseUtility
+from model_data.BaseUtility import Definitions
 from model_data.epc_attributes.attribute_utils import clean_description, find_keyword


-class HotWaterAttributes(BaseUtility):
+class HotWaterAttributes(Definitions):
    # HEATER_TYPES refer to the main devices used for heating water. These devices can be powered by different energy
    # sources.
    HEATER_TYPES = [
--- a/model_data/epc_attributes/MainFuelAttributes.py
+++ b/model_data/epc_attributes/MainFuelAttributes.py
@ -1,9 +1,9 @@
 from typing import Dict, Union
-from model_data.BaseUtility import BaseUtility
+from model_data.BaseUtility import Definitions
 from model_data.epc_attributes.attribute_utils import clean_description, remove_punctuation, find_keyword


-class MainFuelAttributes(BaseUtility):
+class MainFuelAttributes(Definitions):
    FUEL_KEYWORDS = [
        'heat network',
        'mains gas',
@ -96,7 +96,7 @@ class MainFuelAttributes(BaseUtility):

        if not result["fuel_type"]:
            result["fuel_type"] = self.UNKNOWN_FUEL
-            # We'll do checks on unknown fuel types to ensure we don't miss anything
+            # We'll do checks on unknown fuel data_types to ensure we don't miss anything
            self.is_unknown = True

        return result
--- a/model_data/epc_attributes/MainheatAttributes.py
+++ b/model_data/epc_attributes/MainheatAttributes.py
@ -1,9 +1,9 @@
-from model_data.BaseUtility import BaseUtility
+from model_data.BaseUtility import Definitions
 from model_data.epc_attributes.attribute_utils import clean_description, process_part
 from typing import Dict, Union


-class MainHeatAttributes(BaseUtility):
+class MainHeatAttributes(Definitions):
    HEAT_SYSTEMS = [
        "boiler", "air source heat pump", "room heaters", "electric storage heaters", "warm air",
        "electric underfloor heating", "electric ceiling heating", "community scheme",
--- a/model_data/epc_attributes/MainheatControlAttributes.py
+++ b/model_data/epc_attributes/MainheatControlAttributes.py
@ -1,9 +1,9 @@
 from typing import Dict, Union
-from model_data.BaseUtility import BaseUtility
+from model_data.BaseUtility import Definitions
 from model_data.epc_attributes.attribute_utils import clean_description, find_keyword


-class MainheatControlAttributes(BaseUtility):
+class MainheatControlAttributes(Definitions):
    # These systems allow for the automatic regulation of temperature
    THERMOSTATIC_CONTROL_KEYWORDS = [
        'room thermostats',
--- a/model_data/epc_attributes/RoofAttributes.py
+++ b/model_data/epc_attributes/RoofAttributes.py
@ -1,10 +1,10 @@
 import re
 from typing import Dict, Union
-from model_data.BaseUtility import BaseUtility
+from model_data.BaseUtility import Definitions
 from model_data.epc_attributes.attribute_utils import extract_component_types, extract_thermal_transmittance


-class RoofAttributes(BaseUtility):
+class RoofAttributes(Definitions):
    ROOF_TYPES = ['pitched', 'roof room', 'loft', 'flat', 'thatched', 'at rafters', 'assumed']
    DWELLING_ABOVE = ["another dwelling above", "other premises above"]

--- a/model_data/epc_attributes/WallAttributes.py
+++ b/model_data/epc_attributes/WallAttributes.py
@ -1,9 +1,9 @@
 from typing import Dict, Union
-from model_data.BaseUtility import BaseUtility
+from model_data.BaseUtility import Definitions
 from model_data.epc_attributes.attribute_utils import extract_component_types, extract_thermal_transmittance


-class WallAttributes(BaseUtility):
+class WallAttributes(Definitions):
    WALL_TYPES = ['cavity wall', 'filled cavity', 'solid brick', 'system built', 'timber frame', 'granite or whinstone',
                  'as built', 'cob', 'assumed', 'sandstone or limestone']

--- a/model_data/epc_attributes/WindowAttributes.py
+++ b/model_data/epc_attributes/WindowAttributes.py
@ -1,9 +1,9 @@
 from typing import Dict, Union
-from model_data.BaseUtility import BaseUtility
+from model_data.BaseUtility import Definitions
 from model_data.epc_attributes.attribute_utils import clean_description


-class WindowAttributes(BaseUtility):
+class WindowAttributes(Definitions):
    GLAZING_KEYWORDS = ["glazing", "glazed", "glaze"]
    GLAZING_COVERAGE = ["fully", "mostly", "partial", "some", "full", "thoughout"]
    GLAZING_TYPES = ["double", "triple", "secondary", "multiple", "high performance", "single"]
--- a/model_data/epc_attributes/attribute_utils.py
+++ b/model_data/epc_attributes/attribute_utils.py
@ -36,13 +36,13 @@ def extract_component_types(result: dict, description: str, list_of_components:
    Dict[str, Union[None, str, float]], str
 ]:
    """
-    Extracts component types from the description, updates the result dictionary, and removes the matched component
-    types from the description.
+    Extracts component data_types from the description, updates the result dictionary, and removes the matched component
+    data_types from the description.

    :param result: Dictionary to store the results in.
    :param description: Lowercase description string.
-    :param list_of_components: List of component types to extract from the description.
-    :return: A tuple containing the updated result dictionary and the description with the matched component types
+    :param list_of_components: List of component data_types to extract from the description.
+    :return: A tuple containing the updated result dictionary and the description with the matched component data_types
    removed.
    """
    for component in list_of_components:
--- a/model_data/simulation_system/DataProcessor.py
+++ b/model_data/simulation_system/DataProcessor.py
@ -1,7 +1,7 @@
 from pathlib import Path
 import numpy as np
 import pandas as pd
-from model_data.BaseUtility import BaseUtility
+from model_data.BaseUtility import Definitions
 from simulation_system.Settings import (
    DATA_PROCESSOR_SETTINGS,
    EARLIEST_EPC_DATE,
@ -12,7 +12,7 @@ from simulation_system.Settings import (
    FLOOR_LEVEL_MAP,
    BUILT_FORM_REMAP,
    COLUMNS_TO_MERGE_ON
-    )
+)
 from typing import List


@ -43,11 +43,11 @@ class DataProcessor:
        if DATA_PROCESSOR_SETTINGS['epc_minimum_count'] >= 1:
            # If we have multiple EPC records, we can try and do filling
            self.fill_na_fields()
-        
+
        self.data = self.data.sort_values(["UPRN", "LODGEMENT_DATE"], ascending=True)

        return self.data
-    
+
    def fill_na_fields(self, columns_to_fill: List = COLUMNS_TO_MERGE_ON):
        """
        If we have a minimum of 2 epcs, we can do back fill and forward fill on certain data fields
@ -56,35 +56,33 @@ class DataProcessor:
        # The groupby changes the order and we use the index to make the original data
        filled_data = self.data.groupby("UPRN", group_keys=True)[columns_to_fill].apply(
            lambda group: group.fillna(method='bfill').fillna(method='ffill')
-            ).reset_index().set_index('level_1').sort_index()
+        ).reset_index().set_index('level_1').sort_index()
+
+        self.data[columns_to_fill] = filled_data[columns_to_fill]

-        self.data[columns_to_fill] = filled_data[columns_to_fill]        
-        
-    
    def remap_columns(self):
        """
        Remap all columns, for any non values
        """

        # Map all anomaly values to None
-        data_anomaly_map = dict(zip(BaseUtility.DATA_ANOMALY_MATCHES, [None]*len(BaseUtility.DATA_ANOMALY_MATCHES)))
-        
+        data_anomaly_map = dict(zip(Definitions.DATA_ANOMALY_MATCHES, [None] * len(Definitions.DATA_ANOMALY_MATCHES)))
+
        # Use replace function to map data (if exists in key), to corresponding value - i.e. Remove invalid values
        data = self.data.replace(data_anomaly_map)
        data = data.replace(np.NAN, None)
-        
+
        # Remap certain columns
        data['FLOOR_LEVEL'] = data['FLOOR_LEVEL'].replace(FLOOR_LEVEL_MAP)
        data['BUILT_FROM'] = data['BUILT_FORM'].replace(BUILT_FORM_REMAP)

        self.data = data

-
    def make_cleaning_averages(self) -> pd.DataFrame:
        # Define a custom function to calculate the median, excluding missing values
        def median_without_missing(group):
            return group[AVERAGE_FIXED_FEATURES].median(skipna=True)
-    
+
        cleaning_averages = self.data.groupby(
            ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"],
            observed=True,
@ -93,41 +91,58 @@ class DataProcessor:

        general_averages = self.data.groupby(["PROPERTY_TYPE", "BUILT_FORM"], observed=True).apply(
            median_without_missing).reset_index()
-        
+
        property_averages = self.data.groupby(["PROPERTY_TYPE"], observed=True).apply(
            median_without_missing).reset_index()
-        
+
        built_form_averages = self.data.groupby(["BUILT_FORM"], observed=True).apply(
            median_without_missing).reset_index()
-        
+
        # We can clean up any NA's in the cleaning averages with the general averages here
-        cleaning_averages_filled = pd.merge(cleaning_averages, general_averages, on=['PROPERTY_TYPE', 'BUILT_FORM'], suffixes=['', '_AVERAGE'])
-        cleaning_averages_filled = pd.merge(cleaning_averages_filled, property_averages, on=['PROPERTY_TYPE'], suffixes=['', '_PROPERTY_AVERAGE'])
-        cleaning_averages_filled = pd.merge(cleaning_averages_filled, built_form_averages, on=['BUILT_FORM'], suffixes=['', '_BUILT_FORM_AVERAGE'])
+        cleaning_averages_filled = pd.merge(cleaning_averages, general_averages, on=['PROPERTY_TYPE', 'BUILT_FORM'],
+                                            suffixes=['', '_AVERAGE'])
+        cleaning_averages_filled = pd.merge(cleaning_averages_filled, property_averages, on=['PROPERTY_TYPE'],
+                                            suffixes=['', '_PROPERTY_AVERAGE'])
+        cleaning_averages_filled = pd.merge(cleaning_averages_filled, built_form_averages, on=['BUILT_FORM'],
+                                            suffixes=['', '_BUILT_FORM_AVERAGE'])

        # Replace any missing NAN values with averages for the same Property type and built form
-        cleaning_averages_filled['TOTAL_FLOOR_AREA'] = cleaning_averages_filled['TOTAL_FLOOR_AREA'].fillna(cleaning_averages_filled['TOTAL_FLOOR_AREA_AVERAGE'])
-        cleaning_averages_filled['FLOOR_HEIGHT'] = cleaning_averages_filled['FLOOR_HEIGHT'].fillna(cleaning_averages_filled['FLOOR_HEIGHT_AVERAGE'])
-        cleaning_averages_filled = cleaning_averages_filled.drop(columns=['TOTAL_FLOOR_AREA_AVERAGE', 'FLOOR_HEIGHT_AVERAGE'])
+        cleaning_averages_filled['TOTAL_FLOOR_AREA'] = cleaning_averages_filled['TOTAL_FLOOR_AREA'].fillna(
+            cleaning_averages_filled['TOTAL_FLOOR_AREA_AVERAGE'])
+        cleaning_averages_filled['FLOOR_HEIGHT'] = cleaning_averages_filled['FLOOR_HEIGHT'].fillna(
+            cleaning_averages_filled['FLOOR_HEIGHT_AVERAGE'])
+        cleaning_averages_filled = cleaning_averages_filled.drop(
+            columns=['TOTAL_FLOOR_AREA_AVERAGE', 'FLOOR_HEIGHT_AVERAGE'])

-        #  If there are still NA values i.e. the averages do not have values for a speicifc group of property tyope and built form
+        #  If there are still NA values i.e. the averages do not have values for a specific group of property type
+        #  and built form
        #  We can use just the property type average and replace
-        cleaning_averages_filled['TOTAL_FLOOR_AREA'] = cleaning_averages_filled['TOTAL_FLOOR_AREA'].fillna(cleaning_averages_filled['TOTAL_FLOOR_AREA_PROPERTY_AVERAGE'])
-        cleaning_averages_filled['FLOOR_HEIGHT'] = cleaning_averages_filled['FLOOR_HEIGHT'].fillna(cleaning_averages_filled['FLOOR_HEIGHT_PROPERTY_AVERAGE'])
-        cleaning_averages_filled = cleaning_averages_filled.drop(columns=['TOTAL_FLOOR_AREA_PROPERTY_AVERAGE', 'FLOOR_HEIGHT_PROPERTY_AVERAGE'])
+        cleaning_averages_filled['TOTAL_FLOOR_AREA'] = cleaning_averages_filled['TOTAL_FLOOR_AREA'].fillna(
+            cleaning_averages_filled['TOTAL_FLOOR_AREA_PROPERTY_AVERAGE'])
+        cleaning_averages_filled['FLOOR_HEIGHT'] = cleaning_averages_filled['FLOOR_HEIGHT'].fillna(
+            cleaning_averages_filled['FLOOR_HEIGHT_PROPERTY_AVERAGE'])
+        cleaning_averages_filled = cleaning_averages_filled.drop(
+            columns=['TOTAL_FLOOR_AREA_PROPERTY_AVERAGE', 'FLOOR_HEIGHT_PROPERTY_AVERAGE'])

        # If there are still NA values, use BUILT FORM averages
-        cleaning_averages_filled['TOTAL_FLOOR_AREA'] = cleaning_averages_filled['TOTAL_FLOOR_AREA'].fillna(cleaning_averages_filled['TOTAL_FLOOR_AREA_BUILT_FORM_AVERAGE'])
-        cleaning_averages_filled['FLOOR_HEIGHT'] = cleaning_averages_filled['FLOOR_HEIGHT'].fillna(cleaning_averages_filled['FLOOR_HEIGHT_BUILT_FORM_AVERAGE'])
-        cleaning_averages_filled = cleaning_averages_filled.drop(columns=['TOTAL_FLOOR_AREA_BUILT_FORM_AVERAGE', 'FLOOR_HEIGHT_BUILT_FORM_AVERAGE'])
+        cleaning_averages_filled['TOTAL_FLOOR_AREA'] = cleaning_averages_filled['TOTAL_FLOOR_AREA'].fillna(
+            cleaning_averages_filled['TOTAL_FLOOR_AREA_BUILT_FORM_AVERAGE'])
+        cleaning_averages_filled['FLOOR_HEIGHT'] = cleaning_averages_filled['FLOOR_HEIGHT'].fillna(
+            cleaning_averages_filled['FLOOR_HEIGHT_BUILT_FORM_AVERAGE'])
+        cleaning_averages_filled = cleaning_averages_filled.drop(
+            columns=['TOTAL_FLOOR_AREA_BUILT_FORM_AVERAGE', 'FLOOR_HEIGHT_BUILT_FORM_AVERAGE'])

        # If there still is na values, use average across all properties in consituecy
-        cleaning_averages_filled['TOTAL_FLOOR_AREA'] = cleaning_averages_filled['TOTAL_FLOOR_AREA'].fillna(cleaning_averages_filled['TOTAL_FLOOR_AREA'].mean())
-        cleaning_averages_filled['FLOOR_HEIGHT'] = cleaning_averages_filled['FLOOR_HEIGHT'].fillna(cleaning_averages_filled['FLOOR_HEIGHT'].mean())
+        cleaning_averages_filled['TOTAL_FLOOR_AREA'] = cleaning_averages_filled['TOTAL_FLOOR_AREA'].fillna(
+            cleaning_averages_filled['TOTAL_FLOOR_AREA'].mean())
+        cleaning_averages_filled['FLOOR_HEIGHT'] = cleaning_averages_filled['FLOOR_HEIGHT'].fillna(
+            cleaning_averages_filled['FLOOR_HEIGHT'].mean())

        # If the consituency is all NA values, then take UK AVERAGE VALUES
-        cleaning_averages_filled['TOTAL_FLOOR_AREA'] = cleaning_averages_filled['TOTAL_FLOOR_AREA'].fillna(TOTAL_FLOOR_AREA_NATIONAL_AVERAGE)
-        cleaning_averages_filled['FLOOR_HEIGHT'] = cleaning_averages_filled['FLOOR_HEIGHT'].fillna(FLOOR_HEIGHT_NATIONAL_AVERAGE)
+        cleaning_averages_filled['TOTAL_FLOOR_AREA'] = cleaning_averages_filled['TOTAL_FLOOR_AREA'].fillna(
+            TOTAL_FLOOR_AREA_NATIONAL_AVERAGE)
+        cleaning_averages_filled['FLOOR_HEIGHT'] = cleaning_averages_filled['FLOOR_HEIGHT'].fillna(
+            FLOOR_HEIGHT_NATIONAL_AVERAGE)

        return cleaning_averages_filled

@ -143,7 +158,6 @@ class DataProcessor:
        counts = counts[counts["count"] > epc_minimum_count]
        self.data = pd.merge(self.data, counts, on='UPRN')

-    
    def recast_df_columns(self, column_mappings: dict) -> None:
        """
        Recast columns from the dataframe to ensure the behaviour we want
@ -156,7 +170,6 @@ class DataProcessor:
            for value in values:
                self.data[key] = self.data[key].astype(value)

-
    def confine_data(self) -> None:
        """
        Include all step to reduce down the data based on assumptions
@ -177,12 +190,11 @@ class DataProcessor:
        self.data = self.data[self.data["TRANSACTION_TYPE"] != "new dwelling"]
        self.data = self.data[~self.data["FLOOR_LEVEL"].isin(["top floor", "mid floor"])]

-    
    def clean_multi_glaze_proportion(self) -> None:
        """
        If there is no multi-glaze proportion but the windows are fully glazed, then we should assume a score of 100
        """

-        no_multi_glaze_proportion_index = pd.isnull(self.data["MULTI_GLAZE_PROPORTION"]) & (self.data["WINDOWS_DESCRIPTION"].isin(FULLY_GLAZED_DESCRIPTIONS))
+        no_multi_glaze_proportion_index = pd.isnull(self.data["MULTI_GLAZE_PROPORTION"]) & (
+            self.data["WINDOWS_DESCRIPTION"].isin(FULLY_GLAZED_DESCRIPTIONS))
        self.data.loc[no_multi_glaze_proportion_index, 'MULTI_GLAZE_PROPORTION'] = 100
-
--- a/model_data/simulation_system/app.py
+++ b/model_data/simulation_system/app.py
@ -1,13 +1,13 @@
 import numpy as np
 import pandas as pd
 from tqdm import tqdm
-from model_data.BaseUtility import BaseUtility
+from model_data.BaseUtility import Definitions
 from pathlib import Path
 from model_data.simulation_system.Settings import (
    MANDATORY_FIXED_FEATURES,
-    AVERAGE_FIXED_FEATURES, 
-    LATEST_FIELD, 
-    COMPONENT_FEATURES, 
+    AVERAGE_FIXED_FEATURES,
+    LATEST_FIELD,
+    COMPONENT_FEATURES,
    RDSAP_RESPONSE,
    HEAT_DEMAND_RESPONSE,
    COLUMNS_TO_MERGE_ON,
@ -18,6 +18,7 @@ from DataProcessor import DataProcessor

 DATA_DIRECTORY = Path(__file__).parent / 'data' / 'all-domestic-certificates'

+
 def app():
    # Get all the files in the directory

@ -30,8 +31,9 @@ def app():
    dataset = []
    # 116 
    # 128048706
-    # PosixPath('/home/ubuntu/Documents/python/hestia/Model/model_data/simulation_system/data/all-domestic-certificates/domestic-E09000021-Kingston-upon-Thames')
-    for directory in tqdm(directories): 
+    # PosixPath('/home/ubuntu/Documents/python/hestia/Model/model_data/simulation_system/data/all-domestic
+    # -certificates/domestic-E09000021-Kingston-upon-Thames')
+    for directory in tqdm(directories):

        filepath = directory / "certificates.csv"

@ -45,7 +47,7 @@ def app():
            # Fixed features - these are property attributes that shouldn't change over time
            fixed_data = {}

-             # If a property has changed building type, we can ignore the epc rating i.e. this should be 1 unique row
+            # If a property has changed building type, we can ignore the epc rating i.e. this should be 1 unique row
            if max(property_data[MANDATORY_FIXED_FEATURES].nunique()) > 1:
                continue

@ -61,16 +63,21 @@ def app():
            cleaned_columns_to_merge_on = na_columns.index[~na_columns].to_list()

            #  Get the corresponding groupby and merge, and fill in NA values
-            cleaning_averages_to_merge = cleaning_averages.groupby(cleaned_columns_to_merge_on)[['TOTAL_FLOOR_AREA', 'FLOOR_HEIGHT']].mean()
-            
-            modified_property_data = pd.merge(property_data, cleaning_averages_to_merge, on=cleaned_columns_to_merge_on, suffixes=['', '_AVERAGE'])
-            modified_property_data['TOTAL_FLOOR_AREA'] = modified_property_data['TOTAL_FLOOR_AREA'].fillna(modified_property_data['TOTAL_FLOOR_AREA_AVERAGE'])
-            modified_property_data['FLOOR_HEIGHT'] = modified_property_data['FLOOR_HEIGHT'].fillna(modified_property_data['FLOOR_HEIGHT_AVERAGE'])
-            modified_property_data = modified_property_data.drop(columns=['TOTAL_FLOOR_AREA_AVERAGE', 'FLOOR_HEIGHT_AVERAGE'])
+            cleaning_averages_to_merge = cleaning_averages.groupby(cleaned_columns_to_merge_on)[
+                ['TOTAL_FLOOR_AREA', 'FLOOR_HEIGHT']].mean()
+
+            modified_property_data = pd.merge(property_data, cleaning_averages_to_merge, on=cleaned_columns_to_merge_on,
+                                              suffixes=['', '_AVERAGE'])
+            modified_property_data['TOTAL_FLOOR_AREA'] = modified_property_data['TOTAL_FLOOR_AREA'].fillna(
+                modified_property_data['TOTAL_FLOOR_AREA_AVERAGE'])
+            modified_property_data['FLOOR_HEIGHT'] = modified_property_data['FLOOR_HEIGHT'].fillna(
+                modified_property_data['FLOOR_HEIGHT_AVERAGE'])
+            modified_property_data = modified_property_data.drop(
+                columns=['TOTAL_FLOOR_AREA_AVERAGE', 'FLOOR_HEIGHT_AVERAGE'])

            for field in AVERAGE_FIXED_FEATURES:

-                vals =  list(modified_property_data[field].dropna().unique())
+                vals = list(modified_property_data[field].dropna().unique())
                if len(vals) > 1:
                    # Check the values are too far apart
                    # TODO: we could have multiple values here, why only use the first two?
@ -80,10 +87,10 @@ def app():

                if len(vals) == 0:
                    wrong_var
-         
+
                fixed_data[field] = np.mean(vals)

-            #Combine all fields together
+            # Combine all fields together
            fixed_data.update(mandatory_field_data)
            fixed_data.update(latest_field_data)

@ -132,4 +139,4 @@ def app():


 if __name__ == "__main__":
-    app()
+    app()
--- a/model_data/simulation_system/energy_predictor.py
+++ b/model_data/simulation_system/energy_predictor.py
@ -1,15 +1,15 @@
 from pathlib import Path
 from Settings import (
-    RDSAP_RESPONSE, 
-    FLOOR_LEVEL_MAP, 
+    RDSAP_RESPONSE,
+    FLOOR_LEVEL_MAP,
    BUILT_FORM_REMAP,
-    EARLIEST_EPC_DATE, 
+    EARLIEST_EPC_DATE,
    FULLY_GLAZED_DESCRIPTIONS,
    FIXED_FEATURES,
    LATEST_FIELD,
    COMPONENT_FEATURES
-    )
-from model_data.BaseUtility import BaseUtility
+)
+from model_data.BaseUtility import Definitions
 from tqdm import tqdm
 import pandas as pd
 import numpy as np
@ -21,17 +21,18 @@ RANDOM_SEED = 0
 DATA_DIRECTORY = Path(__file__).parent / 'data' / 'all-domestic-certificates'

 FLOAT_COLUMNS = [
-            'NUMBER_OPEN_FIREPLACES', 
-            'EXTENSION_COUNT',
-            'TOTAL_FLOOR_AREA',
-            'PHOTO_SUPPLY',
-            'FIXED_LIGHTING_OUTLETS_COUNT',
-            'FLOOR_HEIGHT',
-            'NUMBER_HABITABLE_ROOMS',
-            'LOW_ENERGY_LIGHTING',
-            'MULTI_GLAZE_PROPORTION',
-            'NUMBER_HEATED_ROOMS'
-        ]
+    'NUMBER_OPEN_FIREPLACES',
+    'EXTENSION_COUNT',
+    'TOTAL_FLOOR_AREA',
+    'PHOTO_SUPPLY',
+    'FIXED_LIGHTING_OUTLETS_COUNT',
+    'FLOOR_HEIGHT',
+    'NUMBER_HABITABLE_ROOMS',
+    'LOW_ENERGY_LIGHTING',
+    'MULTI_GLAZE_PROPORTION',
+    'NUMBER_HEATED_ROOMS'
+]
+

 def create_raw_data():
    """
@ -40,7 +41,7 @@ def create_raw_data():

    directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()]
    # directories = directories[0:10]
-    dfs = [] 
+    dfs = []
    for directory in tqdm(directories):
        filepath = directory / "certificates.csv"
        df = pd.read_csv(filepath, low_memory=False)
@ -52,7 +53,8 @@ def create_raw_data():
        df = df[~df["FLOOR_LEVEL"].isin(["top floor", "mid floor"])]

        # Change multi glaze proportion
-        no_multi_glaze_proportion_index = pd.isnull(df["MULTI_GLAZE_PROPORTION"]) & (df["WINDOWS_DESCRIPTION"].isin(FULLY_GLAZED_DESCRIPTIONS))
+        no_multi_glaze_proportion_index = pd.isnull(df["MULTI_GLAZE_PROPORTION"]) & (
+            df["WINDOWS_DESCRIPTION"].isin(FULLY_GLAZED_DESCRIPTIONS))
        df.loc[no_multi_glaze_proportion_index, 'MULTI_GLAZE_PROPORTION'] = 100

        # Recast 
@ -63,12 +65,12 @@ def create_raw_data():
        df = df.sort_values(["UPRN", "LODGEMENT_DATE"], ascending=True)

        # Map all anomaly values to None
-        data_anomaly_map = dict(zip(BaseUtility.DATA_ANOMALY_MATCHES, [None]*len(BaseUtility.DATA_ANOMALY_MATCHES)))
-        
+        data_anomaly_map = dict(zip(Definitions.DATA_ANOMALY_MATCHES, [None] * len(Definitions.DATA_ANOMALY_MATCHES)))
+
        # Use replace function to map data (if exists in key), to corresponding value - i.e. Remove invalid values
        df = df.replace(data_anomaly_map)
        df = df.replace(np.NAN, None)
-        
+
        # Remap certain columns
        df['FLOOR_LEVEL'] = df['FLOOR_LEVEL'].replace(FLOOR_LEVEL_MAP)
        df['BUILT_FROM'] = df['BUILT_FORM'].replace(BUILT_FORM_REMAP)
@ -83,7 +85,6 @@ def create_raw_data():
        df[RDSAP_RESPONSE] = pd.to_numeric(df[RDSAP_RESPONSE], downcast='unsigned')
        df[FLOAT_COLUMNS] = df[FLOAT_COLUMNS].apply(pd.to_numeric, downcast='float')

-
        dfs.append(df)

    data = pd.concat(dfs)
@ -95,23 +96,23 @@ def create_raw_data():


 def main():
-
    data = TabularDataset(data='./model_build_data/energy_data/cleaned_data/train_validation_data.parquet')

-    subsample_size = round(len(data)/100)
+    subsample_size = round(len(data) / 100)
    data = data.sample(subsample_size, random_state=RANDOM_SEED)

    predictor_RDSAP = TabularPredictor(
-        label=RDSAP_RESPONSE, 
-        path="agModels-predictENERGY", 
+        label=RDSAP_RESPONSE,
+        path="agModels-predictENERGY",
        problem_type="regression",
        eval_metric='mean_absolute_error'
-        ).fit(data, time_limit=800, presets='high_quality', excluded_model_types=['KNN', 'CAT'])
+    ).fit(data, time_limit=800, presets='high_quality', excluded_model_types=['KNN', 'CAT'])

    test_data = TabularDataset('./model_build_data/energy_data/cleaned_data/test_data.parquet')
    performance = predictor_RDSAP.evaluate(test_data)
    predictions = predictor_RDSAP.predict(test_data)
    predictor_RDSAP.feature_importance(test_data)

+
 if __name__ == "__main__":
-    main()
+    main()
--- a/model_data/tests/test_floor_attributes.py
+++ b/model_data/tests/test_floor_attributes.py
@ -36,7 +36,7 @@ class TestCleanFloor:
        # Test that invalid descriptions raise a ValueError
        invalid_descriptions = [
            "invalid description",
-            "description with no known floor types or thermal transmittance",
+            "description with no known floor data_types or thermal transmittance",
        ]

        for description in invalid_descriptions:
--- a/model_data/tests/test_hotwater_attributes.py
+++ b/model_data/tests/test_hotwater_attributes.py
@ -29,7 +29,7 @@ class TestHotWaterAttributes:
        # Test that invalid descriptions raise a ValueError
        invalid_descriptions = [
            "invalid description",
-            "description with no known hotwater types",
+            "description with no known hotwater data_types",
            ""
        ]

--- a/model_data/tests/test_mainfuel_attributes.py
+++ b/model_data/tests/test_mainfuel_attributes.py
@ -29,7 +29,7 @@ class TestMainHeatControlAttributes:
        # Test that invalid descriptions raise a ValueError
        invalid_descriptions = [
            "invalid description",
-            "description with no known fuel types",
+            "description with no known fuel data_types",
        ]

        for description in invalid_descriptions:
--- a/model_data/tests/test_mainheat_attributes.py
+++ b/model_data/tests/test_mainheat_attributes.py
@ -34,7 +34,7 @@ class TestMainHeatAttributes:
        invalid_descriptions = [
            "",
            "invalid description",
-            "description with no known heating types",
+            "description with no known heating data_types",
        ]

        for description in invalid_descriptions:
--- a/model_data/tests/test_mainheat_controls_attributes.py
+++ b/model_data/tests/test_mainheat_controls_attributes.py
@ -29,7 +29,7 @@ class TestMainHeatControlAttributes:
        # Test that invalid descriptions raise a ValueError
        invalid_descriptions = [
            "invalid description",
-            "description with no known heating control types",
+            "description with no known heating control data_types",
        ]

        for description in invalid_descriptions:
--- a/recommendations/FloorRecommendations.py
+++ b/recommendations/FloorRecommendations.py
@ -1,6 +1,7 @@
 import math
 from typing import List
-from model_data.BaseUtility import BaseUtility
+from model_data.BaseUtility import Definitions
+from datatypes.enums import QuantityUnits
 from backend.Property import Property
 from recommendations.rdsap_tables import default_wall_thickness, age_band_data
 from recommendations.recommendation_utils import (
@ -13,7 +14,7 @@ suspended_floor_insulation_parts = [
        # Example product
        # https://www.insulationsuperstore.co.uk/product/recticel-eurothane-general-purpose-pir-insulation-board-2400
        # -x-1200-x-100mm.html
-        # All product types here:
+        # All product data_types here:
        # https://www.insulationsuperstore.co.uk/browse/insulation/brand/recticel/filterby/application/floors.html
        "type": "suspended_floor_insulation",
        "description": "Rigid Insulation Foam Boards",
@ -29,7 +30,7 @@ suspended_floor_insulation_parts = [
    {
        # Example product
        # https://www.insulationsuperstore.co.uk/product/rockwool-rwa45-acoustic-insulation-slab-100mm-2-88m2-pack.html
-        # All product types here:
+        # All product data_types here:
        # https://www.insulationsuperstore.co.uk/browse/insulation/brand/rockwool/filterby/application/floors
        # /material/mineral-wool.html
        "type": "suspended_floor_insulation",
@ -49,7 +50,7 @@ solid_floor_insulation_parts = [
    {
        # Example product
        # https://www.insulationexpress.co.uk/floor-insulation/solid-floor-insulation/k103-100mm
-        # All product types here:
+        # All product data_types here:
        # https://www.insulationexpress.co.uk/floor-insulation/solid-floor-insulation?brand=7015&p=1
        # Example screed https://www.screwfix.com/p/mapei-ultraplan-3240-self-levelling-compound-25kg/4959f
        "type": "solid_floor_insulation",
@ -69,7 +70,7 @@ solid_floor_insulation_parts = [
 parts = suspended_floor_insulation_parts + solid_floor_insulation_parts


-class FloorRecommendations(BaseUtility):
+class FloorRecommendations(Definitions):
    # part L building regulations indicate that any renovations on an existing property's floors should
    # achieve a U-value of no higher than 0.25
    BUILDING_REGULATIONS_PART_L_MAX_U_VALUE = 0.25
@ -305,17 +306,25 @@ class FloorRecommendations(BaseUtility):
                if new_u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
                    lowest_selected_u_value = update_lowest_selected_u_value(lowest_selected_u_value, new_u_value)

+                    estimated_cost = cost_per_unit * self.property.floor_area
+
                    self.recommendations.append(
                        {
                            "parts": [
-                                get_recommended_part(part, depth, cost_per_unit),
+                                get_recommended_part(
+                                    part=part,
+                                    selected_depth=depth,
+                                    quantity=self.property.floor_area,
+                                    quantity_unit=QuantityUnits.m2.value,
+                                    selected_total_cost=estimated_cost
+                                ),
                            ],
                            "type": "floor_insulation",
                            "description": self._make_floor_description(part, depth),
                            "starting_u_value": u_value,
                            "new_u_value": new_u_value,
                            "sap_points": estimate_sap_points(),
-                            "cost": cost_per_unit * self.property.floor_area,
+                            "cost": estimated_cost,
                        }
                    )

--- a/recommendations/WallRecommendations.py
+++ b/recommendations/WallRecommendations.py
@ -1,8 +1,9 @@
 import itertools
 import math

+from datatypes.enums import QuantityUnits
 from backend.Property import Property
-from model_data.BaseUtility import BaseUtility
+from model_data.BaseUtility import Definitions
 from recommendations.recommendation_utils import (
    r_value_per_mm_to_u_value, calculate_u_value_uplift, is_diminishing_returns, update_lowest_selected_u_value,
    get_recommended_part, get_uvalue_estimate, estimate_sap_points
@ -184,7 +185,7 @@ internal_wall_insulation_parts = [
 wall_parts = external_wall_insulation_parts + internal_wall_insulation_parts


-class WallRecommendations(BaseUtility):
+class WallRecommendations(Definitions):
    YEAR_WALLS_BUILT_WITH_INSULATION = 1990
    # After 1930, solid brick walls became less popular and instead, cavity walls became a
    # more popular choice
@ -332,15 +333,25 @@ class WallRecommendations(BaseUtility):
                if new_u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
                    lowest_selected_u_value = update_lowest_selected_u_value(lowest_selected_u_value, new_u_value)

+                    estimated_cost = cost_per_unit * self.property.insulation_wall_area
+
                    recommendations.append(
                        {
-                            "parts": [get_recommended_part(part, depth, cost_per_unit)],
+                            "parts": [
+                                get_recommended_part(
+                                    part=part,
+                                    selected_depth=depth,
+                                    quantity=self.property.insulation_wall_area,
+                                    quantity_unit=QuantityUnits.m2.value,
+                                    selected_total_cost=estimated_cost
+                                )
+                            ],
                            "type": "wall_insulation",
                            "description": "Install " + self._make_description(part, depth),
                            "starting_u_value": u_value,
                            "new_u_value": new_u_value,
                            "sap_points": estimate_sap_points(),
-                            "cost": cost_per_unit * self.property.insulation_wall_area,
+                            "cost": estimated_cost,
                        }
                    )

@ -394,10 +405,25 @@ class WallRecommendations(BaseUtility):
                    if combined_new_u_value - self.U_VALUE_ERROR <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
                        # Here you might want to define a way to add both recommendations together.
                        # For now, I'm adding them as separate items in the list
+                        ewi_esimtated_cost = ewi_cost_per_unit * self.property.insulation_wall_area
+                        iwi_esimtated_cost = iwi_cost_per_unit * self.property.insulation_wall_area
+
                        recommendation = {
                            "parts": [
-                                get_recommended_part(ewi_part, ewi_depth, ewi_cost_per_unit),
-                                get_recommended_part(iwi_part, iwi_depth, iwi_cost_per_unit)
+                                get_recommended_part(
+                                    part=ewi_part,
+                                    selected_depth=ewi_depth,
+                                    quantity=self.property.insulation_wall_area,
+                                    quantity_unit=QuantityUnits.m2.value,
+                                    selected_total_cost=ewi_esimtated_cost
+                                ),
+                                get_recommended_part(
+                                    part=iwi_part,
+                                    selected_depth=iwi_depth,
+                                    quantity=self.property.insulation_wall_area,
+                                    quantity_unit=QuantityUnits.m2.value,
+                                    selected_total_cost=iwi_esimtated_cost
+                                )
                            ],
                            "type": "wall_insulation",
                            "description": (
@ -407,10 +433,7 @@ class WallRecommendations(BaseUtility):
                            "starting_u_value": u_value,
                            "new_u_value": combined_new_u_value,
                            "sap_points": estimate_sap_points(),
-                            "cost": (
-                                ewi_cost_per_unit * self.property.insulation_wall_area + iwi_cost_per_unit *
-                                self.property.insulation_wall_area
-                            ),
+                            "cost": ewi_esimtated_cost + iwi_esimtated_cost,
                        }
                        self.recommendations.append(recommendation)

--- a/recommendations/recommendation_utils.py
+++ b/recommendations/recommendation_utils.py
@ -110,17 +110,21 @@ def update_lowest_selected_u_value(lowest_selected_u_value, new_u_value):
    return lowest_selected_u_value


-def get_recommended_part(part, selected_depth, selected_cost):
+def get_recommended_part(part, selected_depth, selected_total_cost, quantity, quantity_unit):
    """
    Utility function to return a recommended part with the selected depth.
    :param part: part to be recommended
    :param selected_depth: depth of the selected part
-    :param selected_cost: cost of the selected depth
+    :param selected_total_cost: Total cost of the selected part
+    :param quantity: Quantity of the selected part
+    :param quantity_unit: Unit of the quantity
    :return: a deep copy of part with depths, estimated_cost, quantity and quantity_unit set
    """
    recommended_part = deepcopy(part)
    recommended_part["depths"] = [selected_depth]
-    recommended_part["cost"] = [selected_cost]
+    recommended_part["estimated_cost"] = selected_total_cost
+    recommended_part["quantity"] = quantity
+    recommended_part["quantity_unit"] = quantity_unit

    return recommended_part

--- a/serverless.yml
+++ b/serverless.yml
@ -46,6 +46,7 @@ package:
    - 'model_data/EpcClean.py'
    - 'model_data/utils.py'
    - 'model_data/epc_attributes/**'
+    - 'datatypes/**'
    - '!infrastructure/**'
    - '!data_collection/**'
    - '!node_modules/**'