diff --git a/BaseUtility.py b/BaseUtility.py index beece742..bd2f091e 100644 --- a/BaseUtility.py +++ b/BaseUtility.py @@ -43,7 +43,9 @@ class Definitions: # contained within the first of these multiple entries is being provided. As there are no restrictions on the # value in this first field it means that sometimes the first field in a multiple entry description field may # contain a ‘null’ value. A resolution to correct these anomalies will be considered for future data releases. - "NULL" + "NULL", + # We sometimes see fields populated with just an empty string. + "" } DATA_ANOMALY_SUBSTRINGS = { diff --git a/backend/Property.py b/backend/Property.py index 8492ec3e..1e8bbaf8 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -4,6 +4,7 @@ import os import pandas as pd from etl.epc.DataProcessor import DataProcessor +from etl.epc_clean.epc_attributes.all_cleaners import all_cleaner_map from utils.logger import setup_logger from utils.s3 import read_dataframe_from_s3_parquet from epc_api.client import EpcClient @@ -286,8 +287,14 @@ class Property(Definitions): attributes = [ x for x in cleaned[description] if x["original_description"] == self.data[description] ] - if len(attributes) != 1: - raise ValueError("Either No attributes or multiple found for %s" % description) + if len(attributes) > 1: + raise ValueError("Multiple attributes found for %s" % description) + + if len(attributes) == 0: + # We attempt to perform the clean on the fly + cleaner_cls = all_cleaner_map[description] + attributes = [cleaner_cls(self.data[description]).process()] + setattr(self, self.ATTRIBUTE_MAP[description], attributes[0]) self.set_wall_type() diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index e1c0830f..83b758e3 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -131,7 +131,7 @@ async def trigger_plan(body: PlanTriggerRequest): # Floor recommendations floor_recommender = FloorRecommendations( property_instance=p, - materials=materials_by_type["suspended_floor_insulation"] + 
materials_by_type["solid_floor_insulation"], + materials=materials_by_type["floor"], ) floor_recommender.recommend() @@ -142,7 +142,7 @@ async def trigger_plan(body: PlanTriggerRequest): wall_recomender = WallRecommendations( property_instance=p, - materials=materials_by_type["external_wall_insulation"] + materials_by_type["internal_wall_insulation"] + materials=materials_by_type["walls"] ) wall_recomender.recommend() diff --git a/backend/requirements/base.txt b/backend/requirements/base.txt index ce43ce0a..7a925030 100644 --- a/backend/requirements/base.txt +++ b/backend/requirements/base.txt @@ -34,4 +34,5 @@ pytz==2023.3 mip==1.15.0 boto3==1.28.3 pandas==1.5.3 -pyarrow==12.0.1 \ No newline at end of file +pyarrow==12.0.1 +textblob \ No newline at end of file diff --git a/etl/__init__.py b/etl/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/etl/epc_clean/epc_attributes/__init__.py b/etl/epc_clean/epc_attributes/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/etl/epc_clean/epc_attributes/all_cleaners.py b/etl/epc_clean/epc_attributes/all_cleaners.py new file mode 100644 index 00000000..e4e0a0ba --- /dev/null +++ b/etl/epc_clean/epc_attributes/all_cleaners.py @@ -0,0 +1,21 @@ +from etl.epc_clean.epc_attributes.FloorAttributes import FloorAttributes +from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes +from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes +from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes +from etl.epc_clean.epc_attributes.MainheatControlAttributes import MainheatControlAttributes +from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes +from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes +from etl.epc_clean.epc_attributes.WindowAttributes import WindowAttributes +from etl.epc_clean.epc_attributes.LightingAttributes import LightingAttributes + +all_cleaner_map = { + 'floor-description': 
FloorAttributes, + 'hotwater-description': HotWaterAttributes, + 'main-fuel': MainFuelAttributes, + 'mainheat-description': MainHeatAttributes, + 'mainheatcont-description': MainheatControlAttributes, + 'roof-description': RoofAttributes, + 'walls-description': WallAttributes, + 'windows-description': WindowAttributes, + 'lighting-description': LightingAttributes, +}