diff --git a/.idea/Model.iml b/.idea/Model.iml
index 05b9012b..b03b31b1 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 3b05c6ac..ca0e1cd9 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,6 +1,6 @@
-
+
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 4b972a6a..8f1413ee 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -373,6 +373,7 @@ async def trigger_plan(body: PlanTriggerRequest):
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
+ # TODO: Set the TRANSACTION_TYPE
# Clean the data
cleaning_data = read_parquet_from_s3(
bucket_name="retrofit-data-dev",
diff --git a/model_data/EpcClean.py b/model_data/EpcClean.py
index c8594de8..adec9978 100644
--- a/model_data/EpcClean.py
+++ b/model_data/EpcClean.py
@@ -2,6 +2,8 @@ from typing import List, Dict, Any
from collections import Counter
from collections import defaultdict
+import pandas as pd
+
from model_data.utils import correct_spelling
from model_data.epc_attributes.FloorAttributes import FloorAttributes
from model_data.epc_attributes.HotWaterAttributes import HotWaterAttributes
@@ -97,7 +99,7 @@ class EpcClean:
self._init_empty_cleaned_obj()
for field in self.CLEANING_FIELDS:
- self.unique_vals[field] = Counter([v[field] for v in self.data])
+ self.unique_vals[field] = Counter([v[field] for v in self.data if not pd.isnull(v[field])])
self.clean_wrapper(field="floor-description", cleaning_cls=FloorAttributes)
self.clean_wrapper(field="hotwater-description", cleaning_cls=HotWaterAttributes)
diff --git a/model_data/epc_attributes/FloorAttributes.py b/model_data/epc_attributes/FloorAttributes.py
index 024ec6dc..71a8b5a8 100644
--- a/model_data/epc_attributes/FloorAttributes.py
+++ b/model_data/epc_attributes/FloorAttributes.py
@@ -14,6 +14,7 @@ class FloorAttributes(Definitions):
WELSH_TEXT = {
"(anheddiad arall islaw)": "(another dwelling below)",
+ "solet, dim inswleiddio (rhagdybiaeth)": "solid, no insulation (assumed)"  # Welsh "solet" -> "solid"
}
def __init__(self, description: str):
diff --git a/model_data/epc_attributes/HotWaterAttributes.py b/model_data/epc_attributes/HotWaterAttributes.py
index 97664416..2535032b 100644
--- a/model_data/epc_attributes/HotWaterAttributes.py
+++ b/model_data/epc_attributes/HotWaterAttributes.py
@@ -15,7 +15,8 @@ class HotWaterAttributes(Definitions):
'oil boiler', # A boiler that uses oil as fuel to heat water
'electric instantaneous', # Similar to gas instantaneous, but uses electricity as its energy source
'gas multipoint', # A gas water heater that can supply hot water to multiple points of use at once
- 'heat pump' # A general category for heat pumps, regardless of the energy source
+ 'heat pump', # A general category for heat pumps, regardless of the energy source
+ 'solid fuel boiler' # burns solid materials to generate heat for water heating and/or space heating
]
# SYSTEM_TYPES refer to the larger system within which the heater operates.
@@ -83,6 +84,7 @@ class HotWaterAttributes(Definitions):
# not common, especially in modern homes.
APPLIANCE_SYSTEMS = [
'gas range cooker', # A gas-powered range cooker
+ 'oil range cooker'  # An oil-powered range cooker
]
# Descriptions which represent the same thing
@@ -92,6 +94,7 @@ class HotWaterAttributes(Definitions):
WELSH_TEXT = {
"ogçör brif system": "from main system",
+ "ogçör brif system, adfer gwres nwyon ffliw": "from main system, flue gas heat recovery"  # NOTE(review): "ogçör" looks like a mis-encoded Welsh "o'r"; presumably kept to match the raw EPC text - confirm
}
def __init__(self, description: str):
@@ -118,6 +121,7 @@ class HotWaterAttributes(Definitions):
self.CHP_SYSTEMS,
self.NO_SYSTEM_PRESENT_KEYWORDS,
self.APPLIANCE_SYSTEMS,
+ self.DISTRIBUTION_SYSTEM_KEYWORDS
]
):
raise ValueError('Invalid description')
diff --git a/model_data/epc_attributes/LightingAttributes.py b/model_data/epc_attributes/LightingAttributes.py
index 92c03846..452caa7a 100644
--- a/model_data/epc_attributes/LightingAttributes.py
+++ b/model_data/epc_attributes/LightingAttributes.py
@@ -4,9 +4,18 @@ from model_data.utils import correct_spelling
class LightingAttributes:
+ WELSH_TEXT = {  # Welsh description -> English equivalent, looked up by exact match in __init__
+ "goleuadau ynni-isel ym mhob un ogçör mannau gosod": "low energy lighting in all fixed outlets"  # NOTE(review): "ogçör" looks like a mis-encoded Welsh "o'r"; presumably kept to match the raw EPC text - confirm
+ }
def __init__(self, description, averages):
self.description: str = clean_description(description.lower())
+
+ translation = self.WELSH_TEXT.get(self.description)
+ if translation:
+ self.nodata = False
+ self.description = translation
+
self.description = correct_spelling(self.description)
self.averages = averages
diff --git a/model_data/epc_attributes/MainFuelAttributes.py b/model_data/epc_attributes/MainFuelAttributes.py
index 055f4cac..363d0ec3 100644
--- a/model_data/epc_attributes/MainFuelAttributes.py
+++ b/model_data/epc_attributes/MainFuelAttributes.py
@@ -26,6 +26,8 @@ class MainFuelAttributes(Definitions):
# Wood pellets have a higher energy density than wood chips. This is due to their manufacturing process,
# which compresses the wood and removes most of the moisture, making them more efficient as a fuel
'wood pellets',
+ 'b30k',
+ 'dual fuel appliance mineral and wood',
]
COMPLEX_FUEL_KEYWORDS = [
diff --git a/model_data/epc_attributes/MainheatAttributes.py b/model_data/epc_attributes/MainheatAttributes.py
index 492c3123..70e78ee0 100644
--- a/model_data/epc_attributes/MainheatAttributes.py
+++ b/model_data/epc_attributes/MainheatAttributes.py
@@ -1,5 +1,5 @@
from model_data.BaseUtility import Definitions
-from model_data.epc_attributes.attribute_utils import clean_description, process_part
+from model_data.epc_attributes.attribute_utils import clean_description, process_part, switch_chars
from typing import Dict, Union
@@ -25,7 +25,10 @@ class MainHeatAttributes(Definitions):
}
def __init__(self, description: str):
- self.description: str = clean_description(description.lower())
+
+ self.description = switch_chars(description.lower())
+
+ self.description: str = clean_description(self.description)
# Remove special characters
self.nodata = not description or description in self.DATA_ANOMALY_MATCHES
diff --git a/model_data/epc_attributes/RoofAttributes.py b/model_data/epc_attributes/RoofAttributes.py
index df1ce977..892217b6 100644
--- a/model_data/epc_attributes/RoofAttributes.py
+++ b/model_data/epc_attributes/RoofAttributes.py
@@ -10,6 +10,7 @@ class RoofAttributes(Definitions):
WELSH_TEXT = {
"ar oleddf, dim inswleiddio": "pitched, no insulation",
+ "ar oleddf, 150 mm o inswleiddio yn y llofft": "pitched, 150 mm loft insulation"
}
def __init__(self, description: str):
diff --git a/model_data/epc_attributes/attribute_utils.py b/model_data/epc_attributes/attribute_utils.py
index 9819cc01..a1b65327 100644
--- a/model_data/epc_attributes/attribute_utils.py
+++ b/model_data/epc_attributes/attribute_utils.py
@@ -65,6 +65,20 @@ def clean_description(description: str) -> str:
return description
+def switch_chars(description: str) -> str:
+    """
+    Return the description with every ":" replaced by ",".
+
+    Useful for descriptions like "Gas: mains gas", which come back as
+    "Gas, mains gas".
+    """
+
+    # One translation-table entry per character to switch; currently only ":"
+    to_comma = str.maketrans({":": ","})
+
+    return description.translate(to_comma)
+
+
def process_part(result: Dict[str, Union[str, bool]], part: str, attr_list: List[str], prefix: str):
"""
Process a part of the description with a given list of epc_attributes
diff --git a/model_data/tests/test_data/test_lighting_attributes_cases.py b/model_data/tests/test_data/test_lighting_attributes_cases.py
index 7ddec1d3..d9e3f01f 100644
--- a/model_data/tests/test_data/test_lighting_attributes_cases.py
+++ b/model_data/tests/test_data/test_lighting_attributes_cases.py
@@ -30,5 +30,6 @@ test_cases = [
{'original_description': 'Excellent lighting efficiency', 'low_energy_proportion': 1.0},
{'original_description': 'Low energy lighting in 2% of fixed outlets', 'low_energy_proportion': 0.02},
{'original_description': 'No Low energy lighting', 'low_energy_proportion': 0},
- {'original_description': 'Goleuadau ynni-isel mewn 60% oGÇÖr mannau gosod', 'low_energy_proportion': 0.6}
+ {'original_description': 'Goleuadau ynni-isel mewn 60% oGÇÖr mannau gosod', 'low_energy_proportion': 0.6},
+ {'original_description': 'Goleuadau ynni-isel ym mhob un oGÇÖr mannau gosod', 'low_energy_proportion': 1},
]
diff --git a/model_data/tests/test_data/test_main_fuel_attributes_cases.py b/model_data/tests/test_data/test_main_fuel_attributes_cases.py
index 8a06c979..49502e88 100644
--- a/model_data/tests/test_data/test_main_fuel_attributes_cases.py
+++ b/model_data/tests/test_data/test_main_fuel_attributes_cases.py
@@ -60,5 +60,10 @@ mainfuel_cases = [
{'original_description': 'wood chips', 'fuel_type': 'wood chips', 'tariff_type': None, 'is_community': False,
'no_individual_heating_or_community_network': False, 'complex_fuel_type': None},
{'original_description': 'wood pellets', 'fuel_type': 'wood pellets', 'tariff_type': None, 'is_community': False,
- 'no_individual_heating_or_community_network': False, 'complex_fuel_type': None}
+ 'no_individual_heating_or_community_network': False, 'complex_fuel_type': None},
+ {'original_description': 'Solid fuel: dual fuel appliance (mineral and wood)',
+ 'fuel_type': 'dual fuel appliance mineral and wood',
+ 'tariff_type': None, 'is_community': False,
+ 'no_individual_heating_or_community_network': False, 'complex_fuel_type': None},
+
]
diff --git a/sapmodel.serverless.yml b/sapmodel.serverless.yml
index 77d9fc1f..d43609d4 100644
--- a/sapmodel.serverless.yml
+++ b/sapmodel.serverless.yml
@@ -58,4 +58,5 @@ functions:
- http:
path: /predict
method: POST
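+ async: true # Invoke the function asynchronously for long running tasks: API Gateway responds immediately while the Lambda keeps running, so the caller does not receive the function's result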
timeout: 120 # Set max run time to 2 minutes - we shouldn't need this much time so this can be reviewed