got pipeline working for new properties

This commit is contained in:
Khalim Conn-Kowlessar 2023-10-09 14:07:52 +08:00
parent c502fd8370
commit f551fda186
7 changed files with 36 additions and 5 deletions

View file

@ -43,7 +43,9 @@ class Definitions:
# contained within the first of these multiple entries is being provided. As there are no restrictions on the
# value in this first field it means that sometimes the first field in a multiple entry description field may
# contain a null value. A resolution to correct these anomalies will be considered for future data releases.
"NULL"
"NULL",
# We sometimes see fields populated with just an empty string.
""
}
DATA_ANOMALY_SUBSTRINGS = {

View file

@ -4,6 +4,7 @@ import os
import pandas as pd
from etl.epc.DataProcessor import DataProcessor
from etl.epc_clean.epc_attributes.all_cleaners import all_cleaner_map
from utils.logger import setup_logger
from utils.s3 import read_dataframe_from_s3_parquet
from epc_api.client import EpcClient
@ -286,8 +287,14 @@ class Property(Definitions):
attributes = [
x for x in cleaned[description] if x["original_description"] == self.data[description]
]
if len(attributes) != 1:
if len(attributes) > 1:
raise ValueError("Either No attributes or multiple found for %s" % description)
if len(attributes) == 0:
# We attempt to perform the clean on the fly
cleaner_cls = all_cleaner_map[description]
attributes = [cleaner_cls(self.data[description]).process()]
setattr(self, self.ATTRIBUTE_MAP[description], attributes[0])
self.set_wall_type()

View file

@ -131,7 +131,7 @@ async def trigger_plan(body: PlanTriggerRequest):
# Floor recommendations
floor_recommender = FloorRecommendations(
property_instance=p,
materials=materials_by_type["suspended_floor_insulation"] + materials_by_type["solid_floor_insulation"],
materials=materials_by_type["floor"],
)
floor_recommender.recommend()
@ -142,7 +142,7 @@ async def trigger_plan(body: PlanTriggerRequest):
wall_recomender = WallRecommendations(
property_instance=p,
materials=materials_by_type["external_wall_insulation"] + materials_by_type["internal_wall_insulation"]
materials=materials_by_type["walls"]
)
wall_recomender.recommend()

View file

@ -34,4 +34,5 @@ pytz==2023.3
mip==1.15.0
boto3==1.28.3
pandas==1.5.3
pyarrow==12.0.1
pyarrow==12.0.1
textblob

0
etl/__init__.py Normal file
View file

View file

View file

@ -0,0 +1,21 @@
from etl.epc_clean.epc_attributes.FloorAttributes import FloorAttributes
from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes
from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes
from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
from etl.epc_clean.epc_attributes.MainheatControlAttributes import MainheatControlAttributes
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
from etl.epc_clean.epc_attributes.WindowAttributes import WindowAttributes
from etl.epc_clean.epc_attributes.LightingAttributes import LightingAttributes
# Lookup table from an EPC description field name to the cleaner class that
# processes that field's raw text. Callers index this map with the bare field
# name, so every key must match the field name exactly (no trailing
# punctuation); a mismatched key surfaces as a KeyError at lookup time.
all_cleaner_map = {
    'floor-description': FloorAttributes,
    'hotwater-description': HotWaterAttributes,
    'main-fuel': MainFuelAttributes,
    'mainheat-description': MainHeatAttributes,
    'mainheatcont-description': MainheatControlAttributes,
    'roof-description': RoofAttributes,
    'walls-description': WallAttributes,
    'windows-description': WindowAttributes,
    # Previously keyed as 'lighting-description:' (stray colon), which made
    # the lighting cleaner unreachable via the real field name.
    'lighting-description': LightingAttributes,
}