diff --git a/model_data/BoreholeClient.py b/model_data/BoreholeClient.py index 39e281a6..9e669d86 100644 --- a/model_data/BoreholeClient.py +++ b/model_data/BoreholeClient.py @@ -1,7 +1,7 @@ import math from tqdm import tqdm from dbfread import DBF -from utils import setup_logger +from model_data.utils import setup_logger logger = setup_logger() diff --git a/model_data/EpcClean.py b/model_data/EpcClean.py index fab0fb3c..6b28591f 100644 --- a/model_data/EpcClean.py +++ b/model_data/EpcClean.py @@ -1,8 +1,14 @@ from typing import List, Dict, Any from collections import Counter -from model_data.epc_attributes.RoofAttributes import RoofAttributes from model_data.epc_attributes.FloorAttributes import FloorAttributes +from model_data.epc_attributes.HotWaterAttributes import HotWaterAttributes +from model_data.epc_attributes.MainFuelAttributes import MainFuelAttributes +from model_data.epc_attributes.MainheatAttributes import MainHeatAttributes +from model_data.epc_attributes.MainheatControlAttributes import MainheatControlAttributes +from model_data.epc_attributes.RoofAttributes import RoofAttributes +from model_data.epc_attributes.WallAttributes import WallAttributes +from model_data.epc_attributes.WindowAttributes import WindowAttributes class EpcClean: @@ -11,10 +17,14 @@ class EpcClean: """ CLEANING_FIELDS: List[str] = [ - "roof-description", "floor-description", + "hotwater-description", + "main-fuel", + "mainheat-description", + "main-heating-controls", + "roof-description", "walls-description", - "mainheat-description" + "windows-description", ] def __init__(self, data: List[Dict[str, Any]]) -> None: @@ -36,9 +46,14 @@ class EpcClean: for field in self.CLEANING_FIELDS: self.unique_vals[field] = Counter([v[field] for v in self.data]) - self.clean_wrapper(field="roof-description", cleaning_cls=RoofAttributes) - self.clean_wrapper(field="floor-description", cleaning_cls=FloorAttributes) + self.clean_wrapper(field="hotwater-description", 
cleaning_cls=HotWaterAttributes) + self.clean_wrapper(field="main-fuel", cleaning_cls=MainFuelAttributes) + self.clean_wrapper(field="mainheat-description", cleaning_cls=MainHeatAttributes) + self.clean_wrapper(field="main-heating-controls", cleaning_cls=MainheatControlAttributes) + self.clean_wrapper(field="roof-description", cleaning_cls=RoofAttributes) + self.clean_wrapper(field="walls-description", cleaning_cls=WallAttributes) + self.clean_wrapper(field="windows-description", cleaning_cls=WindowAttributes) def _init_empty_cleaned_obj(self) -> None: """ diff --git a/model_data/app.py b/model_data/app.py index 26584123..3e755555 100644 --- a/model_data/app.py +++ b/model_data/app.py @@ -1,7 +1,8 @@ import pandas as pd +from pprint import pprint from tqdm import tqdm import os -from BoreholeClient import BoreholeClient +from model_data.BoreholeClient import BoreholeClient from model_data.LandRegistryClient import LandRegistryClient from model_data.temp_inputs import input_data @@ -47,9 +48,14 @@ def handler(): ) open_uprn_client.read() + # What's going on here? + # We're using Ordnance Survey Open UPRN data + # to find the coordinates of each address, which we will then be able to use at a later stage for p in input_properties: p.get_coordinates(open_uprn_client) + pprint(input_properties[0].coordinates) + local_authorities = {p.data['local-authority'] for p in input_properties} data = [] @@ -67,6 +73,14 @@ def handler(): cleaner.clean() + # example cleaned data + # Why do we need this stuff? 
+ # https://docs.google.com/spreadsheets/d/1ek9ItDv7xHwFm_FK6B0PyOBwvi6U4qRPuncBsVlCHUA/edit#gid=0 + cleaner.cleaned.keys() + floors = pd.DataFrame(cleaner.cleaned['floor-description']) + hotwater = pd.DataFrame(cleaner.cleaned['hotwater-description']) + mainheat = pd.DataFrame(cleaner.cleaned["mainheat-description"]) + address_meta = [ { "postcode": x["postcode"].upper(), @@ -100,7 +114,7 @@ def handler(): property = input_properties[0] # for each property, find the nearest borehole - + # This is just an example, looking at the distance from a property to a borehole dist_m, dist_km = borehole_client.distance_between_bng_coords( x1_bng=property.coordinates["x_coordinate"], y1_bng=property.coordinates["y_coordinate"], diff --git a/model_data/epc_attributes/MainheatControlAttributes.py b/model_data/epc_attributes/MainheatControlAttributes.py index 547fc853..4c997feb 100644 --- a/model_data/epc_attributes/MainheatControlAttributes.py +++ b/model_data/epc_attributes/MainheatControlAttributes.py @@ -67,25 +67,42 @@ class MainheatControlAttributes: def __init__(self, description: str): self.description: str = clean_description(description.lower()) + self.nodata = not description - if not any( - self._keyword_in_description(keywords) - for keywords in [ - self.THERMOSTATIC_CONTROL_KEYWORDS, - self.CHARGING_SYSTEM_KEYWORDS, - self.SWITCH_SYSTEM_KEYWORDS, - self.DHW_CONTROL_KEYWORDS, - self.COMMUNITY_HEATING_KEYWORDS, - self.TRVS_KEYWORDS, - self.NO_CONTROL_SYSTEM_KEYWORDS - ] - ): - raise ValueError('Invalid description') + if not self.nodata: + if not any( + self._keyword_in_description(keywords) + for keywords in [ + self.THERMOSTATIC_CONTROL_KEYWORDS, + self.CHARGING_SYSTEM_KEYWORDS, + self.SWITCH_SYSTEM_KEYWORDS, + self.DHW_CONTROL_KEYWORDS, + self.COMMUNITY_HEATING_KEYWORDS, + self.TRVS_KEYWORDS, + self.NO_CONTROL_SYSTEM_KEYWORDS + ] + ): + raise ValueError('Invalid description') def _keyword_in_description(self, keywords): return any(keyword in self.description for 
keyword in keywords) def process(self) -> Dict[str, Union[str, bool]]: + + if self.nodata: + result = { + "thermostatic_control": False, + "charging_system": False, + "switch_system": False, + "no_control": False, + "dhw_control": False, + "community_heating": False, + "multiple_room_thermostats": False, + "auxiliary_systems": False, + "trvs": False + } + return result + result: Dict[str, Union[str, bool]] = { "thermostatic_control": find_keyword(self.description, self.THERMOSTATIC_CONTROL_KEYWORDS), "charging_system": find_keyword(self.description, self.CHARGING_SYSTEM_KEYWORDS),