tidy up for demo

This commit is contained in:
Khalim Conn-Kowlessar 2023-06-19 10:58:56 +01:00
parent b75fedf3ac
commit dd5b8db1d5
4 changed files with 67 additions and 21 deletions

View file

@ -1,7 +1,7 @@
import math import math
from tqdm import tqdm from tqdm import tqdm
from dbfread import DBF from dbfread import DBF
from utils import setup_logger from model_data.utils import setup_logger
logger = setup_logger() logger = setup_logger()

View file

@ -1,8 +1,14 @@
from typing import List, Dict, Any from typing import List, Dict, Any
from collections import Counter from collections import Counter
from model_data.epc_attributes.RoofAttributes import RoofAttributes
from model_data.epc_attributes.FloorAttributes import FloorAttributes from model_data.epc_attributes.FloorAttributes import FloorAttributes
from model_data.epc_attributes.HotWaterAttributes import HotWaterAttributes
from model_data.epc_attributes.MainFuelAttributes import MainFuelAttributes
from model_data.epc_attributes.MainheatAttributes import MainHeatAttributes
from model_data.epc_attributes.MainheatControlAttributes import MainheatControlAttributes
from model_data.epc_attributes.RoofAttributes import RoofAttributes
from model_data.epc_attributes.WallAttributes import WallAttributes
from model_data.epc_attributes.WindowAttributes import WindowAttributes
class EpcClean: class EpcClean:
@ -11,10 +17,14 @@ class EpcClean:
""" """
CLEANING_FIELDS: List[str] = [ CLEANING_FIELDS: List[str] = [
"roof-description",
"floor-description", "floor-description",
"hotwater-description",
"main-fuel",
"mainheat-description",
"main-heating-controls",
"roof-description",
"walls-description", "walls-description",
"mainheat-description" "windows-description",
] ]
def __init__(self, data: List[Dict[str, Any]]) -> None: def __init__(self, data: List[Dict[str, Any]]) -> None:
@ -36,9 +46,14 @@ class EpcClean:
for field in self.CLEANING_FIELDS: for field in self.CLEANING_FIELDS:
self.unique_vals[field] = Counter([v[field] for v in self.data]) self.unique_vals[field] = Counter([v[field] for v in self.data])
self.clean_wrapper(field="roof-description", cleaning_cls=RoofAttributes)
self.clean_wrapper(field="floor-description", cleaning_cls=FloorAttributes) self.clean_wrapper(field="floor-description", cleaning_cls=FloorAttributes)
self.clean_wrapper(field="hotwater-description", cleaning_cls=HotWaterAttributes)
self.clean_wrapper(field="main-fuel", cleaning_cls=MainFuelAttributes)
self.clean_wrapper(field="mainheat-description", cleaning_cls=MainHeatAttributes)
self.clean_wrapper(field="main-heating-controls", cleaning_cls=MainheatControlAttributes)
self.clean_wrapper(field="roof-description", cleaning_cls=RoofAttributes)
self.clean_wrapper(field="walls-description", cleaning_cls=WallAttributes)
self.clean_wrapper(field="windows-description", cleaning_cls=WindowAttributes)
def _init_empty_cleaned_obj(self) -> None: def _init_empty_cleaned_obj(self) -> None:
""" """

View file

@ -1,7 +1,8 @@
import pandas as pd import pandas as pd
from pprint import pprint
from tqdm import tqdm from tqdm import tqdm
import os import os
from BoreholeClient import BoreholeClient from model_data.BoreholeClient import BoreholeClient
from model_data.LandRegistryClient import LandRegistryClient from model_data.LandRegistryClient import LandRegistryClient
from model_data.temp_inputs import input_data from model_data.temp_inputs import input_data
@ -47,9 +48,14 @@ def handler():
) )
open_uprn_client.read() open_uprn_client.read()
# What's going on here?
# We're using Ordnance Survey Open UPRN data
# to find the coordinates of each address, which we will then be able to use at a later stage
for p in input_properties: for p in input_properties:
p.get_coordinates(open_uprn_client) p.get_coordinates(open_uprn_client)
pprint(input_properties[0].coordinates)
local_authorities = {p.data['local-authority'] for p in input_properties} local_authorities = {p.data['local-authority'] for p in input_properties}
data = [] data = []
@ -67,6 +73,14 @@ def handler():
cleaner.clean() cleaner.clean()
# example cleaned data
# Why do we need this stuff?
# https://docs.google.com/spreadsheets/d/1ek9ItDv7xHwFm_FK6B0PyOBwvi6U4qRPuncBsVlCHUA/edit#gid=0
cleaner.cleaned.keys()
floors = pd.DataFrame(cleaner.cleaned['floor-description'])
hotwater = pd.DataFrame(cleaner.cleaned['hotwater-description'])
mainheat = pd.DataFrame(cleaner.cleaned["mainheat-description"])
address_meta = [ address_meta = [
{ {
"postcode": x["postcode"].upper(), "postcode": x["postcode"].upper(),
@ -100,7 +114,7 @@ def handler():
property = input_properties[0] property = input_properties[0]
# for each property, find the nearest borehole # for each property, find the nearest borehole
# This is just an example, looking at the distance from a property to a borehole
dist_m, dist_km = borehole_client.distance_between_bng_coords( dist_m, dist_km = borehole_client.distance_between_bng_coords(
x1_bng=property.coordinates["x_coordinate"], x1_bng=property.coordinates["x_coordinate"],
y1_bng=property.coordinates["y_coordinate"], y1_bng=property.coordinates["y_coordinate"],

View file

@ -67,25 +67,42 @@ class MainheatControlAttributes:
def __init__(self, description: str): def __init__(self, description: str):
self.description: str = clean_description(description.lower()) self.description: str = clean_description(description.lower())
self.nodata = not description
if not any( if not self.nodata:
self._keyword_in_description(keywords) if not any(
for keywords in [ self._keyword_in_description(keywords)
self.THERMOSTATIC_CONTROL_KEYWORDS, for keywords in [
self.CHARGING_SYSTEM_KEYWORDS, self.THERMOSTATIC_CONTROL_KEYWORDS,
self.SWITCH_SYSTEM_KEYWORDS, self.CHARGING_SYSTEM_KEYWORDS,
self.DHW_CONTROL_KEYWORDS, self.SWITCH_SYSTEM_KEYWORDS,
self.COMMUNITY_HEATING_KEYWORDS, self.DHW_CONTROL_KEYWORDS,
self.TRVS_KEYWORDS, self.COMMUNITY_HEATING_KEYWORDS,
self.NO_CONTROL_SYSTEM_KEYWORDS self.TRVS_KEYWORDS,
] self.NO_CONTROL_SYSTEM_KEYWORDS
): ]
raise ValueError('Invalid description') ):
raise ValueError('Invalid description')
def _keyword_in_description(self, keywords): def _keyword_in_description(self, keywords):
return any(keyword in self.description for keyword in keywords) return any(keyword in self.description for keyword in keywords)
def process(self) -> Dict[str, Union[str, bool]]: def process(self) -> Dict[str, Union[str, bool]]:
if self.nodata:
result = {
"thermostatic_control": False,
"charging_system": False,
"switch_system": False,
"no_control": False,
"dhw_control": False,
"community_heating": False,
"multiple_room_thermostats": False,
"auxiliary_systems": False,
"trvs": False
}
return result
result: Dict[str, Union[str, bool]] = { result: Dict[str, Union[str, bool]] = {
"thermostatic_control": find_keyword(self.description, self.THERMOSTATIC_CONTROL_KEYWORDS), "thermostatic_control": find_keyword(self.description, self.THERMOSTATIC_CONTROL_KEYWORDS),
"charging_system": find_keyword(self.description, self.CHARGING_SYSTEM_KEYWORDS), "charging_system": find_keyword(self.description, self.CHARGING_SYSTEM_KEYWORDS),