tidy up for demo

This commit is contained in:
Khalim Conn-Kowlessar 2023-06-19 10:58:56 +01:00
parent b75fedf3ac
commit dd5b8db1d5
4 changed files with 67 additions and 21 deletions

View file

@ -1,7 +1,7 @@
import math
from tqdm import tqdm
from dbfread import DBF
from utils import setup_logger
from model_data.utils import setup_logger
logger = setup_logger()

View file

@ -1,8 +1,14 @@
from typing import List, Dict, Any
from collections import Counter
from model_data.epc_attributes.RoofAttributes import RoofAttributes
from model_data.epc_attributes.FloorAttributes import FloorAttributes
from model_data.epc_attributes.HotWaterAttributes import HotWaterAttributes
from model_data.epc_attributes.MainFuelAttributes import MainFuelAttributes
from model_data.epc_attributes.MainheatAttributes import MainHeatAttributes
from model_data.epc_attributes.MainheatControlAttributes import MainheatControlAttributes
from model_data.epc_attributes.RoofAttributes import RoofAttributes
from model_data.epc_attributes.WallAttributes import WallAttributes
from model_data.epc_attributes.WindowAttributes import WindowAttributes
class EpcClean:
@ -11,10 +17,14 @@ class EpcClean:
"""
CLEANING_FIELDS: List[str] = [
"roof-description",
"floor-description",
"hotwater-description",
"main-fuel",
"mainheat-description",
"main-heating-controls",
"roof-description",
"walls-description",
"mainheat-description"
"windows-description",
]
def __init__(self, data: List[Dict[str, Any]]) -> None:
@ -36,9 +46,14 @@ class EpcClean:
for field in self.CLEANING_FIELDS:
self.unique_vals[field] = Counter([v[field] for v in self.data])
self.clean_wrapper(field="roof-description", cleaning_cls=RoofAttributes)
self.clean_wrapper(field="floor-description", cleaning_cls=FloorAttributes)
self.clean_wrapper(field="hotwater-description", cleaning_cls=HotWaterAttributes)
self.clean_wrapper(field="main-fuel", cleaning_cls=MainFuelAttributes)
self.clean_wrapper(field="mainheat-description", cleaning_cls=MainHeatAttributes)
self.clean_wrapper(field="main-heating-controls", cleaning_cls=MainheatControlAttributes)
self.clean_wrapper(field="roof-description", cleaning_cls=RoofAttributes)
self.clean_wrapper(field="walls-description", cleaning_cls=WallAttributes)
self.clean_wrapper(field="windows-description", cleaning_cls=WindowAttributes)
def _init_empty_cleaned_obj(self) -> None:
"""

View file

@ -1,7 +1,8 @@
import pandas as pd
from pprint import pprint
from tqdm import tqdm
import os
from BoreholeClient import BoreholeClient
from model_data.BoreholeClient import BoreholeClient
from model_data.LandRegistryClient import LandRegistryClient
from model_data.temp_inputs import input_data
@ -47,9 +48,14 @@ def handler():
)
open_uprn_client.read()
# What's going on here?
# We're using Ordnance Survey Open UPRN data
# to find the coordinates of each address, which we will then be able to use at a later stage
for p in input_properties:
p.get_coordinates(open_uprn_client)
pprint(input_properties[0].coordinates)
local_authorities = {p.data['local-authority'] for p in input_properties}
data = []
@ -67,6 +73,14 @@ def handler():
cleaner.clean()
# example cleaned data
# Why do we need this stuff?
# https://docs.google.com/spreadsheets/d/1ek9ItDv7xHwFm_FK6B0PyOBwvi6U4qRPuncBsVlCHUA/edit#gid=0
cleaner.cleaned.keys()
floors = pd.DataFrame(cleaner.cleaned['floor-description'])
hotwater = pd.DataFrame(cleaner.cleaned['hotwater-description'])
mainheat = pd.DataFrame(cleaner.cleaned["mainheat-description"])
address_meta = [
{
"postcode": x["postcode"].upper(),
@ -100,7 +114,7 @@ def handler():
property = input_properties[0]
# for each property, find the nearest borehole
# This is just an example, looking at the distance from a property to a borehole
dist_m, dist_km = borehole_client.distance_between_bng_coords(
x1_bng=property.coordinates["x_coordinate"],
y1_bng=property.coordinates["y_coordinate"],

View file

@ -67,25 +67,42 @@ class MainheatControlAttributes:
def __init__(self, description: str):
    """Normalise a main-heating-controls description and validate it.

    Args:
        description: Raw description text. An empty (or missing) value is
            recorded as "no data" and is not validated.

    Raises:
        ValueError: If a non-empty description contains none of the
            keywords from any of the known keyword groups.
    """
    # Emptiness is judged on the raw input, before any normalisation.
    self.nodata = not description
    # `or ""` stops a missing (None) description from crashing on `.lower()`.
    self.description: str = clean_description((description or "").lower())

    # Only real descriptions are validated. Validating unconditionally would
    # reject empty descriptions, so the `nodata` branch of `process` could
    # never be reached.
    if self.nodata:
        return

    keyword_groups = [
        self.THERMOSTATIC_CONTROL_KEYWORDS,
        self.CHARGING_SYSTEM_KEYWORDS,
        self.SWITCH_SYSTEM_KEYWORDS,
        self.DHW_CONTROL_KEYWORDS,
        self.COMMUNITY_HEATING_KEYWORDS,
        self.TRVS_KEYWORDS,
        self.NO_CONTROL_SYSTEM_KEYWORDS,
    ]
    if not any(self._keyword_in_description(group) for group in keyword_groups):
        raise ValueError('Invalid description')
def _keyword_in_description(self, keywords):
    """Return True if any item of ``keywords`` is a substring of ``self.description``.

    Returns False when ``keywords`` is empty or nothing matches.
    """
    for candidate in keywords:
        if candidate in self.description:
            return True
    return False
def process(self) -> Dict[str, Union[str, bool]]:
if self.nodata:
result = {
"thermostatic_control": False,
"charging_system": False,
"switch_system": False,
"no_control": False,
"dhw_control": False,
"community_heating": False,
"multiple_room_thermostats": False,
"auxiliary_systems": False,
"trvs": False
}
return result
result: Dict[str, Union[str, bool]] = {
"thermostatic_control": find_keyword(self.description, self.THERMOSTATIC_CONTROL_KEYWORDS),
"charging_system": find_keyword(self.description, self.CHARGING_SYSTEM_KEYWORDS),