mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Added anomaly handling into classes by introducing the BaseUtility class
This commit is contained in:
parent
2c17b983d0
commit
9e61f3c8fb
4 changed files with 78 additions and 18 deletions
54
model_data/BaseUtility.py
Normal file
54
model_data/BaseUtility.py
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
class BaseUtility:
    """
    Shared base class holding the EPC data-anomaly markers used across multiple other classes.

    Subclasses (e.g. ``Property`` and ``MainFuelAttributes``) inherit these class-level constants so that they
    can recognise placeholder values in EPC records before trying to interpret them as real descriptions.

    Attributes:
        DATA_ANOMALY_MATCHES: placeholder values that mark a field as anomalous when the field is exactly
            equal to one of them (callers test ``value in DATA_ANOMALY_MATCHES``).
        DATA_ANOMALY_SUBSTRINGS: phrases that mark a field as anomalous; judging by the name these are meant
            to be searched for inside a value rather than compared for equality.
    """

    # Anomalies described here: https://epc.opendatacommunities.org/docs/guidance#glossary
    DATA_ANOMALY_MATCHES = {
        # Invalid reports are where the value provided is out of bounds, e.g. a negative energy rating of -1199 or a
        # non-integer, there is no valid energy band for this, so it is marked as INVALID!
        "INVALID",
        # When the energy certificate was first lodged on the register there was no requirement to lodge this data
        # item, i.e. a non-mandatory item.
        "NO DATA!",
        # When the energy certificate was first lodged on the register there was no requirement to lodge this data
        # item, i.e. a non-mandatory item.
        "N/A",
        # A value generated by the register to account for a data item that was not mandatory when the lodgement of
        # the energy certificate occurred. When the data item became mandatory the register operator, for backwards
        # compatibility purposes, populated the data field with a value of ‘not recorded’ to ensure that the energy
        # certificate retrieval process is successfully completed. Mandatory data items cannot be applied
        # retrospectively to energy certificates lodged before the date of the change.
        "Not recorded",
        # The data also contains DECs with an operational rating of ‘9999’ (a ‘default’ DEC). The production of a
        # ‘default’ DEC value was allowed to enable building occupiers, with poor quality or no energy data,
        # the opportunity to comply with the regulations. From April 2011 the ability to lodge a ‘default’ DEC was no
        # longer allowed.
        "9999",
        # The Building Emission Rate (BER) data field for non-domestic buildings may contain a ‘blank’ value. The BER
        # was only lodged on the register from 7 March 2010.
        # NOTE: the trailing comma matters - without it Python concatenates this literal with the next one and the
        # set ends up containing the single bogus entry "BlankNULL".
        "Blank",
        # There are currently just over 8,600 records where the local authority identifier is ‘null’. This is due to
        # the Register Operator not being able to match the building address in the Markermap Ordnance Survey (GB)
        # lookup tables or OS MasterMap Address Layer 2 data. The majority of these addresses have been requested
        # manually by energy assessors for inclusion by the Register Operator in the registers (e.g. new builds,
        # etc). These records are being published for completeness. An ongoing process to manage these manually added
        # addresses will take time to develop to deal with these and future anomalies.
        #
        # There are several fields within the lodged data where it is possible to enter multiple entries to cater for
        # different types of build within a single property, i.e. extensions. This results in multiple entries for
        # the description fields for floor, roof and wall. For the purposes of this data release only the information
        # contained within the first of these multiple entries is being provided. As there are no restrictions on the
        # value in this first field it means that sometimes the first field in a multiple entry description field may
        # contain a ‘null’ value. A resolution to correct these anomalies will be considered for future data releases.
        "NULL",
    }

    DATA_ANOMALY_SUBSTRINGS = {
        # Where values in a ‘pick’ list that have been superseded by another value. For example, where a value for
        # ‘pitched roof’ has been replaced by three sub-categories of pitched roof. The original value is retained
        # but ‘for backward compatibility only’ it is appended to ensure that the energy certificate retrieval
        # process can be successfully completed. Replacement data items cannot be applied retrospectively to energy
        # certificates lodged on the register before the date of the change.
        "for backward compatibility only",
    }
|
||||
|
|
@ -2,9 +2,10 @@ from epc_api.client import EpcClient
|
|||
from model_data.config import EPC_AUTH_TOKEN
|
||||
from model_data.OpenUprnClient import OpenUprnClient
|
||||
from model_data.EpcClean import EpcClean
|
||||
from model_data.BaseUtility import BaseUtility
|
||||
|
||||
|
||||
class Property:
|
||||
class Property(BaseUtility):
|
||||
ATTRIBUTE_MAP = {
|
||||
"floor-description": "floor",
|
||||
"hotwater-description": "hotwater",
|
||||
|
|
@ -91,6 +92,10 @@ class Property:
|
|||
|
||||
for description, attribute in cleaner.cleaned.items():
|
||||
|
||||
if self.data[description] in self.DATA_ANOMALY_MATCHES:
|
||||
setattr(self, self.ATTRIBUTE_MAP[description], {"original_description": self.data[description]})
|
||||
continue
|
||||
|
||||
attributes = [
|
||||
x for x in cleaner.cleaned[description] if x["original_description"] == self.data[description]
|
||||
]
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
import pandas as pd
|
||||
from pprint import pprint
|
||||
from tqdm import tqdm
|
||||
import os
|
||||
from model_data.BoreholeClient import BoreholeClient
|
||||
|
|
@ -54,8 +52,6 @@ def handler():
|
|||
for p in input_properties:
|
||||
p.get_coordinates(open_uprn_client)
|
||||
|
||||
pprint(input_properties[0].coordinates)
|
||||
|
||||
local_authorities = {p.data['local-authority'] for p in input_properties}
|
||||
|
||||
data = []
|
||||
|
|
@ -69,19 +65,10 @@ def handler():
|
|||
)
|
||||
)
|
||||
|
||||
cleaner = EpcClean(data)
|
||||
|
||||
# Incorporate input data into cleaning
|
||||
cleaner = EpcClean(data + [p.data for p in input_properties])
|
||||
cleaner.clean()
|
||||
|
||||
# example cleaned data
|
||||
# Why do we need this stuff?
|
||||
# https://docs.google.com/spreadsheets/d/1ek9ItDv7xHwFm_FK6B0PyOBwvi6U4qRPuncBsVlCHUA/edit#gid=0
|
||||
cleaner.cleaned.keys()
|
||||
floors = pd.DataFrame(cleaner.cleaned['floor-description'])
|
||||
walls = pd.DataFrame(cleaner.cleaned['walls-description'])
|
||||
hotwater = pd.DataFrame(cleaner.cleaned['hotwater-description'])
|
||||
mainheat = pd.DataFrame(cleaner.cleaned["mainheat-description"])
|
||||
|
||||
address_meta = [
|
||||
{
|
||||
"postcode": x["postcode"].upper(),
|
||||
|
|
@ -110,3 +97,5 @@ def handler():
|
|||
# on the cleaning we've done
|
||||
for p in input_properties:
|
||||
p.get_components(cleaner)
|
||||
|
||||
# Now, given the components, we want to identify upgrade options
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
from typing import Dict, Union
|
||||
from model_data.BaseUtility import BaseUtility
|
||||
from model_data.epc_attributes.attribute_utils import clean_description, remove_punctuation, find_keyword
|
||||
|
||||
|
||||
class MainFuelAttributes:
|
||||
class MainFuelAttributes(BaseUtility):
|
||||
FUEL_KEYWORDS = [
|
||||
'heat network',
|
||||
'mains gas',
|
||||
|
|
@ -42,7 +43,7 @@ class MainFuelAttributes:
|
|||
|
||||
self.is_community = 'community' in self.description and 'not community' not in self.description
|
||||
self.is_unknown = False
|
||||
self.nodata = not description
|
||||
self.nodata = not description or description in self.DATA_ANOMALY_MATCHES
|
||||
|
||||
if not self.nodata and not any(
|
||||
self._keyword_in_description(keywords)
|
||||
|
|
@ -59,6 +60,17 @@ class MainFuelAttributes:
|
|||
return any(keyword in self.description for keyword in keywords)
|
||||
|
||||
def process(self) -> Dict[str, Union[str, bool]]:
|
||||
|
||||
if self.nodata:
|
||||
result = {
|
||||
"fuel_type": None,
|
||||
"tariff_type": None,
|
||||
"is_community": False,
|
||||
"no_individual_heating_or_community_network": False,
|
||||
"complex_fuel_type": False
|
||||
}
|
||||
return result
|
||||
|
||||
result: Dict[str, Union[str, bool]] = {
|
||||
"fuel_type": find_keyword(self.description, self.FUEL_KEYWORDS),
|
||||
"tariff_type": find_keyword(self.description, self.TARIFF_KEYWORDS),
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue