mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
modified definitions class though it should be deprecated
This commit is contained in:
parent
40f5eba3d0
commit
e8f99b6c14
3 changed files with 9 additions and 49 deletions
|
|
@ -1,54 +1,14 @@
|
|||
from etl.epc.settings import DATA_ANOMALY_MATCHES as data_anon_matches
|
||||
from etl.epc.settings import DATA_ANOMALY_MATCHES as data_anon_matches
|
||||
|
||||
|
||||
class Definitions:
|
||||
"""
|
||||
This class contains some base attributes which are used across multiple other classes
|
||||
"""
|
||||
|
||||
# Anomalies described here: https://epc.opendatacommunities.org/docs/guidance#glossary
|
||||
DATA_ANOMALY_MATCHES = {
|
||||
# Invalid reports are where the value provided is out of bounds, e.g. a negative energy rating of -1199 or a
|
||||
# non-integer, there is no valid energy band for this, so it is marked as INVALID!
|
||||
"INVALID",
|
||||
"INVALID!",
|
||||
# When the energy certificate was first lodged on the register there was no requirement to lodge this data
|
||||
# item, i.e. a non-mandatory item.
|
||||
"NO DATA!",
|
||||
"NODATA!",
|
||||
# When the energy certificate was first lodged on the register there was no requirement to lodge this data item,
|
||||
# i.e. a non-mandatory item.
|
||||
"N/A",
|
||||
# A value generated by the register to account for a data item that was not mandatory when the lodgement of
|
||||
# the energy certificate occurred. When the data item became mandatory the register operator, for backwards
|
||||
# compatibility purposes, populated the data field with a value of ‘not recorded’ to ensure that the energy
|
||||
# certificate retrieval process is successfully completed. Mandatory data items cannot be applied
|
||||
# retrospectively to energy certificates lodged before the date of the change.
|
||||
"Not recorded",
|
||||
# The data also contains DECs with an operational rating of ‘9999’ (a ‘default’ DEC). The production of a
|
||||
# ‘default’ DEC value was allowed to enable building occupiers, with poor quality or no energy data,
|
||||
# the opportunity to comply with the regulations. From April 2011 the ability to lodge a ‘default’ DEC was no
|
||||
# longer allowed.
|
||||
"9999",
|
||||
# The Building Emission Rate (BER) data field for non-domestic buildings may contain a ‘blank’ value. The BER
|
||||
# was only lodged on the register from 7 March 2010.
|
||||
"Blank"
|
||||
# There are currently just over 8,600 records where the local authority identifier is ‘null’. This is due to
|
||||
# the Register Operator not being able to match the building address in the Markermap Ordnance Survey (GB)
|
||||
# lookup tables or OS MasterMap Address Layer 2 data. The majority of these addresses have been requested
|
||||
# manually by energy assessors for inclusion by the Register Operator in the registers (e.g. new builds,
|
||||
# etc). These records are being published for completeness. An ongoing process to manage these manually added
|
||||
# addresses will take time to develop to deal with these and future anomalies.
|
||||
#
|
||||
# There are several fields within the lodged data where it is possible to enter multiple entries to cater for
|
||||
# different data_types of build within a single property, i.e. extensions. This results in multiple entries for
|
||||
# the description fields for floor, roof and wall. For the purposes of this data release only the information
|
||||
# contained within the first of these multiple entries is being provided. As there are no restrictions on the
|
||||
# value in this first field it means that sometimes the first field in a multiple entry description field may
|
||||
# contain a ‘null’ value. A resolution to correct these anomalies will be considered for future data releases.
|
||||
"NULL",
|
||||
# We sometimes see fields populated with just an empty string.
|
||||
"",
|
||||
# An older value which rarely shows up but has been seen in the data.
|
||||
"UNKNOWN",
|
||||
}
|
||||
DATA_ANOMALY_MATCHES = data_anon_matches
|
||||
|
||||
DATA_ANOMALY_SUBSTRINGS = {
|
||||
# Where values in a ‘pick’ list that have been superseded by another value. For example, where a value for
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
from pathlib import Path
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from BaseUtility import Definitions
|
||||
from etl.epc.settings import (
|
||||
DATA_PROCESSOR_SETTINGS,
|
||||
EARLIEST_EPC_DATE,
|
||||
|
|
@ -22,6 +21,7 @@ from etl.epc.settings import (
|
|||
ENDING_SUFFIX_COMPONENT_COLS,
|
||||
POTENTIAL_COLUMNS,
|
||||
EFFICIENCY_FEATURES,
|
||||
DATA_ANOMALY_MATCHES
|
||||
)
|
||||
from recommendations.rdsap_tables import FLOOR_LEVEL_MAP
|
||||
|
||||
|
|
@ -247,8 +247,7 @@ class EPCDataProcessor:
|
|||
# Map all anomaly values to None
|
||||
data_anomaly_map = dict(
|
||||
zip(
|
||||
Definitions.DATA_ANOMALY_MATCHES,
|
||||
[None] * len(Definitions.DATA_ANOMALY_MATCHES),
|
||||
DATA_ANOMALY_MATCHES, [None] * len(DATA_ANOMALY_MATCHES),
|
||||
)
|
||||
)
|
||||
|
||||
|
|
@ -283,7 +282,7 @@ class EPCDataProcessor:
|
|||
@staticmethod
|
||||
def clean_construction_age_band(x):
|
||||
# Firstly, we check if it's an error value
|
||||
if x in Definitions.DATA_ANOMALY_MATCHES or x in [None, np.nan]:
|
||||
if x in DATA_ANOMALY_MATCHES or x in [None, np.nan]:
|
||||
return x
|
||||
|
||||
# Next, we check if it's a value in our map
|
||||
|
|
|
|||
|
|
@ -128,6 +128,7 @@ class MainheatControlAttributes(Definitions):
|
|||
]
|
||||
|
||||
def __init__(self, description: str):
|
||||
|
||||
self.description: str = clean_description(description.lower()).strip()
|
||||
self.nodata = not self.description or description in self.DATA_ANOMALY_MATCHES or (
|
||||
description in self.NO_DATA_DESCRIPTIONS
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue