improving basic typing of EpcRecord

This commit is contained in:
Khalim Conn-Kowlessar 2026-03-06 14:42:43 +00:00
parent fb2a69faff
commit 8f0cd7f98c

View file

@ -1,4 +1,4 @@
from typing import Optional, get_origin, get_args, TypedDict, Dict
from typing import Optional, get_origin, get_args, TypedDict, cast, TypeAlias
from dataclasses import fields
from datetime import datetime
from dataclasses import dataclass
@ -45,11 +45,15 @@ DATA_BUCKET = os.environ.get(
pd.set_option("future.no_silent_downcasting", True)
RawEpcRow: TypeAlias = dict[str, str | None]
PreparedEpcValue: TypeAlias = str | int | float | bool | None
PreparedEpcRow: TypeAlias = dict[str, PreparedEpcValue]
class InputEpcRecords(TypedDict):
original_epc: Dict[str, Any]
full_sap_epc: Dict[str, Any]
old_data: List[Dict[str, Any]]
original_epc: RawEpcRow
full_sap_epc: RawEpcRow
old_data: list[RawEpcRow]
@dataclass
@ -231,22 +235,33 @@ class EPCRecord:
run_mode: str = "training"
# ------------------------------------------------------------------
# INPUT DATA STRUCTURES
# ------------------------------------------------------------------
epc_records: Optional[InputEpcRecords] = None
full_sap_epc: Optional[dict] = None
old_data: list[dict] = None
original_epc: Optional[dict] = None
prepared_epc: Optional[dict] = None
# Raw EPC input (immutable)
original_epc: Optional[RawEpcRow] = None
# Working dictionary that gets cleaned
prepared_epc: Optional[PreparedEpcRow] = None
# Supporting
full_sap_epc: Optional[RawEpcRow] = None
old_data: Optional[list[RawEpcRow]] = None
# Metadata generated during processing
prepared_epc_delta_metadata: pd.DataFrame = None
cleaning_data: pd.DataFrame = None
# Not used in training mode but used in newdata mode
age_band: str = None
construction_age_band: str = None
year_built: int = None
number_of_floors: int = None
number_of_open_fireplaces: int = None
heat_loss_corridor_bool: bool = None
solar_water_heating_flag_bool: bool = None
age_band: Optional[str] = None
construction_age_band: Optional[str] = None
year_built: Optional[int] = None
number_of_floors: Optional[int] = None
number_of_open_fireplaces: Optional[int] = None
heat_loss_corridor_bool: Optional[bool] = None
solar_water_heating_flag_bool: Optional[bool] = None
def __post_init__(self):
# We can have validation and cleaning steps for each of the fields
@ -255,15 +270,18 @@ class EPCRecord:
if self.run_mode == "training":
self.validation_configuration = EPCRecordValidationConfiguration
# self._field_validation()
return
# We are running in newdata mode
if self.epc_records is None:
raise ValueError("Must provide epc records if running in newdata mode")
self.prepared_epc = self.epc_records["original_epc"]
# Snapshot of the raw record (a shallow dict copy, so not actually immutable)
self.original_epc = self.epc_records["original_epc"].copy()
# Working copy that we will clean and manipulate
self.prepared_epc = self.epc_records["original_epc"].copy()
self.full_sap_epc = self.epc_records["full_sap_epc"]
self.old_data = self.epc_records["old_data"]
@ -299,9 +317,12 @@ class EPCRecord:
)
epc_data_processor.prepare_data()
self.prepared_epc = epc_data_processor.data.to_dict(orient="records")[0]
record = epc_data_processor.data.to_dict(orient="records")[0]
def _cast_value(self, value, type_hint):
self.prepared_epc = cast(RawEpcRow, record)
@staticmethod
def _cast_value(value, type_hint):
origin = get_origin(type_hint)
args = get_args(type_hint)
@ -396,14 +417,6 @@ class EPCRecord:
self._clean_constituency()
self._clean_new_build_descriptions()
# self._clean_potential_energy_efficiency()
# self._clean_environment_impact_potential()
# self._clean_energy_consumption_potential()
# self._clean_co2_emissions_potential()
# self._clean_current_energy_efficiency()
# self._clean_energy_consumption_current()
# self._clean_co2_emissions_current()
def epc_record_as_dataframe(
self,
epc_type: str = "prepared_epc",
@ -524,9 +537,7 @@ class EPCRecord:
cleaned_property_data["FIXED_LIGHTING_OUTLETS_COUNT"].values[0]
)
else:
self.prepared_epc["fixed-lighting-outlets-count"] = float(
self.prepared_epc["fixed-lighting-outlets-count"]
)
self.prepared_epc["fixed-lighting-outlets-count"] = float(self.prepared_epc["fixed-lighting-outlets-count"])
def _filter_property_dimensions(self, property_dimensions):
"""
@ -604,15 +615,6 @@ class EPCRecord:
self.prepared_epc["property-type"]
)
# if self.prepared_epc["property-type"] == "House":
# self.number_of_floors = 2
# elif self.prepared_epc["property-type"] in ["Flat", "Bungalow"]:
# self.number_of_floors = 1
# elif self.prepared_epc["property-type"] == "Maisonette":
# self.number_of_floors = 2
# else:
# raise NotImplementedError("Implement me")
if (
self.prepared_epc["floor-height"] == ""
or self.prepared_epc["floor-height"] in DATA_ANOMALY_MATCHES
@ -859,9 +861,12 @@ class EPCRecord:
This method will clean the year built, if empty or invalid
"""
if self.full_sap_epc:
self.year_built = datetime.strptime(
self.full_sap_epc["lodgement-date"], "%Y-%m-%d"
).year
lodgement_date = self.full_sap_epc["lodgement-date"]
if lodgement_date is None:
raise ValueError("full_sap_epc lodgement-date is missing")
self.year_built = datetime.strptime(str(lodgement_date), "%Y-%m-%d").year
return