mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
improving basic typing of EpcRecord
This commit is contained in:
parent
fb2a69faff
commit
8f0cd7f98c
1 changed files with 47 additions and 42 deletions
|
|
@ -1,4 +1,4 @@
|
|||
from typing import Optional, get_origin, get_args, TypedDict, Dict
|
||||
from typing import Optional, get_origin, get_args, TypedDict, cast, TypeAlias
|
||||
from dataclasses import fields
|
||||
from datetime import datetime
|
||||
from dataclasses import dataclass
|
||||
|
|
@ -45,11 +45,15 @@ DATA_BUCKET = os.environ.get(
|
|||
|
||||
pd.set_option("future.no_silent_downcasting", True)
|
||||
|
||||
RawEpcRow: TypeAlias = dict[str, str | None]
|
||||
PreparedEpcValue: TypeAlias = str | int | float | bool | None
|
||||
PreparedEpcRow: TypeAlias = dict[str, PreparedEpcValue]
|
||||
|
||||
|
||||
class InputEpcRecords(TypedDict):
|
||||
original_epc: Dict[str, Any]
|
||||
full_sap_epc: Dict[str, Any]
|
||||
old_data: List[Dict[str, Any]]
|
||||
original_epc: RawEpcRow
|
||||
full_sap_epc: RawEpcRow
|
||||
old_data: list[RawEpcRow]
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -231,22 +235,33 @@ class EPCRecord:
|
|||
|
||||
run_mode: str = "training"
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# INPUT DATA STRUCTURES
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
epc_records: Optional[InputEpcRecords] = None
|
||||
full_sap_epc: Optional[dict] = None
|
||||
old_data: list[dict] = None
|
||||
original_epc: Optional[dict] = None
|
||||
prepared_epc: Optional[dict] = None
|
||||
# Raw EPC input (intended to stay unmodified; it is a plain dict, so this is not enforced)
|
||||
original_epc: Optional[RawEpcRow] = None
|
||||
|
||||
# Working dictionary that gets cleaned
|
||||
prepared_epc: Optional[PreparedEpcRow] = None
|
||||
|
||||
# Supporting
|
||||
full_sap_epc: Optional[RawEpcRow] = None
|
||||
old_data: Optional[list[RawEpcRow]] = None
|
||||
|
||||
# Metadata generated during processing
|
||||
prepared_epc_delta_metadata: pd.DataFrame = None
|
||||
cleaning_data: pd.DataFrame = None
|
||||
|
||||
# Not used in training mode but used in newdata mode
|
||||
age_band: str = None
|
||||
construction_age_band: str = None
|
||||
year_built: int = None
|
||||
number_of_floors: int = None
|
||||
number_of_open_fireplaces: int = None
|
||||
heat_loss_corridor_bool: bool = None
|
||||
solar_water_heating_flag_bool: bool = None
|
||||
age_band: Optional[str] = None
|
||||
construction_age_band: Optional[str] = None
|
||||
year_built: Optional[int] = None
|
||||
number_of_floors: Optional[int] = None
|
||||
number_of_open_fireplaces: Optional[int] = None
|
||||
heat_loss_corridor_bool: Optional[bool] = None
|
||||
solar_water_heating_flag_bool: Optional[bool] = None
|
||||
|
||||
def __post_init__(self):
|
||||
# We can have validation and cleaning steps for each of the fields
|
||||
|
|
@ -255,15 +270,18 @@ class EPCRecord:
|
|||
|
||||
if self.run_mode == "training":
|
||||
self.validation_configuration = EPCRecordValidationConfiguration
|
||||
# self._field_validation()
|
||||
return
|
||||
|
||||
# We are running in newdata mode
|
||||
if self.epc_records is None:
|
||||
raise ValueError("Must provide epc records if running in newdata mode")
|
||||
|
||||
self.prepared_epc = self.epc_records["original_epc"]
|
||||
# Shallow copy of the raw record, kept separate from the working copy (a plain dict, so not actually immutable)
|
||||
self.original_epc = self.epc_records["original_epc"].copy()
|
||||
|
||||
# Working copy that we will clean and manipulate
|
||||
self.prepared_epc = self.epc_records["original_epc"].copy()
|
||||
|
||||
self.full_sap_epc = self.epc_records["full_sap_epc"]
|
||||
self.old_data = self.epc_records["old_data"]
|
||||
|
||||
|
|
@ -299,9 +317,12 @@ class EPCRecord:
|
|||
)
|
||||
epc_data_processor.prepare_data()
|
||||
|
||||
self.prepared_epc = epc_data_processor.data.to_dict(orient="records")[0]
|
||||
record = epc_data_processor.data.to_dict(orient="records")[0]
|
||||
|
||||
def _cast_value(self, value, type_hint):
|
||||
self.prepared_epc = cast(RawEpcRow, record)
|
||||
|
||||
@staticmethod
|
||||
def _cast_value(value, type_hint):
|
||||
|
||||
origin = get_origin(type_hint)
|
||||
args = get_args(type_hint)
|
||||
|
|
@ -396,14 +417,6 @@ class EPCRecord:
|
|||
self._clean_constituency()
|
||||
self._clean_new_build_descriptions()
|
||||
|
||||
# self._clean_potential_energy_efficiency()
|
||||
# self._clean_environment_impact_potential()
|
||||
# self._clean_energy_consumption_potential()
|
||||
# self._clean_co2_emissions_potential()
|
||||
# self._clean_current_energy_efficiency()
|
||||
# self._clean_energy_consumption_current()
|
||||
# self._clean_co2_emissions_current()
|
||||
|
||||
def epc_record_as_dataframe(
|
||||
self,
|
||||
epc_type: str = "prepared_epc",
|
||||
|
|
@ -524,9 +537,7 @@ class EPCRecord:
|
|||
cleaned_property_data["FIXED_LIGHTING_OUTLETS_COUNT"].values[0]
|
||||
)
|
||||
else:
|
||||
self.prepared_epc["fixed-lighting-outlets-count"] = float(
|
||||
self.prepared_epc["fixed-lighting-outlets-count"]
|
||||
)
|
||||
self.prepared_epc["fixed-lighting-outlets-count"] = float(self.prepared_epc["fixed-lighting-outlets-count"])
|
||||
|
||||
def _filter_property_dimensions(self, property_dimensions):
|
||||
"""
|
||||
|
|
@ -604,15 +615,6 @@ class EPCRecord:
|
|||
self.prepared_epc["property-type"]
|
||||
)
|
||||
|
||||
# if self.prepared_epc["property-type"] == "House":
|
||||
# self.number_of_floors = 2
|
||||
# elif self.prepared_epc["property-type"] in ["Flat", "Bungalow"]:
|
||||
# self.number_of_floors = 1
|
||||
# elif self.prepared_epc["property-type"] == "Maisonette":
|
||||
# self.number_of_floors = 2
|
||||
# else:
|
||||
# raise NotImplementedError("Implement me")
|
||||
|
||||
if (
|
||||
self.prepared_epc["floor-height"] == ""
|
||||
or self.prepared_epc["floor-height"] in DATA_ANOMALY_MATCHES
|
||||
|
|
@ -859,9 +861,12 @@ class EPCRecord:
|
|||
This method will clean the year built, if empty or invalid
|
||||
"""
|
||||
if self.full_sap_epc:
|
||||
self.year_built = datetime.strptime(
|
||||
self.full_sap_epc["lodgement-date"], "%Y-%m-%d"
|
||||
).year
|
||||
lodgement_date = self.full_sap_epc["lodgement-date"]
|
||||
|
||||
if lodgement_date is None:
|
||||
raise ValueError("full_sap_epc lodgement-date is missing")
|
||||
|
||||
self.year_built = datetime.strptime(str(lodgement_date), "%Y-%m-%d").year
|
||||
|
||||
return
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue