mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
improving basic typing of EPCRecord
This commit is contained in:
parent
fb2a69faff
commit
8f0cd7f98c
1 changed file with 47 additions and 42 deletions
|
|
@ -1,4 +1,4 @@
|
||||||
from typing import Optional, get_origin, get_args, TypedDict, Dict
|
from typing import Optional, get_origin, get_args, TypedDict, cast, TypeAlias
|
||||||
from dataclasses import fields
|
from dataclasses import fields
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
@ -45,11 +45,15 @@ DATA_BUCKET = os.environ.get(
|
||||||
|
|
||||||
pd.set_option("future.no_silent_downcasting", True)
|
pd.set_option("future.no_silent_downcasting", True)
|
||||||
|
|
||||||
|
RawEpcRow: TypeAlias = dict[str, str | None]
|
||||||
|
PreparedEpcValue: TypeAlias = str | int | float | bool | None
|
||||||
|
PreparedEpcRow: TypeAlias = dict[str, PreparedEpcValue]
|
||||||
|
|
||||||
|
|
||||||
class InputEpcRecords(TypedDict):
|
class InputEpcRecords(TypedDict):
|
||||||
original_epc: Dict[str, Any]
|
original_epc: RawEpcRow
|
||||||
full_sap_epc: Dict[str, Any]
|
full_sap_epc: RawEpcRow
|
||||||
old_data: List[Dict[str, Any]]
|
old_data: list[RawEpcRow]
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
@ -231,22 +235,33 @@ class EPCRecord:
|
||||||
|
|
||||||
run_mode: str = "training"
|
run_mode: str = "training"
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# INPUT DATA STRUCTURES
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
epc_records: Optional[InputEpcRecords] = None
|
epc_records: Optional[InputEpcRecords] = None
|
||||||
full_sap_epc: Optional[dict] = None
|
# Raw EPC input (immutable)
|
||||||
old_data: list[dict] = None
|
original_epc: Optional[RawEpcRow] = None
|
||||||
original_epc: Optional[dict] = None
|
|
||||||
prepared_epc: Optional[dict] = None
|
# Working dictionary that gets cleaned
|
||||||
|
prepared_epc: Optional[PreparedEpcRow] = None
|
||||||
|
|
||||||
|
# Supporting
|
||||||
|
full_sap_epc: Optional[RawEpcRow] = None
|
||||||
|
old_data: Optional[list[RawEpcRow]] = None
|
||||||
|
|
||||||
|
# Metadata generated during processing
|
||||||
prepared_epc_delta_metadata: pd.DataFrame = None
|
prepared_epc_delta_metadata: pd.DataFrame = None
|
||||||
cleaning_data: pd.DataFrame = None
|
cleaning_data: pd.DataFrame = None
|
||||||
|
|
||||||
# Not used in training mode but used in newdata mode
|
# Not used in training mode but used in newdata mode
|
||||||
age_band: str = None
|
age_band: Optional[str] = None
|
||||||
construction_age_band: str = None
|
construction_age_band: Optional[str] = None
|
||||||
year_built: int = None
|
year_built: Optional[int] = None
|
||||||
number_of_floors: int = None
|
number_of_floors: Optional[int] = None
|
||||||
number_of_open_fireplaces: int = None
|
number_of_open_fireplaces: Optional[int] = None
|
||||||
heat_loss_corridor_bool: bool = None
|
heat_loss_corridor_bool: Optional[bool] = None
|
||||||
solar_water_heating_flag_bool: bool = None
|
solar_water_heating_flag_bool: Optional[bool] = None
|
||||||
|
|
||||||
def __post_init__(self):
|
def __post_init__(self):
|
||||||
# We can have validation and cleaning steps for each of the fields
|
# We can have validation and cleaning steps for each of the fields
|
||||||
|
|
@ -255,15 +270,18 @@ class EPCRecord:
|
||||||
|
|
||||||
if self.run_mode == "training":
|
if self.run_mode == "training":
|
||||||
self.validation_configuration = EPCRecordValidationConfiguration
|
self.validation_configuration = EPCRecordValidationConfiguration
|
||||||
# self._field_validation()
|
|
||||||
return
|
return
|
||||||
|
|
||||||
# We are running in newdata mode
|
# We are running in newdata mode
|
||||||
if self.epc_records is None:
|
if self.epc_records is None:
|
||||||
raise ValueError("Must provide epc records if running in newdata mode")
|
raise ValueError("Must provide epc records if running in newdata mode")
|
||||||
|
|
||||||
self.prepared_epc = self.epc_records["original_epc"]
|
# Immutable copy; raw record
|
||||||
self.original_epc = self.epc_records["original_epc"].copy()
|
self.original_epc = self.epc_records["original_epc"].copy()
|
||||||
|
|
||||||
|
# Working copy that we will clean and manipulate
|
||||||
|
self.prepared_epc = self.epc_records["original_epc"].copy()
|
||||||
|
|
||||||
self.full_sap_epc = self.epc_records["full_sap_epc"]
|
self.full_sap_epc = self.epc_records["full_sap_epc"]
|
||||||
self.old_data = self.epc_records["old_data"]
|
self.old_data = self.epc_records["old_data"]
|
||||||
|
|
||||||
|
|
@ -299,9 +317,12 @@ class EPCRecord:
|
||||||
)
|
)
|
||||||
epc_data_processor.prepare_data()
|
epc_data_processor.prepare_data()
|
||||||
|
|
||||||
self.prepared_epc = epc_data_processor.data.to_dict(orient="records")[0]
|
record = epc_data_processor.data.to_dict(orient="records")[0]
|
||||||
|
|
||||||
def _cast_value(self, value, type_hint):
|
self.prepared_epc = cast(RawEpcRow, record)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _cast_value(value, type_hint):
|
||||||
|
|
||||||
origin = get_origin(type_hint)
|
origin = get_origin(type_hint)
|
||||||
args = get_args(type_hint)
|
args = get_args(type_hint)
|
||||||
|
|
@ -396,14 +417,6 @@ class EPCRecord:
|
||||||
self._clean_constituency()
|
self._clean_constituency()
|
||||||
self._clean_new_build_descriptions()
|
self._clean_new_build_descriptions()
|
||||||
|
|
||||||
# self._clean_potential_energy_efficiency()
|
|
||||||
# self._clean_environment_impact_potential()
|
|
||||||
# self._clean_energy_consumption_potential()
|
|
||||||
# self._clean_co2_emissions_potential()
|
|
||||||
# self._clean_current_energy_efficiency()
|
|
||||||
# self._clean_energy_consumption_current()
|
|
||||||
# self._clean_co2_emissions_current()
|
|
||||||
|
|
||||||
def epc_record_as_dataframe(
|
def epc_record_as_dataframe(
|
||||||
self,
|
self,
|
||||||
epc_type: str = "prepared_epc",
|
epc_type: str = "prepared_epc",
|
||||||
|
|
@ -524,9 +537,7 @@ class EPCRecord:
|
||||||
cleaned_property_data["FIXED_LIGHTING_OUTLETS_COUNT"].values[0]
|
cleaned_property_data["FIXED_LIGHTING_OUTLETS_COUNT"].values[0]
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
self.prepared_epc["fixed-lighting-outlets-count"] = float(
|
self.prepared_epc["fixed-lighting-outlets-count"] = float(self.prepared_epc["fixed-lighting-outlets-count"])
|
||||||
self.prepared_epc["fixed-lighting-outlets-count"]
|
|
||||||
)
|
|
||||||
|
|
||||||
def _filter_property_dimensions(self, property_dimensions):
|
def _filter_property_dimensions(self, property_dimensions):
|
||||||
"""
|
"""
|
||||||
|
|
@ -604,15 +615,6 @@ class EPCRecord:
|
||||||
self.prepared_epc["property-type"]
|
self.prepared_epc["property-type"]
|
||||||
)
|
)
|
||||||
|
|
||||||
# if self.prepared_epc["property-type"] == "House":
|
|
||||||
# self.number_of_floors = 2
|
|
||||||
# elif self.prepared_epc["property-type"] in ["Flat", "Bungalow"]:
|
|
||||||
# self.number_of_floors = 1
|
|
||||||
# elif self.prepared_epc["property-type"] == "Maisonette":
|
|
||||||
# self.number_of_floors = 2
|
|
||||||
# else:
|
|
||||||
# raise NotImplementedError("Implement me")
|
|
||||||
|
|
||||||
if (
|
if (
|
||||||
self.prepared_epc["floor-height"] == ""
|
self.prepared_epc["floor-height"] == ""
|
||||||
or self.prepared_epc["floor-height"] in DATA_ANOMALY_MATCHES
|
or self.prepared_epc["floor-height"] in DATA_ANOMALY_MATCHES
|
||||||
|
|
@ -859,9 +861,12 @@ class EPCRecord:
|
||||||
This method will clean the year built, if empty or invalid
|
This method will clean the year built, if empty or invalid
|
||||||
"""
|
"""
|
||||||
if self.full_sap_epc:
|
if self.full_sap_epc:
|
||||||
self.year_built = datetime.strptime(
|
lodgement_date = self.full_sap_epc["lodgement-date"]
|
||||||
self.full_sap_epc["lodgement-date"], "%Y-%m-%d"
|
|
||||||
).year
|
if lodgement_date is None:
|
||||||
|
raise ValueError("full_sap_epc lodgement-date is missing")
|
||||||
|
|
||||||
|
self.year_built = datetime.strptime(str(lodgement_date), "%Y-%m-%d").year
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue