Debug epc_record_as_dataframe: rename prepared_epc to _prepared_epc, validate epc_type, and read the record via getattr

This commit is contained in:
Khalim Conn-Kowlessar 2026-03-11 18:52:40 +00:00
parent 20d63c4ca2
commit cbe162e64e

View file

@ -1,5 +1,5 @@
import warnings import warnings
from typing import Optional, get_origin, get_args, TypedDict, cast, TypeAlias from typing import Optional, get_origin, get_args, TypedDict, cast, TypeAlias, Literal
from backend.addresses.Address import Address from backend.addresses.Address import Address
from dataclasses import fields from dataclasses import fields
from datetime import datetime from datetime import datetime
@ -372,7 +372,7 @@ class EPCRecord:
This method will clean the records using the data processor This method will clean the records using the data processor
""" """
epc_data_processor = EPCDataProcessor( epc_data_processor = EPCDataProcessor(
data=self.epc_record_as_dataframe("prepared_epc").copy(), data=self.epc_record_as_dataframe("_prepared_epc").copy(),
run_mode="newdata", run_mode="newdata",
cleaning_averages=self.cleaning_data, cleaning_averages=self.cleaning_data,
) )
@ -441,7 +441,7 @@ class EPCRecord:
""" """
This method will identify the delta between the prepared and original records This method will identify the delta between the prepared and original records
""" """
prepared_epc_df = self.epc_record_as_dataframe("prepared_epc") prepared_epc_df = self.epc_record_as_dataframe("_prepared_epc")
original_epc_df = self.epc_record_as_dataframe("original_epc") original_epc_df = self.epc_record_as_dataframe("original_epc")
df = pd.concat( df = pd.concat(
@ -480,14 +480,20 @@ class EPCRecord:
def epc_record_as_dataframe( def epc_record_as_dataframe(
self, self,
epc_type: str = "prepared_epc", epc_type: Literal["_prepared_epc", "original_epc"] = "_prepared_epc",
use_upper_columns: bool = True, use_upper_columns: bool = True,
replace_empty_string: bool = False, replace_empty_string: bool = False,
) -> pd.DataFrame: ) -> pd.DataFrame:
""" """
This method will return the dataframe representation of the epc record This method will return the dataframe representation of the epc record
""" """
df = pd.DataFrame.from_dict(self.get(epc_type), orient="index").T
if epc_type not in ("_prepared_epc", "original_epc"):
raise ValueError(f"Invalid epc_type: {epc_type}")
source = getattr(self, epc_type)
df = pd.DataFrame.from_dict(source, orient="index").T
if use_upper_columns: if use_upper_columns:
df.columns = [x.upper().replace("-", "_") for x in df.columns] df.columns = [x.upper().replace("-", "_") for x in df.columns]
@ -584,7 +590,7 @@ class EPCRecord:
cleaned_property_data = EPCDataProcessor.apply_averages_cleaning( cleaned_property_data = EPCDataProcessor.apply_averages_cleaning(
data_to_clean=self.epc_record_as_dataframe( data_to_clean=self.epc_record_as_dataframe(
"prepared_epc", replace_empty_string=True "_prepared_epc", replace_empty_string=True
), ),
cleaning_data=cleaning_data, cleaning_data=cleaning_data,
cols_to_merge_on=[ cols_to_merge_on=[
@ -794,12 +800,12 @@ class EPCRecord:
""" """
This method will clean the wind turbine, if empty or invalid This method will clean the wind turbine, if empty or invalid
""" """
if not self.prepared_epc: if not self._prepared_epc:
raise ValueError("EPC Recrod doesn not contain epc data") raise ValueError("EPC Recrod doesn not contain epc data")
self.prepared_epc["wind-turbine-count"] = ( self._prepared_epc["wind-turbine-count"] = (
int(self.prepared_epc["wind-turbine-count"]) int(self._prepared_epc["wind-turbine-count"])
if self.prepared_epc["wind-turbine-count"] not in DATA_ANOMALY_MATCHES if self._prepared_epc["wind-turbine-count"] not in DATA_ANOMALY_MATCHES
else None else None
) )
@ -939,7 +945,7 @@ class EPCRecord:
band = [ band = [
int(x) int(x)
for x in re.findall( for x in re.findall(
r"\b\d{4}\b", self.prepared_epc["construction-age-band"] r"\b\d{4}\b", self._prepared_epc["construction-age-band"]
) )
] ]
self.year_built = band[0] self.year_built = band[0]
@ -952,10 +958,10 @@ class EPCRecord:
""" """
This method will clean the ventilation, if empty or invalid This method will clean the ventilation, if empty or invalid
""" """
self.prepared_epc["mechanical-ventilation"] = ( self._prepared_epc["mechanical-ventilation"] = (
None None
if (self.prepared_epc["mechanical-ventilation"] in DATA_ANOMALY_MATCHES) if (self._prepared_epc["mechanical-ventilation"] in DATA_ANOMALY_MATCHES)
else (self.prepared_epc["mechanical-ventilation"]) else (self._prepared_epc["mechanical-ventilation"])
) )
def _field_validation(self) -> None: def _field_validation(self) -> None:
@ -1123,22 +1129,20 @@ class EPCRecord:
list and return_asdict is True. list and return_asdict is True.
""" """
source = self.prepared_epc if self.prepared_epc is not None else self.__dict__
if isinstance(key, str): if isinstance(key, str):
return source.get(key) return self.__dict__.get(key)
if isinstance(key, list): if isinstance(key, list):
if return_asdict: if return_asdict:
result = {k: source.get(k) for k in key} result = {k: self.__dict__.get(k) for k in key}
if key_suffix: if key_suffix:
result = {f"{k}{key_suffix}": v for k, v in result.items()} result = {f"{k}{key_suffix}": v for k, v in result.items()}
return result return result
return [source.get(k) for k in key] return [self.__dict__.get(k) for k in key]
raise TypeError(f"Key {key} is not a recognised type") raise TypeError(f"Key {key} is not a recognised type")