From cbe162e64ee36c5ffffd0812d1bbde938d39ffd2 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 11 Mar 2026 18:52:40 +0000 Subject: [PATCH] debugging epc_record_as_dataframe --- etl/epc/Record.py | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/etl/epc/Record.py b/etl/epc/Record.py index f2c3c5fa..3f64a7c5 100644 --- a/etl/epc/Record.py +++ b/etl/epc/Record.py @@ -1,5 +1,5 @@ import warnings -from typing import Optional, get_origin, get_args, TypedDict, cast, TypeAlias +from typing import Optional, get_origin, get_args, TypedDict, cast, TypeAlias, Literal from backend.addresses.Address import Address from dataclasses import fields from datetime import datetime @@ -372,7 +372,7 @@ class EPCRecord: This method will clean the records using the data processor """ epc_data_processor = EPCDataProcessor( - data=self.epc_record_as_dataframe("prepared_epc").copy(), + data=self.epc_record_as_dataframe("_prepared_epc").copy(), run_mode="newdata", cleaning_averages=self.cleaning_data, ) @@ -441,7 +441,7 @@ class EPCRecord: """ This method will identify the delta between the prepared and original records """ - prepared_epc_df = self.epc_record_as_dataframe("prepared_epc") + prepared_epc_df = self.epc_record_as_dataframe("_prepared_epc") original_epc_df = self.epc_record_as_dataframe("original_epc") df = pd.concat( @@ -480,14 +480,20 @@ class EPCRecord: def epc_record_as_dataframe( self, - epc_type: str = "prepared_epc", + epc_type: Literal["_prepared_epc", "original_epc"] = "_prepared_epc", use_upper_columns: bool = True, replace_empty_string: bool = False, ) -> pd.DataFrame: """ This method will return the dataframe representation of the epc record """ - df = pd.DataFrame.from_dict(self.get(epc_type), orient="index").T + + if epc_type not in ("_prepared_epc", "original_epc"): + raise ValueError(f"Invalid epc_type: {epc_type}") + + source = getattr(self, epc_type) + + df = pd.DataFrame.from_dict(source, orient="index").T if use_upper_columns: df.columns = [x.upper().replace("-", "_") for x in df.columns] @@ -584,7 +590,7 @@ class EPCRecord: cleaned_property_data = EPCDataProcessor.apply_averages_cleaning( data_to_clean=self.epc_record_as_dataframe( - "prepared_epc", replace_empty_string=True + "_prepared_epc", replace_empty_string=True ), cleaning_data=cleaning_data, cols_to_merge_on=[ @@ -794,12 +800,12 @@ class EPCRecord: """ This method will clean the wind turbine, if empty or invalid """ - if not self.prepared_epc: + if not self._prepared_epc: raise ValueError("EPC Recrod doesn not contain epc data") - self.prepared_epc["wind-turbine-count"] = ( - int(self.prepared_epc["wind-turbine-count"]) - if self.prepared_epc["wind-turbine-count"] not in DATA_ANOMALY_MATCHES + self._prepared_epc["wind-turbine-count"] = ( + int(self._prepared_epc["wind-turbine-count"]) + if self._prepared_epc["wind-turbine-count"] not in DATA_ANOMALY_MATCHES else None ) @@ -939,7 +945,7 @@ class EPCRecord: band = [ int(x) for x in re.findall( - r"\b\d{4}\b", self.prepared_epc["construction-age-band"] + r"\b\d{4}\b", self._prepared_epc["construction-age-band"] ) ] self.year_built = band[0] @@ -952,10 +958,10 @@ class EPCRecord: """ This method will clean the ventilation, if empty or invalid """ - self.prepared_epc["mechanical-ventilation"] = ( + self._prepared_epc["mechanical-ventilation"] = ( None - if (self.prepared_epc["mechanical-ventilation"] in DATA_ANOMALY_MATCHES) - else (self.prepared_epc["mechanical-ventilation"]) + if (self._prepared_epc["mechanical-ventilation"] in DATA_ANOMALY_MATCHES) + else (self._prepared_epc["mechanical-ventilation"]) ) def _field_validation(self) -> None: @@ -1123,22 +1129,20 @@ class EPCRecord: list and return_asdict is True. """ - source = self.prepared_epc if self.prepared_epc is not None else self.__dict__ - if isinstance(key, str): - return source.get(key) + return self.__dict__.get(key) if isinstance(key, list): if return_asdict: - result = {k: source.get(k) for k in key} + result = {k: self.__dict__.get(k) for k in key} if key_suffix: result = {f"{k}{key_suffix}": v for k, v in result.items()} return result - return [source.get(k) for k in key] + return [self.__dict__.get(k) for k in key] raise TypeError(f"Key {key} is not a recognised type")