Debug epc_record_as_dataframe: validate epc_type and read _prepared_epc / original_epc via getattr

This commit is contained in:
Khalim Conn-Kowlessar 2026-03-11 18:52:40 +00:00
parent 20d63c4ca2
commit cbe162e64e

View file

@ -1,5 +1,5 @@
import warnings
from typing import Optional, get_origin, get_args, TypedDict, cast, TypeAlias
from typing import Optional, get_origin, get_args, TypedDict, cast, TypeAlias, Literal
from backend.addresses.Address import Address
from dataclasses import fields
from datetime import datetime
@ -372,7 +372,7 @@ class EPCRecord:
This method will clean the records using the data processor
"""
epc_data_processor = EPCDataProcessor(
data=self.epc_record_as_dataframe("prepared_epc").copy(),
data=self.epc_record_as_dataframe("_prepared_epc").copy(),
run_mode="newdata",
cleaning_averages=self.cleaning_data,
)
@ -441,7 +441,7 @@ class EPCRecord:
"""
This method will identify the delta between the prepared and original records
"""
prepared_epc_df = self.epc_record_as_dataframe("prepared_epc")
prepared_epc_df = self.epc_record_as_dataframe("_prepared_epc")
original_epc_df = self.epc_record_as_dataframe("original_epc")
df = pd.concat(
@ -480,14 +480,20 @@ class EPCRecord:
def epc_record_as_dataframe(
self,
epc_type: str = "prepared_epc",
epc_type: Literal["_prepared_epc", "original_epc"] = "_prepared_epc",
use_upper_columns: bool = True,
replace_empty_string: bool = False,
) -> pd.DataFrame:
"""
This method will return the dataframe representation of the epc record
"""
df = pd.DataFrame.from_dict(self.get(epc_type), orient="index").T
if epc_type not in ("_prepared_epc", "original_epc"):
raise ValueError(f"Invalid epc_type: {epc_type}")
source = getattr(self, epc_type)
df = pd.DataFrame.from_dict(source, orient="index").T
if use_upper_columns:
df.columns = [x.upper().replace("-", "_") for x in df.columns]
@ -584,7 +590,7 @@ class EPCRecord:
cleaned_property_data = EPCDataProcessor.apply_averages_cleaning(
data_to_clean=self.epc_record_as_dataframe(
"prepared_epc", replace_empty_string=True
"_prepared_epc", replace_empty_string=True
),
cleaning_data=cleaning_data,
cols_to_merge_on=[
@ -794,12 +800,12 @@ class EPCRecord:
"""
This method will clean the wind turbine, if empty or invalid
"""
if not self.prepared_epc:
if not self._prepared_epc:
raise ValueError("EPC Recrod doesn not contain epc data")
self.prepared_epc["wind-turbine-count"] = (
int(self.prepared_epc["wind-turbine-count"])
if self.prepared_epc["wind-turbine-count"] not in DATA_ANOMALY_MATCHES
self._prepared_epc["wind-turbine-count"] = (
int(self._prepared_epc["wind-turbine-count"])
if self._prepared_epc["wind-turbine-count"] not in DATA_ANOMALY_MATCHES
else None
)
@ -939,7 +945,7 @@ class EPCRecord:
band = [
int(x)
for x in re.findall(
r"\b\d{4}\b", self.prepared_epc["construction-age-band"]
r"\b\d{4}\b", self._prepared_epc["construction-age-band"]
)
]
self.year_built = band[0]
@ -952,10 +958,10 @@ class EPCRecord:
"""
This method will clean the ventilation, if empty or invalid
"""
self.prepared_epc["mechanical-ventilation"] = (
self._prepared_epc["mechanical-ventilation"] = (
None
if (self.prepared_epc["mechanical-ventilation"] in DATA_ANOMALY_MATCHES)
else (self.prepared_epc["mechanical-ventilation"])
if (self._prepared_epc["mechanical-ventilation"] in DATA_ANOMALY_MATCHES)
else (self._prepared_epc["mechanical-ventilation"])
)
def _field_validation(self) -> None:
@ -1123,22 +1129,20 @@ class EPCRecord:
list and return_asdict is True.
"""
source = self.prepared_epc if self.prepared_epc is not None else self.__dict__
if isinstance(key, str):
return source.get(key)
return self.__dict__.get(key)
if isinstance(key, list):
if return_asdict:
result = {k: source.get(k) for k in key}
result = {k: self.__dict__.get(k) for k in key}
if key_suffix:
result = {f"{k}{key_suffix}": v for k, v in result.items()}
return result
return [source.get(k) for k in key]
return [self.__dict__.get(k) for k in key]
raise TypeError(f"Key {key} is not a recognised type")