mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
debugging epc_record_as_dataframe
This commit is contained in:
parent
20d63c4ca2
commit
cbe162e64e
1 changed file with 23 additions and 19 deletions
|
|
@ -1,5 +1,5 @@
|
|||
import warnings
|
||||
from typing import Optional, get_origin, get_args, TypedDict, cast, TypeAlias
|
||||
from typing import Optional, get_origin, get_args, TypedDict, cast, TypeAlias, Literal
|
||||
from backend.addresses.Address import Address
|
||||
from dataclasses import fields
|
||||
from datetime import datetime
|
||||
|
|
@ -372,7 +372,7 @@ class EPCRecord:
|
|||
This method will clean the records using the data processor
|
||||
"""
|
||||
epc_data_processor = EPCDataProcessor(
|
||||
data=self.epc_record_as_dataframe("prepared_epc").copy(),
|
||||
data=self.epc_record_as_dataframe("_prepared_epc").copy(),
|
||||
run_mode="newdata",
|
||||
cleaning_averages=self.cleaning_data,
|
||||
)
|
||||
|
|
@ -441,7 +441,7 @@ class EPCRecord:
|
|||
"""
|
||||
This method will identify the delta between the prepared and original records
|
||||
"""
|
||||
prepared_epc_df = self.epc_record_as_dataframe("prepared_epc")
|
||||
prepared_epc_df = self.epc_record_as_dataframe("_prepared_epc")
|
||||
original_epc_df = self.epc_record_as_dataframe("original_epc")
|
||||
|
||||
df = pd.concat(
|
||||
|
|
@ -480,14 +480,20 @@ class EPCRecord:
|
|||
|
||||
def epc_record_as_dataframe(
|
||||
self,
|
||||
epc_type: str = "prepared_epc",
|
||||
epc_type: Literal["_prepared_epc", "original_epc"] = "_prepared_epc",
|
||||
use_upper_columns: bool = True,
|
||||
replace_empty_string: bool = False,
|
||||
) -> pd.DataFrame:
|
||||
"""
|
||||
This method will return the dataframe representation of the epc record
|
||||
"""
|
||||
df = pd.DataFrame.from_dict(self.get(epc_type), orient="index").T
|
||||
|
||||
if epc_type not in ("_prepared_epc", "original_epc"):
|
||||
raise ValueError(f"Invalid epc_type: {epc_type}")
|
||||
|
||||
source = getattr(self, epc_type)
|
||||
|
||||
df = pd.DataFrame.from_dict(source, orient="index").T
|
||||
|
||||
if use_upper_columns:
|
||||
df.columns = [x.upper().replace("-", "_") for x in df.columns]
|
||||
|
|
@ -584,7 +590,7 @@ class EPCRecord:
|
|||
|
||||
cleaned_property_data = EPCDataProcessor.apply_averages_cleaning(
|
||||
data_to_clean=self.epc_record_as_dataframe(
|
||||
"prepared_epc", replace_empty_string=True
|
||||
"_prepared_epc", replace_empty_string=True
|
||||
),
|
||||
cleaning_data=cleaning_data,
|
||||
cols_to_merge_on=[
|
||||
|
|
@ -794,12 +800,12 @@ class EPCRecord:
|
|||
"""
|
||||
This method will clean the wind turbine, if empty or invalid
|
||||
"""
|
||||
if not self.prepared_epc:
|
||||
if not self._prepared_epc:
|
||||
raise ValueError("EPC Recrod doesn not contain epc data")
|
||||
|
||||
self.prepared_epc["wind-turbine-count"] = (
|
||||
int(self.prepared_epc["wind-turbine-count"])
|
||||
if self.prepared_epc["wind-turbine-count"] not in DATA_ANOMALY_MATCHES
|
||||
self._prepared_epc["wind-turbine-count"] = (
|
||||
int(self._prepared_epc["wind-turbine-count"])
|
||||
if self._prepared_epc["wind-turbine-count"] not in DATA_ANOMALY_MATCHES
|
||||
else None
|
||||
)
|
||||
|
||||
|
|
@ -939,7 +945,7 @@ class EPCRecord:
|
|||
band = [
|
||||
int(x)
|
||||
for x in re.findall(
|
||||
r"\b\d{4}\b", self.prepared_epc["construction-age-band"]
|
||||
r"\b\d{4}\b", self._prepared_epc["construction-age-band"]
|
||||
)
|
||||
]
|
||||
self.year_built = band[0]
|
||||
|
|
@ -952,10 +958,10 @@ class EPCRecord:
|
|||
"""
|
||||
This method will clean the ventilation, if empty or invalid
|
||||
"""
|
||||
self.prepared_epc["mechanical-ventilation"] = (
|
||||
self._prepared_epc["mechanical-ventilation"] = (
|
||||
None
|
||||
if (self.prepared_epc["mechanical-ventilation"] in DATA_ANOMALY_MATCHES)
|
||||
else (self.prepared_epc["mechanical-ventilation"])
|
||||
if (self._prepared_epc["mechanical-ventilation"] in DATA_ANOMALY_MATCHES)
|
||||
else (self._prepared_epc["mechanical-ventilation"])
|
||||
)
|
||||
|
||||
def _field_validation(self) -> None:
|
||||
|
|
@ -1123,22 +1129,20 @@ class EPCRecord:
|
|||
list and return_asdict is True.
|
||||
"""
|
||||
|
||||
source = self.prepared_epc if self.prepared_epc is not None else self.__dict__
|
||||
|
||||
if isinstance(key, str):
|
||||
return source.get(key)
|
||||
return self.__dict__.get(key)
|
||||
|
||||
if isinstance(key, list):
|
||||
|
||||
if return_asdict:
|
||||
result = {k: source.get(k) for k in key}
|
||||
result = {k: self.__dict__.get(k) for k in key}
|
||||
|
||||
if key_suffix:
|
||||
result = {f"{k}{key_suffix}": v for k, v in result.items()}
|
||||
|
||||
return result
|
||||
|
||||
return [source.get(k) for k in key]
|
||||
return [self.__dict__.get(k) for k in key]
|
||||
|
||||
raise TypeError(f"Key {key} is not a recognised type")
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue