mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
debugging epc_record_as_dataframe
This commit is contained in:
parent
20d63c4ca2
commit
cbe162e64e
1 changed file with 23 additions and 19 deletions
|
|
@ -1,5 +1,5 @@
|
||||||
import warnings
|
import warnings
|
||||||
from typing import Optional, get_origin, get_args, TypedDict, cast, TypeAlias
|
from typing import Optional, get_origin, get_args, TypedDict, cast, TypeAlias, Literal
|
||||||
from backend.addresses.Address import Address
|
from backend.addresses.Address import Address
|
||||||
from dataclasses import fields
|
from dataclasses import fields
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
@ -372,7 +372,7 @@ class EPCRecord:
|
||||||
This method will clean the records using the data processor
|
This method will clean the records using the data processor
|
||||||
"""
|
"""
|
||||||
epc_data_processor = EPCDataProcessor(
|
epc_data_processor = EPCDataProcessor(
|
||||||
data=self.epc_record_as_dataframe("prepared_epc").copy(),
|
data=self.epc_record_as_dataframe("_prepared_epc").copy(),
|
||||||
run_mode="newdata",
|
run_mode="newdata",
|
||||||
cleaning_averages=self.cleaning_data,
|
cleaning_averages=self.cleaning_data,
|
||||||
)
|
)
|
||||||
|
|
@ -441,7 +441,7 @@ class EPCRecord:
|
||||||
"""
|
"""
|
||||||
This method will identify the delta between the prepared and original records
|
This method will identify the delta between the prepared and original records
|
||||||
"""
|
"""
|
||||||
prepared_epc_df = self.epc_record_as_dataframe("prepared_epc")
|
prepared_epc_df = self.epc_record_as_dataframe("_prepared_epc")
|
||||||
original_epc_df = self.epc_record_as_dataframe("original_epc")
|
original_epc_df = self.epc_record_as_dataframe("original_epc")
|
||||||
|
|
||||||
df = pd.concat(
|
df = pd.concat(
|
||||||
|
|
@ -480,14 +480,20 @@ class EPCRecord:
|
||||||
|
|
||||||
def epc_record_as_dataframe(
|
def epc_record_as_dataframe(
|
||||||
self,
|
self,
|
||||||
epc_type: str = "prepared_epc",
|
epc_type: Literal["_prepared_epc", "original_epc"] = "_prepared_epc",
|
||||||
use_upper_columns: bool = True,
|
use_upper_columns: bool = True,
|
||||||
replace_empty_string: bool = False,
|
replace_empty_string: bool = False,
|
||||||
) -> pd.DataFrame:
|
) -> pd.DataFrame:
|
||||||
"""
|
"""
|
||||||
This method will return the dataframe representation of the epc record
|
This method will return the dataframe representation of the epc record
|
||||||
"""
|
"""
|
||||||
df = pd.DataFrame.from_dict(self.get(epc_type), orient="index").T
|
|
||||||
|
if epc_type not in ("_prepared_epc", "original_epc"):
|
||||||
|
raise ValueError(f"Invalid epc_type: {epc_type}")
|
||||||
|
|
||||||
|
source = getattr(self, epc_type)
|
||||||
|
|
||||||
|
df = pd.DataFrame.from_dict(source, orient="index").T
|
||||||
|
|
||||||
if use_upper_columns:
|
if use_upper_columns:
|
||||||
df.columns = [x.upper().replace("-", "_") for x in df.columns]
|
df.columns = [x.upper().replace("-", "_") for x in df.columns]
|
||||||
|
|
@ -584,7 +590,7 @@ class EPCRecord:
|
||||||
|
|
||||||
cleaned_property_data = EPCDataProcessor.apply_averages_cleaning(
|
cleaned_property_data = EPCDataProcessor.apply_averages_cleaning(
|
||||||
data_to_clean=self.epc_record_as_dataframe(
|
data_to_clean=self.epc_record_as_dataframe(
|
||||||
"prepared_epc", replace_empty_string=True
|
"_prepared_epc", replace_empty_string=True
|
||||||
),
|
),
|
||||||
cleaning_data=cleaning_data,
|
cleaning_data=cleaning_data,
|
||||||
cols_to_merge_on=[
|
cols_to_merge_on=[
|
||||||
|
|
@ -794,12 +800,12 @@ class EPCRecord:
|
||||||
"""
|
"""
|
||||||
This method will clean the wind turbine, if empty or invalid
|
This method will clean the wind turbine, if empty or invalid
|
||||||
"""
|
"""
|
||||||
if not self.prepared_epc:
|
if not self._prepared_epc:
|
||||||
raise ValueError("EPC Recrod doesn not contain epc data")
|
raise ValueError("EPC Recrod doesn not contain epc data")
|
||||||
|
|
||||||
self.prepared_epc["wind-turbine-count"] = (
|
self._prepared_epc["wind-turbine-count"] = (
|
||||||
int(self.prepared_epc["wind-turbine-count"])
|
int(self._prepared_epc["wind-turbine-count"])
|
||||||
if self.prepared_epc["wind-turbine-count"] not in DATA_ANOMALY_MATCHES
|
if self._prepared_epc["wind-turbine-count"] not in DATA_ANOMALY_MATCHES
|
||||||
else None
|
else None
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -939,7 +945,7 @@ class EPCRecord:
|
||||||
band = [
|
band = [
|
||||||
int(x)
|
int(x)
|
||||||
for x in re.findall(
|
for x in re.findall(
|
||||||
r"\b\d{4}\b", self.prepared_epc["construction-age-band"]
|
r"\b\d{4}\b", self._prepared_epc["construction-age-band"]
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
self.year_built = band[0]
|
self.year_built = band[0]
|
||||||
|
|
@ -952,10 +958,10 @@ class EPCRecord:
|
||||||
"""
|
"""
|
||||||
This method will clean the ventilation, if empty or invalid
|
This method will clean the ventilation, if empty or invalid
|
||||||
"""
|
"""
|
||||||
self.prepared_epc["mechanical-ventilation"] = (
|
self._prepared_epc["mechanical-ventilation"] = (
|
||||||
None
|
None
|
||||||
if (self.prepared_epc["mechanical-ventilation"] in DATA_ANOMALY_MATCHES)
|
if (self._prepared_epc["mechanical-ventilation"] in DATA_ANOMALY_MATCHES)
|
||||||
else (self.prepared_epc["mechanical-ventilation"])
|
else (self._prepared_epc["mechanical-ventilation"])
|
||||||
)
|
)
|
||||||
|
|
||||||
def _field_validation(self) -> None:
|
def _field_validation(self) -> None:
|
||||||
|
|
@ -1123,22 +1129,20 @@ class EPCRecord:
|
||||||
list and return_asdict is True.
|
list and return_asdict is True.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
source = self.prepared_epc if self.prepared_epc is not None else self.__dict__
|
|
||||||
|
|
||||||
if isinstance(key, str):
|
if isinstance(key, str):
|
||||||
return source.get(key)
|
return self.__dict__.get(key)
|
||||||
|
|
||||||
if isinstance(key, list):
|
if isinstance(key, list):
|
||||||
|
|
||||||
if return_asdict:
|
if return_asdict:
|
||||||
result = {k: source.get(k) for k in key}
|
result = {k: self.__dict__.get(k) for k in key}
|
||||||
|
|
||||||
if key_suffix:
|
if key_suffix:
|
||||||
result = {f"{k}{key_suffix}": v for k, v in result.items()}
|
result = {f"{k}{key_suffix}": v for k, v in result.items()}
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
return [source.get(k) for k in key]
|
return [self.__dict__.get(k) for k in key]
|
||||||
|
|
||||||
raise TypeError(f"Key {key} is not a recognised type")
|
raise TypeError(f"Key {key} is not a recognised type")
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue