mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
decorating function signature
This commit is contained in:
parent
8f0cd7f98c
commit
deb9fd9a38
3 changed files with 53 additions and 55 deletions
|
|
@ -304,7 +304,7 @@ class Property:
|
|||
if k in fixed_data_col_names
|
||||
}
|
||||
|
||||
difference_record = self.epc_record.create_EPCDifferenceRecord(self.epc_record, fixed_data)
|
||||
difference_record = self.epc_record.create_epc_difference_record(self.epc_record, fixed_data)
|
||||
|
||||
# We have rare cases where entire description columns are missing. EpcRecords will convert this to None.
|
||||
# Due to the sensitivity of the EPCDifferenceRecord creation to missing data, we will fill in these missing
|
||||
|
|
|
|||
|
|
@ -328,7 +328,7 @@ class EPCPipeline:
|
|||
# model, since EPC standards and rigour have changed over time
|
||||
variable_data = property_data[
|
||||
VARIABLE_DATA_FEATURES + COST_FEATURES + POST_SAP10_FEATURE
|
||||
]
|
||||
]
|
||||
|
||||
uprn = str(uprn)
|
||||
epc_records = [
|
||||
|
|
@ -391,9 +391,7 @@ class EPCPipeline:
|
|||
|
||||
# Auto sort the records so that the record with highest RDSAP score is always record1
|
||||
difference_record: EPCDifferenceRecord = (
|
||||
latest_record.create_EPCDifferenceRecord(
|
||||
other=earliest_record, fixed_data=fixed_data
|
||||
)
|
||||
latest_record.create_epc_difference_record(other=earliest_record, fixed_data=fixed_data)
|
||||
)
|
||||
# difference_record: EPCDifferenceRecord = latest_record - earliest_record
|
||||
# # TODO: Use method above instead of overloading operator
|
||||
|
|
|
|||
|
|
@ -263,7 +263,7 @@ class EPCRecord:
|
|||
heat_loss_corridor_bool: Optional[bool] = None
|
||||
solar_water_heating_flag_bool: Optional[bool] = None
|
||||
|
||||
def __post_init__(self):
|
||||
def __post_init__(self) -> None:
|
||||
# We can have validation and cleaning steps for each of the fields
|
||||
# self.WALLS_DESCRIPTION = 'check'
|
||||
# Could also have cleaning of records if needed
|
||||
|
|
@ -296,7 +296,7 @@ class EPCRecord:
|
|||
return
|
||||
|
||||
@staticmethod
|
||||
def _calculate_days_to(lodgement_date):
|
||||
def _calculate_days_to(lodgement_date: Union[str, pd.Series]) -> Union[int, pd.Series]:
|
||||
if isinstance(lodgement_date, str):
|
||||
return (
|
||||
pd.to_datetime(lodgement_date) - pd.to_datetime(EARLIEST_EPC_DATE)
|
||||
|
|
@ -306,7 +306,7 @@ class EPCRecord:
|
|||
pd.to_datetime(lodgement_date) - pd.to_datetime(EARLIEST_EPC_DATE)
|
||||
).dt.days
|
||||
|
||||
def _clean_with_data_processor(self):
|
||||
def _clean_with_data_processor(self) -> None:
|
||||
"""
|
||||
This method will clean the records using the data processor
|
||||
"""
|
||||
|
|
@ -322,7 +322,7 @@ class EPCRecord:
|
|||
self.prepared_epc = cast(RawEpcRow, record)
|
||||
|
||||
@staticmethod
|
||||
def _cast_value(value, type_hint):
|
||||
def _cast_value(value: PreparedEpcValue, type_hint: Any) -> PreparedEpcValue:
|
||||
|
||||
origin = get_origin(type_hint)
|
||||
args = get_args(type_hint)
|
||||
|
|
@ -392,12 +392,12 @@ class EPCRecord:
|
|||
same_index = df.apply(pd.Series.duplicated).any()
|
||||
self.prepared_epc_delta_metadata = df[same_index[~same_index].index]
|
||||
|
||||
def _clean_records_using_epc_records(self):
|
||||
def _clean_records_using_epc_records(self) -> None:
|
||||
"""
|
||||
This method will clean the records
|
||||
"""
|
||||
|
||||
# TODO: Move all the cleaning steps in the Property class into there
|
||||
# TODO: Move all the cleaning steps in the Property class into here
|
||||
self._clean_built_form()
|
||||
self._clean_energy()
|
||||
self._clean_ventilation()
|
||||
|
|
@ -422,7 +422,7 @@ class EPCRecord:
|
|||
epc_type: str = "prepared_epc",
|
||||
use_upper_columns: bool = True,
|
||||
replace_empty_string: bool = False,
|
||||
):
|
||||
) -> pd.DataFrame:
|
||||
"""
|
||||
This method will return the dataframe representation of the epc record
|
||||
"""
|
||||
|
|
@ -436,25 +436,25 @@ class EPCRecord:
|
|||
|
||||
return df
|
||||
|
||||
def _clean_floor_height(self):
|
||||
def _clean_floor_height(self) -> None:
|
||||
"""Remaps anomalies in floor height to the average floor height for the property type"""
|
||||
floor_height_data = self.cleaning_data[
|
||||
(self.cleaning_data["property_type"] == self.prepared_epc["property-type"])
|
||||
& (self.cleaning_data["built_form"] == self.prepared_epc["built-form"])
|
||||
]
|
||||
average = floor_height_data["floor_height"].mean()
|
||||
sd = floor_height_data["floor_height"].std()
|
||||
average = float(np.mean(floor_height_data["floor_height"]))
|
||||
sd = float(np.std(floor_height_data["floor_height"]))
|
||||
# If we're in the top 0.5 percentile of floor heights, we'll set it to the average
|
||||
if self.prepared_epc["floor-height"] > average + 10 * sd:
|
||||
self.prepared_epc["floor-height"] = average
|
||||
if self.prepared_epc["floor-height"] <= 1.665:
|
||||
self.prepared_epc["floor-height"] = average
|
||||
|
||||
def _clean_new_build_descriptions(self):
|
||||
def _clean_new_build_descriptions(self) -> None:
|
||||
for col in ["roof-description", "walls-description", "floor-description"]:
|
||||
self.prepared_epc[col] = self.prepared_epc[col].replace("W/m²K", "W/m-¦K")
|
||||
|
||||
def _clean_constituency(self):
|
||||
def _clean_constituency(self) -> None:
|
||||
"""
|
||||
We handle the single case of finding a missing constituency by using the local authority
|
||||
"""
|
||||
|
|
@ -467,7 +467,7 @@ class EPCRecord:
|
|||
)
|
||||
self.prepared_epc["constituency"] = "E14000883"
|
||||
|
||||
def _clean_floor_level(self):
|
||||
def _clean_floor_level(self) -> None:
|
||||
"""
|
||||
This method will clean the floor level, if empty or invalid
|
||||
"""
|
||||
|
|
@ -480,7 +480,7 @@ class EPCRecord:
|
|||
else None
|
||||
)
|
||||
|
||||
def _clean_number_lighting_outlets(self):
|
||||
def _clean_number_lighting_outlets(self) -> None:
|
||||
"""
|
||||
This method will clean the number of lighting outlets, if empty or invalid
|
||||
"""
|
||||
|
|
@ -539,7 +539,7 @@ class EPCRecord:
|
|||
else:
|
||||
self.prepared_epc["fixed-lighting-outlets-count"] = float(self.prepared_epc["fixed-lighting-outlets-count"])
|
||||
|
||||
def _filter_property_dimensions(self, property_dimensions):
|
||||
def _filter_property_dimensions(self, property_dimensions) -> pd.Series:
|
||||
"""
|
||||
Will filter the property dimensions dataframe to only include the relevant rows for the property
|
||||
:param property_dimensions:
|
||||
|
|
@ -570,7 +570,7 @@ class EPCRecord:
|
|||
]
|
||||
].mean()
|
||||
|
||||
def _clean_property_dimensions(self):
|
||||
def _clean_property_dimensions(self) -> None:
|
||||
"""
|
||||
Cleans up the number of floors, number of habitable rooms, and the floor height
|
||||
"""
|
||||
|
|
@ -585,11 +585,11 @@ class EPCRecord:
|
|||
):
|
||||
# TODO - this probably shouldn't live here - but we only need to use this for specific properties
|
||||
# when we meet this condition
|
||||
property_dimensions = read_dataframe_from_s3_parquet(
|
||||
property_dimensions: pd.DataFrame = read_dataframe_from_s3_parquet(
|
||||
bucket_name=DATA_BUCKET,
|
||||
file_key=f"property_dimensions/{self.prepared_epc['local-authority']}.parquet",
|
||||
)
|
||||
self.property_dimensions = self._filter_property_dimensions(
|
||||
self.property_dimensions: pd.Series = self._filter_property_dimensions(
|
||||
property_dimensions
|
||||
)
|
||||
|
||||
|
|
@ -625,7 +625,7 @@ class EPCRecord:
|
|||
else:
|
||||
self.prepared_epc["floor-height"] = float(self.prepared_epc["floor-height"])
|
||||
|
||||
def _clean_floor_area(self):
|
||||
def _clean_floor_area(self) -> None:
|
||||
"""
|
||||
This method will clean the floor area, if empty or invalid
|
||||
"""
|
||||
|
|
@ -648,7 +648,7 @@ class EPCRecord:
|
|||
)
|
||||
self.prepared_epc["total-floor-area"] = None
|
||||
|
||||
def _clean_mains_gas(self):
|
||||
def _clean_mains_gas(self) -> None:
|
||||
"""
|
||||
This method will clean the mains gas, if empty or invalid
|
||||
"""
|
||||
|
|
@ -666,7 +666,7 @@ class EPCRecord:
|
|||
else mains_gas_map[self.prepared_epc["mains-gas-flag"]]
|
||||
)
|
||||
|
||||
def _clean_heat_loss_corridor(self):
|
||||
def _clean_heat_loss_corridor(self) -> None:
|
||||
"""
|
||||
This method will clean the heat loss corridor, if empty or invalid
|
||||
"""
|
||||
|
|
@ -700,14 +700,14 @@ class EPCRecord:
|
|||
self.prepared_epc["heat-loss-corridor"]
|
||||
]
|
||||
|
||||
def _clean_count_variables(self):
|
||||
def _clean_count_variables(self) -> None:
|
||||
"""
|
||||
This method will clean the count variables, if empty or invalid
|
||||
"""
|
||||
if not self.prepared_epc:
|
||||
raise ValueError("EPC Recrod doesn not contain epc data")
|
||||
|
||||
fields = [
|
||||
_fields = [
|
||||
"number-open-fireplaces",
|
||||
"extension-count",
|
||||
"flat-storey-count",
|
||||
|
|
@ -716,7 +716,7 @@ class EPCRecord:
|
|||
|
||||
null_attributes = ["flat-storey-count", "number-habitable-rooms"]
|
||||
|
||||
for attribute in fields:
|
||||
for attribute in _fields:
|
||||
value = self.prepared_epc[attribute]
|
||||
if value in DATA_ANOMALY_MATCHES or pd.isnull(value):
|
||||
if attribute in null_attributes:
|
||||
|
|
@ -728,7 +728,7 @@ class EPCRecord:
|
|||
|
||||
self.prepared_epc[attribute] = value
|
||||
|
||||
def _clean_wind_turbine(self):
|
||||
def _clean_wind_turbine(self) -> None:
|
||||
"""
|
||||
This method will clean the wind turbine, if empty or invalid
|
||||
"""
|
||||
|
|
@ -741,7 +741,7 @@ class EPCRecord:
|
|||
else None
|
||||
)
|
||||
|
||||
def _clean_solar_hot_water(self):
|
||||
def _clean_solar_hot_water(self) -> None:
|
||||
"""
|
||||
This method will clean the solar hot water, if empty or invalid
|
||||
"""
|
||||
|
|
@ -764,7 +764,7 @@ class EPCRecord:
|
|||
self.prepared_epc["solar-water-heating-flag"]
|
||||
]
|
||||
|
||||
def _clean_solar_pv(self):
|
||||
def _clean_solar_pv(self) -> None:
|
||||
"""
|
||||
This method will clean the solar pv, if empty or invalid
|
||||
"""
|
||||
|
|
@ -777,7 +777,7 @@ class EPCRecord:
|
|||
else None
|
||||
)
|
||||
|
||||
def _clean_energy(self):
|
||||
def _clean_energy(self) -> None:
|
||||
"""
|
||||
This method will clean the energy, if empty or invalid
|
||||
"""
|
||||
|
|
@ -791,7 +791,7 @@ class EPCRecord:
|
|||
self.prepared_epc["co2-emissions-current"]
|
||||
)
|
||||
|
||||
def _clean_built_form(self):
|
||||
def _clean_built_form(self) -> None:
|
||||
"""
|
||||
This method will clean the build form, if empty or invalid
|
||||
"""
|
||||
|
|
@ -804,7 +804,7 @@ class EPCRecord:
|
|||
else:
|
||||
self.prepared_epc["built-form"] = "Semi-Detached"
|
||||
|
||||
def _clean_age_band(self):
|
||||
def _clean_age_band(self) -> None:
|
||||
"""
|
||||
This method will clean the age band, if empty or invalid
|
||||
"""
|
||||
|
|
@ -856,7 +856,7 @@ class EPCRecord:
|
|||
self.construction_age_band = "England and Wales: 1930-1949"
|
||||
self.prepared_epc["construction-age-band"] = self.construction_age_band
|
||||
|
||||
def _clean_year_built(self):
|
||||
def _clean_year_built(self) -> None:
|
||||
"""
|
||||
This method will clean the year built, if empty or invalid
|
||||
"""
|
||||
|
|
@ -886,7 +886,7 @@ class EPCRecord:
|
|||
# We don't know when the property was built
|
||||
self.year_built = None
|
||||
|
||||
def _clean_ventilation(self):
|
||||
def _clean_ventilation(self) -> None:
|
||||
"""
|
||||
This method will clean the ventilation, if empty or invalid
|
||||
"""
|
||||
|
|
@ -896,7 +896,7 @@ class EPCRecord:
|
|||
else (self.prepared_epc["mechanical-ventilation"])
|
||||
)
|
||||
|
||||
def _field_validation(self):
|
||||
def _field_validation(self) -> None:
|
||||
"""
|
||||
This method will validate each of the fields in the EPC record
|
||||
"""
|
||||
|
|
@ -914,9 +914,10 @@ class EPCRecord:
|
|||
f"Validation type {validation_config['type']} not supported"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _validate_string(
|
||||
self, record_key: str, field_value: Union[str, float], validation_config: dict
|
||||
):
|
||||
record_key: str, field_value: Union[str, float], validation_config: dict
|
||||
) -> None:
|
||||
"""
|
||||
Validate a string field
|
||||
"""
|
||||
|
|
@ -944,7 +945,7 @@ class EPCRecord:
|
|||
@staticmethod
|
||||
def _validate_float(
|
||||
record_key: str, field_value: Union[str, float], validation_config: dict
|
||||
):
|
||||
) -> None:
|
||||
"""
|
||||
Validate a float field
|
||||
"""
|
||||
|
|
@ -972,7 +973,7 @@ class EPCRecord:
|
|||
f"{validation_config['range']}"
|
||||
)
|
||||
|
||||
def create_EPCDifferenceRecord(self, other, fixed_data, auto_sort: bool = True):
|
||||
def create_epc_difference_record(self, other, fixed_data, auto_sort: bool = True):
|
||||
"""
|
||||
This method will create the difference record between the two records
|
||||
"""
|
||||
|
|
@ -986,6 +987,10 @@ class EPCRecord:
|
|||
|
||||
return difference_record
|
||||
|
||||
def _require_prepared_epc(self) -> None:
|
||||
if self.prepared_epc is None:
|
||||
raise ValueError("EPCRecord does not contain prepared EPC data")
|
||||
|
||||
def __sub__(self, other):
|
||||
"""
|
||||
This method will return the difference between two EPC records
|
||||
|
|
@ -1042,7 +1047,7 @@ class EPCRecord:
|
|||
key: Union[str, List[str]],
|
||||
return_asdict: bool = False,
|
||||
key_suffix: str | None = None,
|
||||
) -> Any:
|
||||
) -> PreparedEpcValue | list[PreparedEpcValue] | dict[str, PreparedEpcValue]:
|
||||
"""
|
||||
This method will return the value of the key
|
||||
"""
|
||||
|
|
@ -1067,7 +1072,7 @@ class EPCDifferenceRecord:
|
|||
Base class for the difference between two EPC records
|
||||
"""
|
||||
|
||||
def __init__(self, record1: EPCRecord, record2: EPCRecord, auto_sort: bool = False):
|
||||
def __init__(self, record1: EPCRecord, record2: EPCRecord, auto_sort: bool = False) -> None:
|
||||
"""
|
||||
This method will initialise the EPCDifferenceRecord
|
||||
Defaults usage is with record2 to have the higher RDSAP score
|
||||
|
|
@ -1094,7 +1099,7 @@ class EPCDifferenceRecord:
|
|||
self._validate_difference_record()
|
||||
# self._detect_fabric_consistency()
|
||||
|
||||
def _construct_difference_record(self):
|
||||
def _construct_difference_record(self) -> None:
|
||||
"""
|
||||
This method will construct the difference record between the two records
|
||||
"""
|
||||
|
|
@ -1163,13 +1168,6 @@ class EPCDifferenceRecord:
|
|||
"""
|
||||
This method will validate the difference record
|
||||
"""
|
||||
# for key, value in self.difference_record.items():
|
||||
# if key == "LODGEMENT_DATE":
|
||||
# continue
|
||||
# if isinstance(value, str):
|
||||
# continue
|
||||
# if value < 0:
|
||||
# raise ValueError(f"Difference record has negative value for {key}")
|
||||
pass
|
||||
|
||||
def compare_fields_in_records(self, fields: List[str]):
|
||||
|
|
@ -1185,7 +1183,9 @@ class EPCDifferenceRecord:
|
|||
if all_equal:
|
||||
return True
|
||||
|
||||
def get(self, key: str):
|
||||
return False
|
||||
|
||||
def get(self, key: str) -> PreparedEpcValue:
|
||||
"""
|
||||
This method will return the value of the key
|
||||
"""
|
||||
|
|
@ -1195,14 +1195,14 @@ class EPCDifferenceRecord:
|
|||
else None
|
||||
)
|
||||
|
||||
def append_fixed_data(self, fixed_data: dict):
|
||||
def append_fixed_data(self, fixed_data: dict) -> None:
|
||||
"""
|
||||
This method will append fixed data to the difference record
|
||||
"""
|
||||
self._validate_fixed_data(fixed_data)
|
||||
self.difference_record.update(fixed_data)
|
||||
|
||||
def _validate_fixed_data(self, fixed_data: dict):
|
||||
def _validate_fixed_data(self, fixed_data: dict) -> None:
|
||||
"""
|
||||
This method will validate the fixed data
|
||||
"""
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue