diff --git a/backend/Property.py b/backend/Property.py index 491b74b3..62148779 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -304,7 +304,7 @@ class Property: if k in fixed_data_col_names } - difference_record = self.epc_record.create_EPCDifferenceRecord(self.epc_record, fixed_data) + difference_record = self.epc_record.create_epc_difference_record(self.epc_record, fixed_data) # We have rare cases where entire description columns are missing. EpcRecords will convert this to None. # Due to the sensitivity of the EPCDifferenceRecord creation to missing data, we will fill in these missing diff --git a/etl/epc/Pipeline.py b/etl/epc/Pipeline.py index fac58cd9..e48f414c 100644 --- a/etl/epc/Pipeline.py +++ b/etl/epc/Pipeline.py @@ -328,7 +328,7 @@ class EPCPipeline: # model, since EPC standards and rigour have changed over time variable_data = property_data[ VARIABLE_DATA_FEATURES + COST_FEATURES + POST_SAP10_FEATURE - ] + ] uprn = str(uprn) epc_records = [ @@ -391,9 +391,7 @@ class EPCPipeline: # Auto sort the records so that the record with highest RDSAP score is always record1 difference_record: EPCDifferenceRecord = ( - latest_record.create_EPCDifferenceRecord( - other=earliest_record, fixed_data=fixed_data - ) + latest_record.create_epc_difference_record(other=earliest_record, fixed_data=fixed_data) ) # difference_record: EPCDifferenceRecord = latest_record - earliest_record # # TODO: Use method above instead of overloading operator diff --git a/etl/epc/Record.py b/etl/epc/Record.py index 0b5ad31b..bebddf9b 100644 --- a/etl/epc/Record.py +++ b/etl/epc/Record.py @@ -263,7 +263,7 @@ class EPCRecord: heat_loss_corridor_bool: Optional[bool] = None solar_water_heating_flag_bool: Optional[bool] = None - def __post_init__(self): + def __post_init__(self) -> None: # We can have validation and cleaning steps for each of the fields # self.WALLS_DESCRIPTION = 'check' # Could also have cleaning of records if needed @@ -296,7 +296,7 @@ class EPCRecord: return @staticmethod - def _calculate_days_to(lodgement_date): + def _calculate_days_to(lodgement_date: Union[str, pd.Series]) -> Union[int, pd.Series]: if isinstance(lodgement_date, str): return ( pd.to_datetime(lodgement_date) - pd.to_datetime(EARLIEST_EPC_DATE) @@ -306,7 +306,7 @@ class EPCRecord: pd.to_datetime(lodgement_date) - pd.to_datetime(EARLIEST_EPC_DATE) ).dt.days - def _clean_with_data_processor(self): + def _clean_with_data_processor(self) -> None: """ This method will clean the records using the data processor """ @@ -322,7 +322,7 @@ class EPCRecord: self.prepared_epc = cast(RawEpcRow, record) @staticmethod - def _cast_value(value, type_hint): + def _cast_value(value: PreparedEpcValue, type_hint: Any) -> PreparedEpcValue: origin = get_origin(type_hint) args = get_args(type_hint) @@ -392,12 +392,12 @@ class EPCRecord: same_index = df.apply(pd.Series.duplicated).any() self.prepared_epc_delta_metadata = df[same_index[~same_index].index] - def _clean_records_using_epc_records(self): + def _clean_records_using_epc_records(self) -> None: """ This method will clean the records """ - # TODO: Move all the cleaning steps in the Property class into there + # TODO: Move all the cleaning steps in the Property class into here self._clean_built_form() self._clean_energy() self._clean_ventilation() @@ -422,7 +422,7 @@ class EPCRecord: epc_type: str = "prepared_epc", use_upper_columns: bool = True, replace_empty_string: bool = False, - ): + ) -> pd.DataFrame: """ This method will return the dataframe representation of the epc record """ @@ -436,25 +436,25 @@ class EPCRecord: return df - def _clean_floor_height(self): + def _clean_floor_height(self) -> None: """Remaps anomalies in floor height to the average floor height for the property type""" floor_height_data = self.cleaning_data[ (self.cleaning_data["property_type"] == self.prepared_epc["property-type"]) & (self.cleaning_data["built_form"] == self.prepared_epc["built-form"]) ] - average = floor_height_data["floor_height"].mean() - sd = floor_height_data["floor_height"].std() + average = float(np.mean(floor_height_data["floor_height"])) + sd = float(np.std(floor_height_data["floor_height"])) # If we're in the top 0.5 percentile of floor heights, we'll set it to the average if self.prepared_epc["floor-height"] > average + 10 * sd: self.prepared_epc["floor-height"] = average if self.prepared_epc["floor-height"] <= 1.665: self.prepared_epc["floor-height"] = average - def _clean_new_build_descriptions(self): + def _clean_new_build_descriptions(self) -> None: for col in ["roof-description", "walls-description", "floor-description"]: self.prepared_epc[col] = self.prepared_epc[col].replace("W/m²K", "W/m-¦K") - def _clean_constituency(self): + def _clean_constituency(self) -> None: """ We handle the single case of finding a missing constituency by using the local authority """ @@ -467,7 +467,7 @@ class EPCRecord: ) self.prepared_epc["constituency"] = "E14000883" - def _clean_floor_level(self): + def _clean_floor_level(self) -> None: """ This method will clean the floor level, if empty or invalid """ @@ -480,7 +480,7 @@ class EPCRecord: else None ) - def _clean_number_lighting_outlets(self): + def _clean_number_lighting_outlets(self) -> None: """ This method will clean the number of lighting outlets, if empty or invalid """ @@ -539,7 +539,7 @@ class EPCRecord: else: self.prepared_epc["fixed-lighting-outlets-count"] = float(self.prepared_epc["fixed-lighting-outlets-count"]) - def _filter_property_dimensions(self, property_dimensions): + def _filter_property_dimensions(self, property_dimensions) -> pd.Series: """ Will filter the property dimensions dataframe to only include the relevant rows for the property :param property_dimensions: @@ -570,7 +570,7 @@ class EPCRecord: ] ].mean() - def _clean_property_dimensions(self): + def _clean_property_dimensions(self) -> None: """ Cleans up the number of floors, number of habitable rooms, and the floor height """ @@ -585,11 +585,11 @@ class EPCRecord: ): # TODO - this probably shouldn't live here - but we only need to use this for specific properties # when we meet this condition - property_dimensions = read_dataframe_from_s3_parquet( + property_dimensions: pd.DataFrame = read_dataframe_from_s3_parquet( bucket_name=DATA_BUCKET, file_key=f"property_dimensions/{self.prepared_epc['local-authority']}.parquet", ) - self.property_dimensions = self._filter_property_dimensions( + self.property_dimensions: pd.Series = self._filter_property_dimensions( property_dimensions ) @@ -625,7 +625,7 @@ class EPCRecord: else: self.prepared_epc["floor-height"] = float(self.prepared_epc["floor-height"]) - def _clean_floor_area(self): + def _clean_floor_area(self) -> None: """ This method will clean the floor area, if empty or invalid """ @@ -648,7 +648,7 @@ class EPCRecord: ) self.prepared_epc["total-floor-area"] = None - def _clean_mains_gas(self): + def _clean_mains_gas(self) -> None: """ This method will clean the mains gas, if empty or invalid """ @@ -666,7 +666,7 @@ class EPCRecord: else mains_gas_map[self.prepared_epc["mains-gas-flag"]] ) - def _clean_heat_loss_corridor(self): + def _clean_heat_loss_corridor(self) -> None: """ This method will clean the heat loss corridor, if empty or invalid """ @@ -700,14 +700,14 @@ class EPCRecord: self.prepared_epc["heat-loss-corridor"] ] - def _clean_count_variables(self): + def _clean_count_variables(self) -> None: """ This method will clean the count variables, if empty or invalid """ if not self.prepared_epc: raise ValueError("EPC Recrod doesn not contain epc data") - fields = [ + _fields = [ "number-open-fireplaces", "extension-count", "flat-storey-count", @@ -716,7 +716,7 @@ class EPCRecord: null_attributes = ["flat-storey-count", "number-habitable-rooms"] - for attribute in fields: + for attribute in _fields: value = self.prepared_epc[attribute] if value in DATA_ANOMALY_MATCHES or pd.isnull(value): if attribute in null_attributes: @@ -728,7 +728,7 @@ class EPCRecord: self.prepared_epc[attribute] = value - def _clean_wind_turbine(self): + def _clean_wind_turbine(self) -> None: """ This method will clean the wind turbine, if empty or invalid """ @@ -741,7 +741,7 @@ class EPCRecord: else None ) - def _clean_solar_hot_water(self): + def _clean_solar_hot_water(self) -> None: """ This method will clean the solar hot water, if empty or invalid """ @@ -764,7 +764,7 @@ class EPCRecord: self.prepared_epc["solar-water-heating-flag"] ] - def _clean_solar_pv(self): + def _clean_solar_pv(self) -> None: """ This method will clean the solar pv, if empty or invalid """ @@ -777,7 +777,7 @@ class EPCRecord: else None ) - def _clean_energy(self): + def _clean_energy(self) -> None: """ This method will clean the energy, if empty or invalid """ @@ -791,7 +791,7 @@ class EPCRecord: self.prepared_epc["co2-emissions-current"] ) - def _clean_built_form(self): + def _clean_built_form(self) -> None: """ This method will clean the build form, if empty or invalid """ @@ -804,7 +804,7 @@ class EPCRecord: else: self.prepared_epc["built-form"] = "Semi-Detached" - def _clean_age_band(self): + def _clean_age_band(self) -> None: """ This method will clean the age band, if empty or invalid """ @@ -856,7 +856,7 @@ class EPCRecord: self.construction_age_band = "England and Wales: 1930-1949" self.prepared_epc["construction-age-band"] = self.construction_age_band - def _clean_year_built(self): + def _clean_year_built(self) -> None: """ This method will clean the year built, if empty or invalid """ @@ -886,7 +886,7 @@ class EPCRecord: # We don't know when the property was built self.year_built = None - def _clean_ventilation(self): + def _clean_ventilation(self) -> None: """ This method will clean the ventilation, if empty or invalid """ @@ -896,7 +896,7 @@ class EPCRecord: else (self.prepared_epc["mechanical-ventilation"]) ) - def _field_validation(self): + def _field_validation(self) -> None: """ This method will validate each of the fields in the EPC record """ @@ -914,9 +914,10 @@ class EPCRecord: f"Validation type {validation_config['type']} not supported" ) + @staticmethod def _validate_string( - self, record_key: str, field_value: Union[str, float], validation_config: dict - ): + record_key: str, field_value: Union[str, float], validation_config: dict + ) -> None: """ Validate a string field """ @@ -944,7 +945,7 @@ class EPCRecord: @staticmethod def _validate_float( record_key: str, field_value: Union[str, float], validation_config: dict - ): + ) -> None: """ Validate a float field """ @@ -972,7 +973,7 @@ class EPCRecord: f"{validation_config['range']}" ) - def create_EPCDifferenceRecord(self, other, fixed_data, auto_sort: bool = True): + def create_epc_difference_record(self, other, fixed_data, auto_sort: bool = True): """ This method will create the difference record between the two records """ @@ -986,6 +987,10 @@ class EPCRecord: return difference_record + def _require_prepared_epc(self) -> None: + if self.prepared_epc is None: + raise ValueError("EPCRecord does not contain prepared EPC data") + def __sub__(self, other): """ This method will return the difference between two EPC records @@ -1042,7 +1047,7 @@ class EPCRecord: key: Union[str, List[str]], return_asdict: bool = False, key_suffix: str | None = None, - ) -> Any: + ) -> PreparedEpcValue | list[PreparedEpcValue] | dict[str, PreparedEpcValue]: """ This method will return the value of the key """ @@ -1067,7 +1072,7 @@ class EPCDifferenceRecord: Base class for the difference between two EPC records """ - def __init__(self, record1: EPCRecord, record2: EPCRecord, auto_sort: bool = False): + def __init__(self, record1: EPCRecord, record2: EPCRecord, auto_sort: bool = False) -> None: """ This method will initialise the EPCDifferenceRecord Defaults usage is with record2 to have the higher RDSAP score @@ -1094,7 +1099,7 @@ class EPCDifferenceRecord: self._validate_difference_record() # self._detect_fabric_consistency() - def _construct_difference_record(self): + def _construct_difference_record(self) -> None: """ This method will construct the difference record between the two records """ @@ -1163,13 +1168,6 @@ class EPCDifferenceRecord: """ This method will validate the difference record """ - # for key, value in self.difference_record.items(): - # if key == "LODGEMENT_DATE": - # continue - # if isinstance(value, str): - # continue - # if value < 0: - # raise ValueError(f"Difference record has negative value for {key}") pass def compare_fields_in_records(self, fields: List[str]): @@ -1185,7 +1183,9 @@ class EPCDifferenceRecord: if all_equal: return True - def get(self, key: str): + return False + + def get(self, key: str) -> PreparedEpcValue: """ This method will return the value of the key """ @@ -1195,14 +1195,14 @@ class EPCDifferenceRecord: else None ) - def append_fixed_data(self, fixed_data: dict): + def append_fixed_data(self, fixed_data: dict) -> None: """ This method will append fixed data to the difference record """ self._validate_fixed_data(fixed_data) self.difference_record.update(fixed_data) - def _validate_fixed_data(self, fixed_data: dict): + def _validate_fixed_data(self, fixed_data: dict) -> None: """ This method will validate the fixed data """